contractforge-core 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contractforge_core/__init__.py +135 -0
- contractforge_core/adapters/__init__.py +15 -0
- contractforge_core/adapters/base/__init__.py +10 -0
- contractforge_core/adapters/base/generic.py +62 -0
- contractforge_core/adapters/base/protocol.py +29 -0
- contractforge_core/capabilities/__init__.py +4 -0
- contractforge_core/capabilities/models.py +29 -0
- contractforge_core/capabilities/native.py +53 -0
- contractforge_core/cli.py +96 -0
- contractforge_core/cli_connectors.py +29 -0
- contractforge_core/cli_contracts.py +178 -0
- contractforge_core/cli_init.py +117 -0
- contractforge_core/cli_io.py +22 -0
- contractforge_core/config.py +164 -0
- contractforge_core/connectors/__init__.py +140 -0
- contractforge_core/connectors/api/__init__.py +17 -0
- contractforge_core/connectors/api/rest/__init__.py +51 -0
- contractforge_core/connectors/api/rest/auth.py +104 -0
- contractforge_core/connectors/api/rest/pagination.py +92 -0
- contractforge_core/connectors/api/rest/reader.py +211 -0
- contractforge_core/connectors/api/rest/retry.py +21 -0
- contractforge_core/connectors/api/rest/safety.py +78 -0
- contractforge_core/connectors/api/rest/source.py +43 -0
- contractforge_core/connectors/api/rest/transport.py +15 -0
- contractforge_core/connectors/catalog/__init__.py +27 -0
- contractforge_core/connectors/catalog/catalog/__init__.py +29 -0
- contractforge_core/connectors/catalog/catalog/source.py +39 -0
- contractforge_core/connectors/catalog/catalog/table_refs.py +83 -0
- contractforge_core/connectors/databases/__init__.py +21 -0
- contractforge_core/connectors/databases/jdbc/__init__.py +23 -0
- contractforge_core/connectors/databases/jdbc/rds_iam.py +98 -0
- contractforge_core/connectors/databases/jdbc/source.py +161 -0
- contractforge_core/connectors/files/__init__.py +21 -0
- contractforge_core/connectors/files/files/__init__.py +21 -0
- contractforge_core/connectors/files/files/source.py +52 -0
- contractforge_core/connectors/http_files/__init__.py +27 -0
- contractforge_core/connectors/http_files/http_file/__init__.py +29 -0
- contractforge_core/connectors/http_files/http_file/reader.py +104 -0
- contractforge_core/connectors/http_files/http_file/retry.py +22 -0
- contractforge_core/connectors/http_files/http_file/safety.py +70 -0
- contractforge_core/connectors/http_files/http_file/source.py +82 -0
- contractforge_core/connectors/metadata.py +216 -0
- contractforge_core/connectors/native_passthrough/__init__.py +13 -0
- contractforge_core/connectors/native_passthrough/native_passthrough/__init__.py +13 -0
- contractforge_core/connectors/native_passthrough/native_passthrough/source.py +35 -0
- contractforge_core/connectors/registry.py +69 -0
- contractforge_core/connectors/sharing/__init__.py +8 -0
- contractforge_core/connectors/sharing/delta_share/__init__.py +8 -0
- contractforge_core/connectors/sharing/delta_share/source.py +22 -0
- contractforge_core/connectors/streams/__init__.py +25 -0
- contractforge_core/connectors/streams/eventhubs/__init__.py +17 -0
- contractforge_core/connectors/streams/eventhubs/source.py +44 -0
- contractforge_core/connectors/streams/kafka/__init__.py +17 -0
- contractforge_core/connectors/streams/kafka/source.py +53 -0
- contractforge_core/connectors/streams/source.py +40 -0
- contractforge_core/contracts/__init__.py +131 -0
- contractforge_core/contracts/access.py +161 -0
- contractforge_core/contracts/annotations.py +162 -0
- contractforge_core/contracts/base.py +85 -0
- contractforge_core/contracts/bundle.py +327 -0
- contractforge_core/contracts/environment.py +80 -0
- contractforge_core/contracts/execution.py +65 -0
- contractforge_core/contracts/governance.py +47 -0
- contractforge_core/contracts/governance_common.py +17 -0
- contractforge_core/contracts/naming.py +40 -0
- contractforge_core/contracts/normalize.py +129 -0
- contractforge_core/contracts/operations.py +80 -0
- contractforge_core/contracts/plan_validation.py +25 -0
- contractforge_core/contracts/quality.py +113 -0
- contractforge_core/contracts/root.py +179 -0
- contractforge_core/contracts/schema.py +65 -0
- contractforge_core/contracts/shape_validation.py +127 -0
- contractforge_core/contracts/source.py +61 -0
- contractforge_core/contracts/source_connector.py +115 -0
- contractforge_core/contracts/source_generic.py +122 -0
- contractforge_core/contracts/source_portability.py +88 -0
- contractforge_core/contracts/source_validation.py +230 -0
- contractforge_core/contracts/targeting.py +31 -0
- contractforge_core/contracts/transform.py +174 -0
- contractforge_core/diagnostics/__init__.py +5 -0
- contractforge_core/diagnostics/models.py +15 -0
- contractforge_core/errors.py +62 -0
- contractforge_core/evidence/__init__.py +43 -0
- contractforge_core/evidence/control_tables.py +147 -0
- contractforge_core/evidence/models.py +42 -0
- contractforge_core/evidence/records.py +101 -0
- contractforge_core/execution/__init__.py +27 -0
- contractforge_core/execution/results.py +18 -0
- contractforge_core/execution/strategy.py +27 -0
- contractforge_core/execution/windows.py +117 -0
- contractforge_core/execution/write_modes.py +32 -0
- contractforge_core/metrics/__init__.py +5 -0
- contractforge_core/metrics/write.py +25 -0
- contractforge_core/naming.py +144 -0
- contractforge_core/normalization/__init__.py +19 -0
- contractforge_core/normalization/common.py +26 -0
- contractforge_core/normalization/intents.py +111 -0
- contractforge_core/normalization/quality.py +100 -0
- contractforge_core/parity/__init__.py +5 -0
- contractforge_core/parity/models.py +53 -0
- contractforge_core/partitioning/__init__.py +5 -0
- contractforge_core/partitioning/predicates.py +15 -0
- contractforge_core/planner/__init__.py +18 -0
- contractforge_core/planner/governance_checks.py +50 -0
- contractforge_core/planner/matcher.py +60 -0
- contractforge_core/planner/plan_builder.py +29 -0
- contractforge_core/planner/result.py +47 -0
- contractforge_core/planner/semantic_checks.py +56 -0
- contractforge_core/planner/write_checks.py +156 -0
- contractforge_core/portability.py +53 -0
- contractforge_core/preparation/__init__.py +27 -0
- contractforge_core/preparation/staging.py +129 -0
- contractforge_core/project.py +101 -0
- contractforge_core/quality/__init__.py +24 -0
- contractforge_core/quality/results.py +73 -0
- contractforge_core/quality/rules.py +12 -0
- contractforge_core/reporting/__init__.py +5 -0
- contractforge_core/reporting/models.py +13 -0
- contractforge_core/results.py +27 -0
- contractforge_core/runtime/__init__.py +5 -0
- contractforge_core/runtime/models.py +38 -0
- contractforge_core/schema/__init__.py +13 -0
- contractforge_core/schema/diff.py +122 -0
- contractforge_core/schema/policy.py +25 -0
- contractforge_core/security/__init__.py +7 -0
- contractforge_core/security/redaction.py +73 -0
- contractforge_core/semantic/__init__.py +29 -0
- contractforge_core/semantic/models.py +108 -0
- contractforge_core/watermark.py +65 -0
- contractforge_core-0.1.0.dist-info/METADATA +374 -0
- contractforge_core-0.1.0.dist-info/RECORD +134 -0
- contractforge_core-0.1.0.dist-info/WHEEL +4 -0
- contractforge_core-0.1.0.dist-info/entry_points.txt +2 -0
- contractforge_core-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Starter contract generation for the core ContractForge CLI."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from contractforge_core.cli_io import yaml_dump
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def init_contract(args: Any) -> int:
    """Write the starter contract files for one target and report them as JSON.

    Five sibling files are derived from ``args.output`` (``.ingestion.yaml``,
    ``.annotations.yaml``, ``.operations.yaml``, ``.access.yaml`` and
    ``.environment.yaml``). All payloads are built before anything is written,
    so argument validation errors surface before any file is touched.

    Args:
        args: Parsed CLI arguments. This function reads ``output``, ``force``
            and ``indent``; the payload builders read the remaining fields.

    Returns:
        The value returned by ``_print`` after emitting the summary.

    Raises:
        FileExistsError: If any target file already exists and ``args.force``
            is false. Raised before any file is written.
        ValueError: Propagated from ``_init_ingestion`` for missing keys.
    """
    output = _base_output(args.output)
    files = {
        output.with_suffix(".ingestion.yaml"): _init_ingestion(args),
        output.with_suffix(".annotations.yaml"): _init_annotations(args),
        output.with_suffix(".operations.yaml"): _init_operations(args),
        output.with_suffix(".access.yaml"): _init_access(args),
        output.with_suffix(".environment.yaml"): _init_environment(args),
    }
    # Check every target up front: failing halfway through would leave a
    # partially generated contract set on disk.
    if not args.force:
        for path in files:
            if path.exists():
                raise FileExistsError(f"{path} already exists; use --force to overwrite it")
    written = []
    for path, payload in files.items():
        _write_mapping(path, payload, force=args.force)
        written.append(str(path))
    return _print({"status": "SUCCESS", "written": written}, args.indent)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _init_ingestion(args: Any) -> dict[str, Any]:
    """Build the starter ingestion contract mapping from parsed CLI arguments.

    Args:
        args: Parsed CLI arguments. Reads ``merge_keys``, ``hash_keys`` and
            ``watermark_columns`` (comma-separated strings or ``None``),
            ``mode``, ``source``, ``catalog``, ``target_schema``, ``layer``,
            ``target_table`` and ``schema_policy``.

    Returns:
        The ingestion contract as a plain dict, with keys in output order.

    Raises:
        ValueError: If ``mode`` is ``scd1_upsert``, ``scd2_historical`` or
            ``snapshot_soft_delete`` and no merge keys were given, or if
            ``mode`` is ``scd1_hash_diff`` and neither hash keys nor merge
            keys were given.
    """
    merge_keys = _csv(args.merge_keys)
    # Hash keys fall back to the merge keys when not given explicitly.
    hash_keys = _csv(args.hash_keys) or merge_keys
    if args.mode in {"scd1_upsert", "scd2_historical", "snapshot_soft_delete"} and not merge_keys:
        raise ValueError(f"--merge-keys is required for mode={args.mode}")
    if args.mode == "scd1_hash_diff" and not hash_keys:
        raise ValueError("--hash-keys or --merge-keys is required for mode=scd1_hash_diff")
    contract: dict[str, Any] = {
        "source": {"type": "table", "table": args.source},
        "target": _target(args),
        "layer": args.layer,
        "mode": args.mode,
        "schema_policy": args.schema_policy,
    }
    # Every key gets its own list copy. PyYAML emits anchors and aliases
    # (&id001 / *id001) when one list object appears more than once in the
    # dumped structure, which would make the starter file hard to edit.
    if merge_keys:
        contract["merge_keys"] = list(merge_keys)
        contract["quality_rules"] = {"not_null": list(merge_keys)}
    # hash_keys is guaranteed non-empty here by the check above.
    if args.mode == "scd1_hash_diff":
        contract["hash_keys"] = list(hash_keys)
    watermarks = _csv(args.watermark_columns)
    if watermarks:
        contract["watermark_columns"] = watermarks
    return contract
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _init_annotations(args: Any) -> dict[str, Any]:
    """Build the starter annotations mapping for the target table.

    Missing ``description`` and ``domain`` values are replaced by ``TODO``
    placeholders; ``columns`` starts out empty.
    """
    target = _target(args)
    description = args.description or f"TODO: describe {args.target_table}"
    domain_tag = args.domain or "TODO"
    table_section = {"description": description, "tags": {"domain": domain_tag}}
    return {"target": target, "table": table_section, "columns": {}}
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _init_operations(args: Any) -> dict[str, Any]:
    """Build the starter operations mapping (ownership plus run expectations).

    Owner fields fall back to ``"TODO"`` / ``"data-platform"`` when unset.
    ``criticality``, ``expected_frequency`` and ``freshness_sla_minutes`` are
    passed through unchanged. Both alert flags are enabled.
    """
    target = _target(args)
    ownership = {
        "business_owner": args.owner or "TODO",
        "technical_owner": args.technical_owner or "data-platform",
        "support_group": args.support_group or "data-platform",
    }
    operations = {
        "criticality": args.criticality,
        "expected_frequency": args.expected_frequency,
        "freshness_sla_minutes": args.freshness_sla_minutes,
        "alert_on_failure": True,
        "alert_on_quality_fail": True,
        "runbook_url": args.runbook_url or "TODO",
    }
    return {"target": target, "ownership": ownership, "operations": operations}
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _init_access(args: Any) -> dict[str, Any]:
    """Build the starter access mapping for the target table.

    The policy is fixed to validate-only with warnings on drift and no
    revocation of unmanaged grants. A single ``SELECT`` grant goes to
    ``args.access_principal``, or to ``"data-engineers"`` when unset.
    """
    target = _target(args)
    policy = {"mode": "validate_only", "on_drift": "warn", "revoke_unmanaged": False}
    principal = args.access_principal or "data-engineers"
    select_grant = {"principal": principal, "privileges": ["SELECT"]}
    return {"target": target, "access_policy": policy, "grants": [select_grant]}
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _init_environment(args: Any) -> dict[str, Any]:
|
|
89
|
+
return {"name": "dev", "adapter": args.adapter, "evidence": {"catalog": args.catalog, "schema": "ops"}}
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _target(args: Any) -> dict[str, str]:
|
|
93
|
+
return {"catalog": args.catalog, "schema": args.target_schema or args.layer, "table": args.target_table}
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _write_mapping(path: Path, payload: dict[str, Any], *, force: bool) -> None:
    """Serialize ``payload`` with ``yaml_dump`` and write it to ``path`` as UTF-8.

    Parent directories are created as needed.

    Raises:
        FileExistsError: If ``path`` already exists and ``force`` is false.
    """
    already_present = path.exists()
    if already_present and not force:
        raise FileExistsError(f"{path} already exists; use --force to overwrite it")
    directory = path.parent
    directory.mkdir(parents=True, exist_ok=True)
    text = yaml_dump(payload)
    path.write_text(text, encoding="utf-8")
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _print(payload: object, indent: int) -> int:
|
|
104
|
+
print(json.dumps(payload, indent=indent, sort_keys=True, default=str))
|
|
105
|
+
return 0
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _csv(value: str | None) -> list[str]:
|
|
109
|
+
return [item.strip() for item in str(value or "").split(",") if item.strip()]
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _base_output(path: Path) -> Path:
|
|
113
|
+
name = path.name
|
|
114
|
+
for suffix in (".ingestion.yaml", ".ingestion.yml", ".ingestion.json"):
|
|
115
|
+
if name.endswith(suffix):
|
|
116
|
+
return path.with_name(name[: -len(suffix)])
|
|
117
|
+
return path
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Small JSON/YAML helpers for core CLI modules."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def yaml_load(text: str) -> Any:
    """Parse ``text`` with PyYAML's safe loader.

    Raises:
        RuntimeError: If PyYAML cannot be imported.
    """
    try:
        import yaml as yaml_module  # type: ignore
    except Exception as import_error:  # pragma: no cover
        message = "YAML support requires PyYAML; use JSON files or install PyYAML"
        raise RuntimeError(message) from import_error
    else:
        return yaml_module.safe_load(text)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def yaml_dump(payload: dict[str, Any]) -> str:
    """Serialize ``payload`` to YAML text, keeping key insertion order.

    When PyYAML cannot be imported, the payload is returned as two-space
    indented JSON followed by a newline.
    """
    try:
        import yaml as yaml_module  # type: ignore
    except Exception:  # pragma: no cover
        fallback = json.dumps(payload, indent=2, sort_keys=False)
        return fallback + "\n"
    else:
        return yaml_module.safe_dump(payload, allow_unicode=True, sort_keys=False)
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""Platform-neutral constants and shared type aliases."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Literal, Union
|
|
6
|
+
|
|
7
|
+
# Framework version string.
# NOTE(review): confirm whether this is meant to track the package version.
FRAMEWORK_VERSION = "1.0.0"
# Integer schema version; presumably for the control tables - TODO confirm.
CTRL_SCHEMA_VERSION = 1

# Layer names are plain strings; this alias does not restrict them to a set.
Layer = str

# Static-typing aliases for the closed sets of contract option values.
WriteMode = Literal[
    "scd0_append",
    "scd0_overwrite",
    "scd1_upsert",
    "scd1_hash_diff",
    "scd2_historical",
    "snapshot_soft_delete",
]
WriteEngine = Literal[
    "auto",
    "core_managed",
    "native_merge",
    "native_cdc",
]
WriteEngineFallbackPolicy = Literal["fail", "fallback_to_core", "preview_only"]
SchemaPolicy = Literal["permissive", "additive_only", "strict"]
QualityFailAction = Literal["fail", "warn", "quarantine"]
QualityRuleSeverity = Literal["warn", "quarantine", "abort"]
SCD2LateArrivingPolicy = Literal["apply", "ignore", "reject"]
GovernanceFailurePolicy = Literal["fail", "warn", "ignore"]
AccessMode = Literal["apply", "validate_only", "ignore"]
AccessDriftPolicy = Literal["fail", "warn", "reconcile"]
IdempotencyPolicy = Literal["always_run", "skip_if_success", "fail_if_success", "rerun_if_failed"]
# A source is either a bare string or a mapping of source options.
Source = Union[str, dict[str, Any]]
|
|
36
|
+
|
|
37
|
+
# Write modes starting with this prefix are accepted by is_valid_write_mode
# in addition to the built-in modes listed in VALID_WRITE_MODES.
CUSTOM_WRITE_MODE_PREFIX = "custom:"

# Runtime membership sets. The first several repeat the values of the Literal
# aliases defined earlier in this module; keep both in sync when editing.
VALID_WRITE_MODES = {
    "scd0_append",
    "scd0_overwrite",
    "scd1_upsert",
    "scd1_hash_diff",
    "scd2_historical",
    "snapshot_soft_delete",
}
VALID_WRITE_ENGINES = {"auto", "core_managed", "native_merge", "native_cdc"}
VALID_WRITE_ENGINE_FALLBACK_POLICIES = {"fail", "fallback_to_core", "preview_only"}
VALID_SCHEMA_POLICIES = {"permissive", "additive_only", "strict"}
VALID_QUALITY_FAIL_ACTIONS = {"fail", "warn", "quarantine"}
VALID_QUALITY_RULE_SEVERITIES = {"warn", "quarantine", "abort"}
VALID_SCD2_LATE_ARRIVING_POLICIES = {"apply", "ignore", "reject"}
VALID_GOVERNANCE_FAILURE_POLICIES = {"fail", "warn", "ignore"}
VALID_ACCESS_MODES = {"apply", "validate_only", "ignore"}
VALID_ACCESS_DRIFT_POLICIES = {"fail", "warn", "reconcile"}
# The sets below have no matching Literal alias in this module.
VALID_CRITICALITY_LEVELS = {"low", "medium", "high", "critical"}
VALID_EXPECTED_FREQUENCIES = {"hourly", "daily", "weekly", "monthly", "ad_hoc"}
VALID_SENSITIVITY_LEVELS = {"public", "internal", "restricted", "confidential"}
# Accepted PII category labels (lowercase).
VALID_PII_TYPES = {
    "address",
    "bank_account",
    "birth_date",
    "credit_card",
    "device_id",
    "document",
    "email",
    "financial",
    "health",
    "ip_address",
    "name",
    "national_id",
    "other",
    "phone",
    "ssn",
    "tax_id",
    "unknown",
}
# Accepted grant privilege names; uppercase, some containing a space.
VALID_ACCESS_PRIVILEGES = {
    "ALL PRIVILEGES",
    "APPLY TAG",
    "CREATE",
    "CREATE FUNCTION",
    "CREATE MODEL",
    "CREATE TABLE",
    "CREATE VOLUME",
    "EXECUTE",
    "MANAGE",
    "MODIFY",
    "READ FILES",
    "READ VOLUME",
    "REFRESH",
    "SELECT",
    "USAGE",
    "WRITE FILES",
    "WRITE VOLUME",
}
VALID_IDEMPOTENCY_POLICIES = {"always_run", "skip_if_success", "fail_if_success", "rerun_if_failed"}
VALID_EXPLAIN_FORMATS = {"simple", "extended", "codegen", "cost", "formatted"}
|
|
99
|
+
|
|
100
|
+
# The only accepted source type in this set is "connector".
VALID_SOURCE_TYPES = {"connector"}
# Accepted connector names, kept in alphabetical order.
VALID_SOURCE_CONNECTORS = {
    "adls",
    "avro",
    "azure_blob",
    "bigquery_jdbc",
    "blob",
    "csv",
    "db2",
    "delta",
    "delta_share",
    "delta_table",
    "eventhubs_available_now",
    "eventhubs_bounded",
    "gcs",
    "http_csv",
    "http_file",
    "http_json",
    "http_text",
    "iceberg_table",
    "incremental_files",
    "jdbc",
    "json",
    "kafka_available_now",
    "kafka_bounded",
    "mariadb",
    "mysql",
    "native_passthrough",
    "object_storage",
    "oracle",
    "orc",
    "parquet",
    "postgres",
    "redshift",
    "rest_api",
    "s3",
    "snowflake_jdbc",
    "sql",
    "sqlserver",
    "table",
    "text",
    "view",
    "xml",
}
VALID_OBJECT_STORAGE_PROVIDERS = {"adls", "azure_blob", "gcs", "s3"}
VALID_FILE_CONNECTOR_FORMATS = {"avro", "csv", "delta", "json", "jsonl", "ndjson", "orc", "parquet", "text", "xml"}
# Every HTTP file format listed here also appears in VALID_FILE_CONNECTOR_FORMATS.
VALID_HTTP_FILE_FORMATS = {"csv", "json", "jsonl", "ndjson", "text"}
VALID_SOURCE_TRIGGERS = {"available_now"}
# Accepted array handling mode names.
ARRAY_MODES = {"explode", "explode_outer", "first", "keep", "size", "to_json"}
# Numeric limit; presumably caps inline accepted-values lists - TODO confirm.
MAX_INLINE_ACCEPTED_VALUES = 1000
# Column names listed as control columns.
# NOTE(review): presumably framework-managed columns on target tables - confirm.
CONTROL_COLUMNS = {
    "ingestion_ts_utc",
    "__run_id",
    "row_hash",
    "valid_from",
    "valid_to",
    "is_current",
    "is_active",
    "deleted_at",
    "changed_columns",
}
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def is_valid_write_mode(value: str) -> bool:
    """Return whether ``value`` is a built-in or custom-prefixed write mode.

    A value is accepted when it is in ``VALID_WRITE_MODES`` or starts with
    ``CUSTOM_WRITE_MODE_PREFIX``.
    """
    if value in VALID_WRITE_MODES:
        return True
    return value.startswith(CUSTOM_WRITE_MODE_PREFIX)
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""Platform-neutral connector helpers."""
|
|
2
|
+
|
|
3
|
+
from contractforge_core.connectors.http_files import (
|
|
4
|
+
HTTP_FILE_TYPES,
|
|
5
|
+
cleanup_http_file_downloads,
|
|
6
|
+
download_http_file,
|
|
7
|
+
http_file_format,
|
|
8
|
+
http_file_headers,
|
|
9
|
+
http_file_params,
|
|
10
|
+
http_file_reader_options,
|
|
11
|
+
http_file_url,
|
|
12
|
+
is_http_file_source,
|
|
13
|
+
read_http_file_payload,
|
|
14
|
+
)
|
|
15
|
+
from contractforge_core.connectors.catalog import (
|
|
16
|
+
CATALOG_SOURCE_TYPES,
|
|
17
|
+
LogicalTableReference,
|
|
18
|
+
TableRefResolver,
|
|
19
|
+
catalog_source_query,
|
|
20
|
+
catalog_source_table_or_path,
|
|
21
|
+
has_table_reference_placeholders,
|
|
22
|
+
is_catalog_source,
|
|
23
|
+
parse_logical_table_reference,
|
|
24
|
+
render_table_reference_placeholders,
|
|
25
|
+
source_logical_table_reference,
|
|
26
|
+
)
|
|
27
|
+
from contractforge_core.connectors.files import (
|
|
28
|
+
FILE_SOURCE_TYPES,
|
|
29
|
+
OBJECT_STORAGE_TYPES,
|
|
30
|
+
file_reader_options,
|
|
31
|
+
file_source_format,
|
|
32
|
+
is_file_source,
|
|
33
|
+
normalize_file_format,
|
|
34
|
+
object_storage_provider,
|
|
35
|
+
)
|
|
36
|
+
from contractforge_core.connectors.streams import (
|
|
37
|
+
AVAILABLE_NOW_STREAM_TYPES,
|
|
38
|
+
BOUNDED_STREAM_TYPES,
|
|
39
|
+
STREAM_SOURCE_TYPES,
|
|
40
|
+
eventhubs_bounded_options,
|
|
41
|
+
is_available_now_stream_source,
|
|
42
|
+
is_bounded_stream_source,
|
|
43
|
+
is_eventhubs_stream_source,
|
|
44
|
+
is_kafka_stream_source,
|
|
45
|
+
kafka_bounded_options,
|
|
46
|
+
stream_source_format,
|
|
47
|
+
)
|
|
48
|
+
from contractforge_core.connectors.sharing import delta_share_options, is_delta_share_source
|
|
49
|
+
from contractforge_core.connectors.databases import JDBC_CONNECTORS, jdbc_common_options, validate_jdbc_source
|
|
50
|
+
from contractforge_core.connectors.native_passthrough import (
|
|
51
|
+
is_native_passthrough_source,
|
|
52
|
+
native_passthrough_descriptor,
|
|
53
|
+
redact_secret_fields,
|
|
54
|
+
)
|
|
55
|
+
from contractforge_core.connectors.metadata import (
|
|
56
|
+
diagnose_source_connectors,
|
|
57
|
+
list_source_connector_details,
|
|
58
|
+
source_capabilities,
|
|
59
|
+
source_connector_details,
|
|
60
|
+
source_metadata_from_contract,
|
|
61
|
+
source_metadata_from_mapping,
|
|
62
|
+
source_provider,
|
|
63
|
+
)
|
|
64
|
+
from contractforge_core.connectors.databases import (
|
|
65
|
+
generate_rds_iam_auth_token,
|
|
66
|
+
infer_aws_region_from_rds_host,
|
|
67
|
+
parse_jdbc_host_port,
|
|
68
|
+
rds_iam_review_options,
|
|
69
|
+
)
|
|
70
|
+
from contractforge_core.connectors.api.rest import (
|
|
71
|
+
REST_API_CONNECTORS,
|
|
72
|
+
is_rest_api_connector,
|
|
73
|
+
read_rest_api_records,
|
|
74
|
+
rest_api_descriptor,
|
|
75
|
+
rest_request_headers,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
# Public API of the connectors package: every name re-exported by the
# imports above, and nothing else.
__all__ = [
    "HTTP_FILE_TYPES",
    "cleanup_http_file_downloads",
    "download_http_file",
    "read_rest_api_records",
    "rest_request_headers",
    "JDBC_CONNECTORS",
    "CATALOG_SOURCE_TYPES",
    "LogicalTableReference",
    "TableRefResolver",
    "AVAILABLE_NOW_STREAM_TYPES",
    "BOUNDED_STREAM_TYPES",
    "STREAM_SOURCE_TYPES",
    "FILE_SOURCE_TYPES",
    "OBJECT_STORAGE_TYPES",
    "REST_API_CONNECTORS",
    "catalog_source_query",
    "catalog_source_table_or_path",
    "delta_share_options",
    "diagnose_source_connectors",
    "eventhubs_bounded_options",
    "file_reader_options",
    "file_source_format",
    "generate_rds_iam_auth_token",
    "has_table_reference_placeholders",
    "http_file_format",
    "http_file_headers",
    "http_file_params",
    "http_file_reader_options",
    "http_file_url",
    "read_http_file_payload",
    "infer_aws_region_from_rds_host",
    "is_http_file_source",
    "is_available_now_stream_source",
    "is_bounded_stream_source",
    "is_catalog_source",
    "is_delta_share_source",
    "is_eventhubs_stream_source",
    "is_file_source",
    "is_kafka_stream_source",
    "stream_source_format",
    "jdbc_common_options",
    "list_source_connector_details",
    "is_native_passthrough_source",
    "is_rest_api_connector",
    "kafka_bounded_options",
    "native_passthrough_descriptor",
    "parse_jdbc_host_port",
    "parse_logical_table_reference",
    "redact_secret_fields",
    "render_table_reference_placeholders",
    "rest_api_descriptor",
    "normalize_file_format",
    "object_storage_provider",
    "rds_iam_review_options",
    "source_capabilities",
    "source_connector_details",
    "source_metadata_from_contract",
    "source_metadata_from_mapping",
    "source_provider",
    "source_logical_table_reference",
    "validate_jdbc_source",
]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Facade for the API connector family."""
|
|
2
|
+
|
|
3
|
+
from contractforge_core.connectors.api.rest import (
|
|
4
|
+
REST_API_CONNECTORS,
|
|
5
|
+
is_rest_api_connector,
|
|
6
|
+
read_rest_api_records,
|
|
7
|
+
rest_api_descriptor,
|
|
8
|
+
rest_request_headers,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
# Public API of the API connector family: the names re-exported from the
# REST connector package above.
__all__ = [
    "REST_API_CONNECTORS",
    "is_rest_api_connector",
    "read_rest_api_records",
    "rest_api_descriptor",
    "rest_request_headers",
]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Facade for the bounded REST API connector."""
|
|
2
|
+
|
|
3
|
+
from contractforge_core.connectors.api.rest.auth import rest_request_headers
|
|
4
|
+
from contractforge_core.connectors.api.rest.pagination import (
|
|
5
|
+
json_path,
|
|
6
|
+
link_header_next,
|
|
7
|
+
max_pages_for_source,
|
|
8
|
+
next_url,
|
|
9
|
+
page_urls,
|
|
10
|
+
pagination_type,
|
|
11
|
+
url_with_params,
|
|
12
|
+
)
|
|
13
|
+
from contractforge_core.connectors.api.rest.reader import read_rest_api_records
|
|
14
|
+
from contractforge_core.connectors.api.rest.retry import (
|
|
15
|
+
RETRYABLE_HTTP_STATUS,
|
|
16
|
+
is_retryable_http_error,
|
|
17
|
+
is_retryable_network_error,
|
|
18
|
+
sleep_retry_backoff,
|
|
19
|
+
)
|
|
20
|
+
from contractforge_core.connectors.api.rest.safety import (
|
|
21
|
+
ALLOW_PRIVATE_FLAG,
|
|
22
|
+
ALLOWED_SCHEMES,
|
|
23
|
+
validate_http_target,
|
|
24
|
+
)
|
|
25
|
+
from contractforge_core.connectors.api.rest.source import (
|
|
26
|
+
REST_API_CONNECTORS,
|
|
27
|
+
is_rest_api_connector,
|
|
28
|
+
rest_api_descriptor,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Public API of the REST connector: names re-exported from the auth,
# pagination, reader, retry, safety and source submodules imported above.
__all__ = [
    "ALLOWED_SCHEMES",
    "ALLOW_PRIVATE_FLAG",
    "REST_API_CONNECTORS",
    "RETRYABLE_HTTP_STATUS",
    "is_retryable_http_error",
    "is_retryable_network_error",
    "is_rest_api_connector",
    "json_path",
    "link_header_next",
    "max_pages_for_source",
    "next_url",
    "page_urls",
    "pagination_type",
    "read_rest_api_records",
    "rest_api_descriptor",
    "rest_request_headers",
    "sleep_retry_backoff",
    "url_with_params",
    "validate_http_target",
]
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Auth header construction for the platform-neutral bounded REST client.
|
|
2
|
+
|
|
3
|
+
This module operates on already-resolved values. Adapters are responsible for
|
|
4
|
+
resolving any secret placeholders (e.g. via their platform secret store) before
|
|
5
|
+
calling these helpers, so the core never depends on a platform secret backend.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import base64
|
|
11
|
+
import json
|
|
12
|
+
import urllib.parse
|
|
13
|
+
import urllib.request
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from contractforge_core.connectors.api.rest.safety import validate_http_target
|
|
17
|
+
from contractforge_core.connectors.api.rest.transport import open_request as _open_request
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def rest_request_headers(
    source: dict[str, Any],
    incremental: dict[str, Any] | None = None,
    watermark: str | None = None,
) -> dict[str, str]:
    """Build the HTTP request headers for a REST source.

    Headers from ``source["request"]["headers"]`` are copied first (keys and
    values stringified). The auth header for ``source["auth"]`` is added next
    and overwrites a same-named explicit header. Finally, the watermark value
    is added under ``incremental["watermark_header"]`` when both are set.

    Args:
        source: REST source mapping with already-resolved secret values.
        incremental: Optional incremental settings; only ``watermark_header``
            is read.
        watermark: Optional watermark value to send.

    Returns:
        A new dict of header names to values.

    Raises:
        ValueError: If a required auth field is missing, or if ``auth`` is
            non-empty with an unsupported ``type``.
    """
    request = _dict(source.get("request"))
    headers = _string_dict(request.get("headers"))
    auth = _dict(source.get("auth"))
    auth_type = str(auth.get("type") or "").strip().lower()

    if auth_type == "bearer_token":
        token = auth.get("token")
        if not token:
            raise ValueError("REST API bearer_token auth requires auth.token")
        headers["Authorization"] = f"Bearer {token}"
    elif auth_type == "api_key":
        api_key = auth.get("value")
        if not api_key:
            raise ValueError("REST API api_key auth requires auth.value")
        header_name = str(auth.get("header") or "X-Api-Key")
        headers[header_name] = str(api_key)
    elif auth_type == "basic":
        username = auth.get("username")
        password = auth.get("password")
        if not (username and password):
            raise ValueError("REST API basic auth requires auth.username and auth.password")
        credentials = f"{username}:{password}".encode("utf-8")
        headers["Authorization"] = "Basic " + base64.b64encode(credentials).decode("ascii")
    elif auth_type == "oauth_client_credentials":
        headers["Authorization"] = f"Bearer {_oauth_client_credentials_token(auth, source)}"
    elif auth:
        # An auth mapping with a missing or unknown type is an error; an
        # absent or empty mapping means no auth.
        raise ValueError(f"REST API auth.type={auth_type!r} is not supported")

    incremental = incremental or {}
    if watermark:
        watermark_header = incremental.get("watermark_header")
        if watermark_header:
            headers[str(watermark_header)] = watermark
    return headers
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _oauth_client_credentials_token(auth: dict[str, Any], source: dict[str, Any]) -> str:
    """Fetch an access token with the OAuth client-credentials grant.

    Args:
        auth: Resolved auth mapping. Reads ``token_url`` (or ``tenant_id``,
            from which a Microsoft Entra ID token URL is derived),
            ``client_id``, ``client_secret`` and optionally ``scope`` or
            ``scopes``.
        source: REST source mapping; ``limits.timeout_seconds`` sets the
            request timeout and defaults to 60.

    Returns:
        The ``access_token`` value from the token response, as a string.

    Raises:
        ValueError: If required auth fields are missing, or if the response
            is not a JSON object containing a non-empty ``access_token``.
    """
    token_url = str(auth.get("token_url") or "").strip()
    tenant_id = str(auth.get("tenant_id") or "").strip()
    if not token_url and tenant_id:
        token_url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token"
    client_id = auth.get("client_id")
    client_secret = auth.get("client_secret")
    if not token_url or not client_id or not client_secret:
        raise ValueError(
            "REST API OAuth client credentials auth requires auth.token_url, auth.client_id and auth.client_secret; "
            "auth.tenant_id can replace auth.token_url for Microsoft Entra ID."
        )
    fields = {
        "grant_type": "client_credentials",
        "client_id": str(client_id),
        "client_secret": str(client_secret),
    }
    # A single "scope" string takes precedence over "scopes"; a collection of
    # scopes is joined with spaces, skipping blank entries.
    scope = auth.get("scope")
    scopes = auth.get("scopes")
    if scope:
        fields["scope"] = str(scope)
    elif isinstance(scopes, (list, tuple, set)):
        fields["scope"] = " ".join(str(item) for item in scopes if str(item).strip())
    elif scopes:
        fields["scope"] = str(scopes)
    timeout = int(_dict(source.get("limits")).get("timeout_seconds", 60))
    # The token URL goes through the same target validation helper before
    # any request is made.
    validate_http_target(token_url, context="REST API OAuth token URL")
    request = urllib.request.Request(
        token_url,
        method="POST",
        data=urllib.parse.urlencode(fields).encode("utf-8"),
        headers={"Content-Type": "application/x-www-form-urlencoded"},
    )
    with _open_request(request, timeout=timeout) as response:
        raw = response.read()
        encoding = response.headers.get_content_charset() if hasattr(response.headers, "get_content_charset") else None
    payload = json.loads(raw.decode(encoding or "utf-8")) if raw else {}
    # A JSON array, string or number has no .get(); treat it like a missing
    # token so callers see the intended ValueError, not an AttributeError.
    token = payload.get("access_token") if isinstance(payload, dict) else None
    if not token:
        raise ValueError("OAuth response did not return access_token")
    return str(token)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _dict(value: object) -> dict[str, Any]:
|
|
100
|
+
return dict(value) if isinstance(value, dict) else {}
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _string_dict(value: object) -> dict[str, str]:
    """Return ``value`` as a dict with keys and values converted to ``str``.

    Non-dict input yields an empty dict.
    """
    mapping = _dict(value)
    return dict((str(name), str(entry)) for name, entry in mapping.items())
|