contractforge-core 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. contractforge_core/__init__.py +135 -0
  2. contractforge_core/adapters/__init__.py +15 -0
  3. contractforge_core/adapters/base/__init__.py +10 -0
  4. contractforge_core/adapters/base/generic.py +62 -0
  5. contractforge_core/adapters/base/protocol.py +29 -0
  6. contractforge_core/capabilities/__init__.py +4 -0
  7. contractforge_core/capabilities/models.py +29 -0
  8. contractforge_core/capabilities/native.py +53 -0
  9. contractforge_core/cli.py +96 -0
  10. contractforge_core/cli_connectors.py +29 -0
  11. contractforge_core/cli_contracts.py +178 -0
  12. contractforge_core/cli_init.py +117 -0
  13. contractforge_core/cli_io.py +22 -0
  14. contractforge_core/config.py +164 -0
  15. contractforge_core/connectors/__init__.py +140 -0
  16. contractforge_core/connectors/api/__init__.py +17 -0
  17. contractforge_core/connectors/api/rest/__init__.py +51 -0
  18. contractforge_core/connectors/api/rest/auth.py +104 -0
  19. contractforge_core/connectors/api/rest/pagination.py +92 -0
  20. contractforge_core/connectors/api/rest/reader.py +211 -0
  21. contractforge_core/connectors/api/rest/retry.py +21 -0
  22. contractforge_core/connectors/api/rest/safety.py +78 -0
  23. contractforge_core/connectors/api/rest/source.py +43 -0
  24. contractforge_core/connectors/api/rest/transport.py +15 -0
  25. contractforge_core/connectors/catalog/__init__.py +27 -0
  26. contractforge_core/connectors/catalog/catalog/__init__.py +29 -0
  27. contractforge_core/connectors/catalog/catalog/source.py +39 -0
  28. contractforge_core/connectors/catalog/catalog/table_refs.py +83 -0
  29. contractforge_core/connectors/databases/__init__.py +21 -0
  30. contractforge_core/connectors/databases/jdbc/__init__.py +23 -0
  31. contractforge_core/connectors/databases/jdbc/rds_iam.py +98 -0
  32. contractforge_core/connectors/databases/jdbc/source.py +161 -0
  33. contractforge_core/connectors/files/__init__.py +21 -0
  34. contractforge_core/connectors/files/files/__init__.py +21 -0
  35. contractforge_core/connectors/files/files/source.py +52 -0
  36. contractforge_core/connectors/http_files/__init__.py +27 -0
  37. contractforge_core/connectors/http_files/http_file/__init__.py +29 -0
  38. contractforge_core/connectors/http_files/http_file/reader.py +104 -0
  39. contractforge_core/connectors/http_files/http_file/retry.py +22 -0
  40. contractforge_core/connectors/http_files/http_file/safety.py +70 -0
  41. contractforge_core/connectors/http_files/http_file/source.py +82 -0
  42. contractforge_core/connectors/metadata.py +216 -0
  43. contractforge_core/connectors/native_passthrough/__init__.py +13 -0
  44. contractforge_core/connectors/native_passthrough/native_passthrough/__init__.py +13 -0
  45. contractforge_core/connectors/native_passthrough/native_passthrough/source.py +35 -0
  46. contractforge_core/connectors/registry.py +69 -0
  47. contractforge_core/connectors/sharing/__init__.py +8 -0
  48. contractforge_core/connectors/sharing/delta_share/__init__.py +8 -0
  49. contractforge_core/connectors/sharing/delta_share/source.py +22 -0
  50. contractforge_core/connectors/streams/__init__.py +25 -0
  51. contractforge_core/connectors/streams/eventhubs/__init__.py +17 -0
  52. contractforge_core/connectors/streams/eventhubs/source.py +44 -0
  53. contractforge_core/connectors/streams/kafka/__init__.py +17 -0
  54. contractforge_core/connectors/streams/kafka/source.py +53 -0
  55. contractforge_core/connectors/streams/source.py +40 -0
  56. contractforge_core/contracts/__init__.py +131 -0
  57. contractforge_core/contracts/access.py +161 -0
  58. contractforge_core/contracts/annotations.py +162 -0
  59. contractforge_core/contracts/base.py +85 -0
  60. contractforge_core/contracts/bundle.py +327 -0
  61. contractforge_core/contracts/environment.py +80 -0
  62. contractforge_core/contracts/execution.py +65 -0
  63. contractforge_core/contracts/governance.py +47 -0
  64. contractforge_core/contracts/governance_common.py +17 -0
  65. contractforge_core/contracts/naming.py +40 -0
  66. contractforge_core/contracts/normalize.py +129 -0
  67. contractforge_core/contracts/operations.py +80 -0
  68. contractforge_core/contracts/plan_validation.py +25 -0
  69. contractforge_core/contracts/quality.py +113 -0
  70. contractforge_core/contracts/root.py +179 -0
  71. contractforge_core/contracts/schema.py +65 -0
  72. contractforge_core/contracts/shape_validation.py +127 -0
  73. contractforge_core/contracts/source.py +61 -0
  74. contractforge_core/contracts/source_connector.py +115 -0
  75. contractforge_core/contracts/source_generic.py +122 -0
  76. contractforge_core/contracts/source_portability.py +88 -0
  77. contractforge_core/contracts/source_validation.py +230 -0
  78. contractforge_core/contracts/targeting.py +31 -0
  79. contractforge_core/contracts/transform.py +174 -0
  80. contractforge_core/diagnostics/__init__.py +5 -0
  81. contractforge_core/diagnostics/models.py +15 -0
  82. contractforge_core/errors.py +62 -0
  83. contractforge_core/evidence/__init__.py +43 -0
  84. contractforge_core/evidence/control_tables.py +147 -0
  85. contractforge_core/evidence/models.py +42 -0
  86. contractforge_core/evidence/records.py +101 -0
  87. contractforge_core/execution/__init__.py +27 -0
  88. contractforge_core/execution/results.py +18 -0
  89. contractforge_core/execution/strategy.py +27 -0
  90. contractforge_core/execution/windows.py +117 -0
  91. contractforge_core/execution/write_modes.py +32 -0
  92. contractforge_core/metrics/__init__.py +5 -0
  93. contractforge_core/metrics/write.py +25 -0
  94. contractforge_core/naming.py +144 -0
  95. contractforge_core/normalization/__init__.py +19 -0
  96. contractforge_core/normalization/common.py +26 -0
  97. contractforge_core/normalization/intents.py +111 -0
  98. contractforge_core/normalization/quality.py +100 -0
  99. contractforge_core/parity/__init__.py +5 -0
  100. contractforge_core/parity/models.py +53 -0
  101. contractforge_core/partitioning/__init__.py +5 -0
  102. contractforge_core/partitioning/predicates.py +15 -0
  103. contractforge_core/planner/__init__.py +18 -0
  104. contractforge_core/planner/governance_checks.py +50 -0
  105. contractforge_core/planner/matcher.py +60 -0
  106. contractforge_core/planner/plan_builder.py +29 -0
  107. contractforge_core/planner/result.py +47 -0
  108. contractforge_core/planner/semantic_checks.py +56 -0
  109. contractforge_core/planner/write_checks.py +156 -0
  110. contractforge_core/portability.py +53 -0
  111. contractforge_core/preparation/__init__.py +27 -0
  112. contractforge_core/preparation/staging.py +129 -0
  113. contractforge_core/project.py +101 -0
  114. contractforge_core/quality/__init__.py +24 -0
  115. contractforge_core/quality/results.py +73 -0
  116. contractforge_core/quality/rules.py +12 -0
  117. contractforge_core/reporting/__init__.py +5 -0
  118. contractforge_core/reporting/models.py +13 -0
  119. contractforge_core/results.py +27 -0
  120. contractforge_core/runtime/__init__.py +5 -0
  121. contractforge_core/runtime/models.py +38 -0
  122. contractforge_core/schema/__init__.py +13 -0
  123. contractforge_core/schema/diff.py +122 -0
  124. contractforge_core/schema/policy.py +25 -0
  125. contractforge_core/security/__init__.py +7 -0
  126. contractforge_core/security/redaction.py +73 -0
  127. contractforge_core/semantic/__init__.py +29 -0
  128. contractforge_core/semantic/models.py +108 -0
  129. contractforge_core/watermark.py +65 -0
  130. contractforge_core-0.1.0.dist-info/METADATA +374 -0
  131. contractforge_core-0.1.0.dist-info/RECORD +134 -0
  132. contractforge_core-0.1.0.dist-info/WHEEL +4 -0
  133. contractforge_core-0.1.0.dist-info/entry_points.txt +2 -0
  134. contractforge_core-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,117 @@
1
+ """Starter contract generation for the core ContractForge CLI."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from contractforge_core.cli_io import yaml_dump
10
+
11
+
12
def init_contract(args: Any) -> int:
    """Generate the five starter contract files for one target table.

    The base path comes from ``args.output`` (with a trailing
    ``.ingestion.yaml``/``.yml``/``.json`` removed by ``_base_output``). Each
    file is derived with ``Path.with_suffix``, so any remaining final extension
    on the base name is replaced by ``.ingestion.yaml``, ``.annotations.yaml``,
    ``.operations.yaml``, ``.access.yaml`` and ``.environment.yaml``.

    Args:
        args: Parsed CLI namespace. Reads ``output``, ``force`` and ``indent``
            here; the ``_init_*`` builders read the remaining attributes.

    Returns:
        The exit code produced by ``_print`` after reporting the written paths.

    Raises:
        FileExistsError: If any destination exists and ``args.force`` is falsy.
            Nothing is written in that case.
        ValueError: Propagated from ``_init_ingestion`` for missing key options.
    """
    output = _base_output(args.output)
    files = {
        output.with_suffix(".ingestion.yaml"): _init_ingestion(args),
        output.with_suffix(".annotations.yaml"): _init_annotations(args),
        output.with_suffix(".operations.yaml"): _init_operations(args),
        output.with_suffix(".access.yaml"): _init_access(args),
        output.with_suffix(".environment.yaml"): _init_environment(args),
    }
    if not args.force:
        # Check every destination up front: failing on the third file after
        # the first two were already written would leave a partial contract set.
        for path in files:
            if path.exists():
                raise FileExistsError(f"{path} already exists; use --force to overwrite it")
    written = []
    for path, payload in files.items():
        _write_mapping(path, payload, force=args.force)
        written.append(str(path))
    return _print({"status": "SUCCESS", "written": written}, args.indent)
26
+
27
+
28
def _init_ingestion(args: Any) -> dict[str, Any]:
    """Build the starter ingestion contract mapping from CLI arguments.

    Args:
        args: Parsed CLI namespace. Reads ``merge_keys``, ``hash_keys``,
            ``watermark_columns`` (comma-separated strings or ``None``),
            ``mode``, ``source``, ``catalog``, ``target_schema``, ``layer``,
            ``target_table`` and ``schema_policy``.

    Returns:
        A mapping with ``source``, ``target``, ``layer``, ``mode`` and
        ``schema_policy``, plus ``merge_keys``/``quality_rules``,
        ``hash_keys`` and ``watermark_columns`` when applicable. Key order is
        the order in which the file is later dumped.

    Raises:
        ValueError: If the selected mode needs merge keys or hash keys and
            none were supplied.
    """
    merge_keys = _csv(args.merge_keys)
    # Hash keys default to the merge keys when not given explicitly.
    hash_keys = _csv(args.hash_keys) or merge_keys
    if args.mode in {"scd1_upsert", "scd2_historical", "snapshot_soft_delete"} and not merge_keys:
        raise ValueError(f"--merge-keys is required for mode={args.mode}")
    if args.mode == "scd1_hash_diff" and not hash_keys:
        raise ValueError("--hash-keys or --merge-keys is required for mode=scd1_hash_diff")
    contract: dict[str, Any] = {
        "source": {"type": "table", "table": args.source},
        "target": _target(args),
        "layer": args.layer,
        "mode": args.mode,
        "schema_policy": args.schema_policy,
    }
    # Each entry gets its own list copy. Storing the same list object under
    # several keys makes PyYAML emit anchors/aliases (&id001 / *id001) in the
    # generated starter file, which is meant to be hand-edited.
    if merge_keys:
        contract["merge_keys"] = list(merge_keys)
        contract["quality_rules"] = {"not_null": list(merge_keys)}
    if args.mode == "scd1_hash_diff" and hash_keys:
        contract["hash_keys"] = list(hash_keys)
    watermarks = _csv(args.watermark_columns)
    if watermarks:
        contract["watermark_columns"] = watermarks
    return contract
51
+
52
+
53
def _init_annotations(args: Any) -> dict[str, Any]:
    """Build the starter annotations mapping for the target table.

    Falls back to ``TODO`` placeholders when ``args.description`` or
    ``args.domain`` are empty; ``columns`` starts out empty.
    """
    target = _target(args)
    description = args.description or f"TODO: describe {args.target_table}"
    tags = {"domain": args.domain or "TODO"}
    table_section = {"description": description, "tags": tags}
    return {"target": target, "table": table_section, "columns": {}}
59
+
60
+
61
def _init_operations(args: Any) -> dict[str, Any]:
    """Build the starter operations mapping (ownership and operational settings).

    Missing owner values fall back to ``"TODO"`` (business owner, runbook URL)
    or ``"data-platform"`` (technical owner, support group). Both alert flags
    are switched on in the generated file.
    """
    target = _target(args)
    ownership = {
        "business_owner": args.owner or "TODO",
        "technical_owner": args.technical_owner or "data-platform",
        "support_group": args.support_group or "data-platform",
    }
    operations: dict[str, Any] = {}
    operations["criticality"] = args.criticality
    operations["expected_frequency"] = args.expected_frequency
    operations["freshness_sla_minutes"] = args.freshness_sla_minutes
    operations["alert_on_failure"] = True
    operations["alert_on_quality_fail"] = True
    operations["runbook_url"] = args.runbook_url or "TODO"
    return {"target": target, "ownership": ownership, "operations": operations}
78
+
79
+
80
def _init_access(args: Any) -> dict[str, Any]:
    """Build the starter access mapping.

    The policy is ``validate_only`` with drift reported as a warning, and a
    single ``SELECT`` grant goes to ``args.access_principal`` (default
    ``"data-engineers"``).
    """
    target = _target(args)
    policy = {"mode": "validate_only", "on_drift": "warn", "revoke_unmanaged": False}
    principal = args.access_principal or "data-engineers"
    select_grant = {"principal": principal, "privileges": ["SELECT"]}
    return {"target": target, "access_policy": policy, "grants": [select_grant]}
86
+
87
+
88
+ def _init_environment(args: Any) -> dict[str, Any]:
89
+ return {"name": "dev", "adapter": args.adapter, "evidence": {"catalog": args.catalog, "schema": "ops"}}
90
+
91
+
92
+ def _target(args: Any) -> dict[str, str]:
93
+ return {"catalog": args.catalog, "schema": args.target_schema or args.layer, "table": args.target_table}
94
+
95
+
96
def _write_mapping(path: Path, payload: dict[str, Any], *, force: bool) -> None:
    """Write ``payload`` to ``path`` as UTF-8 text rendered by ``yaml_dump``.

    Parent directories are created as needed.

    Raises:
        FileExistsError: If ``path`` already exists and ``force`` is false.
    """
    overwrite_blocked = path.exists() and not force
    if overwrite_blocked:
        raise FileExistsError(f"{path} already exists; use --force to overwrite it")
    destination_dir = path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    rendered = yaml_dump(payload)
    path.write_text(rendered, encoding="utf-8")
101
+
102
+
103
+ def _print(payload: object, indent: int) -> int:
104
+ print(json.dumps(payload, indent=indent, sort_keys=True, default=str))
105
+ return 0
106
+
107
+
108
+ def _csv(value: str | None) -> list[str]:
109
+ return [item.strip() for item in str(value or "").split(",") if item.strip()]
110
+
111
+
112
+ def _base_output(path: Path) -> Path:
113
+ name = path.name
114
+ for suffix in (".ingestion.yaml", ".ingestion.yml", ".ingestion.json"):
115
+ if name.endswith(suffix):
116
+ return path.with_name(name[: -len(suffix)])
117
+ return path
@@ -0,0 +1,22 @@
1
+ """Small JSON/YAML helpers for core CLI modules."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from typing import Any
7
+
8
+
9
def yaml_load(text: str) -> Any:
    """Parse YAML ``text`` with PyYAML's ``safe_load``.

    Raises:
        RuntimeError: If PyYAML cannot be imported.
    """
    try:
        import yaml  # type: ignore
    except Exception as import_error:  # pragma: no cover
        message = "YAML support requires PyYAML; use JSON files or install PyYAML"
        raise RuntimeError(message) from import_error
    else:
        return yaml.safe_load(text)
15
+
16
+
17
def yaml_dump(payload: dict[str, Any]) -> str:
    """Render ``payload`` as YAML text, preserving key order.

    When PyYAML cannot be imported the payload is rendered as indented JSON
    followed by a newline instead.
    """
    try:
        import yaml  # type: ignore
    except Exception:  # pragma: no cover
        yaml = None
    if yaml is None:
        json_text = json.dumps(payload, indent=2, sort_keys=False)
        return json_text + "\n"
    return yaml.safe_dump(payload, allow_unicode=True, sort_keys=False)
@@ -0,0 +1,164 @@
1
+ """Platform-neutral constants and shared type aliases."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Literal, Union
6
+
7
# Version markers exposed by the core package.
FRAMEWORK_VERSION = "1.0.0"
CTRL_SCHEMA_VERSION = 1

# Layers are free-form strings; no closed set is enforced at type level.
Layer = str

# --- Write behaviour -------------------------------------------------------
WriteMode = Literal[
    "scd0_append", "scd0_overwrite",
    "scd1_upsert", "scd1_hash_diff",
    "scd2_historical",
    "snapshot_soft_delete",
]
WriteEngine = Literal["auto", "core_managed", "native_merge", "native_cdc"]
WriteEngineFallbackPolicy = Literal[
    "fail",
    "fallback_to_core",
    "preview_only",
]

# --- Schema, quality and governance policies --------------------------------
SchemaPolicy = Literal[
    "permissive",
    "additive_only",
    "strict",
]
QualityFailAction = Literal[
    "fail",
    "warn",
    "quarantine",
]
QualityRuleSeverity = Literal[
    "warn",
    "quarantine",
    "abort",
]
SCD2LateArrivingPolicy = Literal[
    "apply",
    "ignore",
    "reject",
]
GovernanceFailurePolicy = Literal[
    "fail",
    "warn",
    "ignore",
]

# --- Access and idempotency -------------------------------------------------
AccessMode = Literal[
    "apply",
    "validate_only",
    "ignore",
]
AccessDriftPolicy = Literal[
    "fail",
    "warn",
    "reconcile",
]
IdempotencyPolicy = Literal[
    "always_run",
    "skip_if_success",
    "fail_if_success",
    "rerun_if_failed",
]

# A source is either a plain string or a mapping of source settings.
Source = Union[str, dict[str, Any]]
36
+
37
# Write modes starting with this prefix are accepted in addition to the
# built-in ones listed in VALID_WRITE_MODES.
CUSTOM_WRITE_MODE_PREFIX = "custom:"

# --- Write behaviour -------------------------------------------------------
VALID_WRITE_MODES = {
    "scd0_append", "scd0_overwrite",
    "scd1_upsert", "scd1_hash_diff",
    "scd2_historical",
    "snapshot_soft_delete",
}
VALID_WRITE_ENGINES = {
    "auto",
    "core_managed",
    "native_merge",
    "native_cdc",
}
VALID_WRITE_ENGINE_FALLBACK_POLICIES = {
    "fail",
    "fallback_to_core",
    "preview_only",
}

# --- Schema, quality and governance policies --------------------------------
VALID_SCHEMA_POLICIES = {
    "permissive",
    "additive_only",
    "strict",
}
VALID_QUALITY_FAIL_ACTIONS = {
    "fail",
    "warn",
    "quarantine",
}
VALID_QUALITY_RULE_SEVERITIES = {
    "warn",
    "quarantine",
    "abort",
}
VALID_SCD2_LATE_ARRIVING_POLICIES = {
    "apply",
    "ignore",
    "reject",
}
VALID_GOVERNANCE_FAILURE_POLICIES = {
    "fail",
    "warn",
    "ignore",
}

# --- Access ------------------------------------------------------------------
VALID_ACCESS_MODES = {
    "apply",
    "validate_only",
    "ignore",
}
VALID_ACCESS_DRIFT_POLICIES = {
    "fail",
    "warn",
    "reconcile",
}

# --- Operations and annotations ---------------------------------------------
VALID_CRITICALITY_LEVELS = {
    "low",
    "medium",
    "high",
    "critical",
}
VALID_EXPECTED_FREQUENCIES = {
    "hourly",
    "daily",
    "weekly",
    "monthly",
    "ad_hoc",
}
VALID_SENSITIVITY_LEVELS = {
    "public",
    "internal",
    "restricted",
    "confidential",
}
VALID_PII_TYPES = {
    "address", "bank_account", "birth_date", "credit_card",
    "device_id", "document", "email", "financial",
    "health", "ip_address", "name", "national_id",
    "other", "phone", "ssn", "tax_id",
    "unknown",
}
VALID_ACCESS_PRIVILEGES = {
    "ALL PRIVILEGES", "APPLY TAG",
    "CREATE", "CREATE FUNCTION", "CREATE MODEL", "CREATE TABLE", "CREATE VOLUME",
    "EXECUTE", "MANAGE", "MODIFY",
    "READ FILES", "READ VOLUME",
    "REFRESH", "SELECT", "USAGE",
    "WRITE FILES", "WRITE VOLUME",
}
VALID_IDEMPOTENCY_POLICIES = {
    "always_run",
    "skip_if_success",
    "fail_if_success",
    "rerun_if_failed",
}
VALID_EXPLAIN_FORMATS = {
    "simple",
    "extended",
    "codegen",
    "cost",
    "formatted",
}

# --- Sources -----------------------------------------------------------------
VALID_SOURCE_TYPES = {"connector"}
VALID_SOURCE_CONNECTORS = {
    "adls", "avro", "azure_blob", "bigquery_jdbc", "blob",
    "csv", "db2", "delta", "delta_share", "delta_table",
    "eventhubs_available_now", "eventhubs_bounded",
    "gcs", "http_csv", "http_file", "http_json", "http_text",
    "iceberg_table", "incremental_files", "jdbc", "json",
    "kafka_available_now", "kafka_bounded",
    "mariadb", "mysql", "native_passthrough", "object_storage",
    "oracle", "orc", "parquet", "postgres", "redshift",
    "rest_api", "s3", "snowflake_jdbc", "sql", "sqlserver",
    "table", "text", "view", "xml",
}
VALID_OBJECT_STORAGE_PROVIDERS = {
    "adls",
    "azure_blob",
    "gcs",
    "s3",
}
VALID_FILE_CONNECTOR_FORMATS = {
    "avro", "csv", "delta", "json", "jsonl",
    "ndjson", "orc", "parquet", "text", "xml",
}
VALID_HTTP_FILE_FORMATS = {
    "csv",
    "json",
    "jsonl",
    "ndjson",
    "text",
}
VALID_SOURCE_TRIGGERS = {"available_now"}

# --- Transform and framework columns ----------------------------------------
ARRAY_MODES = {
    "explode",
    "explode_outer",
    "first",
    "keep",
    "size",
    "to_json",
}
MAX_INLINE_ACCEPTED_VALUES = 1000
CONTROL_COLUMNS = {
    "ingestion_ts_utc", "__run_id", "row_hash",
    "valid_from", "valid_to",
    "is_current", "is_active",
    "deleted_at", "changed_columns",
}
161
+
162
+
163
def is_valid_write_mode(value: str) -> bool:
    """Return whether ``value`` is a built-in write mode or a custom one.

    A value counts as custom when it starts with ``CUSTOM_WRITE_MODE_PREFIX``.
    """
    if value in VALID_WRITE_MODES:
        return True
    return value.startswith(CUSTOM_WRITE_MODE_PREFIX)
@@ -0,0 +1,140 @@
1
+ """Platform-neutral connector helpers."""
2
+
3
+ from contractforge_core.connectors.http_files import (
4
+ HTTP_FILE_TYPES,
5
+ cleanup_http_file_downloads,
6
+ download_http_file,
7
+ http_file_format,
8
+ http_file_headers,
9
+ http_file_params,
10
+ http_file_reader_options,
11
+ http_file_url,
12
+ is_http_file_source,
13
+ read_http_file_payload,
14
+ )
15
+ from contractforge_core.connectors.catalog import (
16
+ CATALOG_SOURCE_TYPES,
17
+ LogicalTableReference,
18
+ TableRefResolver,
19
+ catalog_source_query,
20
+ catalog_source_table_or_path,
21
+ has_table_reference_placeholders,
22
+ is_catalog_source,
23
+ parse_logical_table_reference,
24
+ render_table_reference_placeholders,
25
+ source_logical_table_reference,
26
+ )
27
+ from contractforge_core.connectors.files import (
28
+ FILE_SOURCE_TYPES,
29
+ OBJECT_STORAGE_TYPES,
30
+ file_reader_options,
31
+ file_source_format,
32
+ is_file_source,
33
+ normalize_file_format,
34
+ object_storage_provider,
35
+ )
36
+ from contractforge_core.connectors.streams import (
37
+ AVAILABLE_NOW_STREAM_TYPES,
38
+ BOUNDED_STREAM_TYPES,
39
+ STREAM_SOURCE_TYPES,
40
+ eventhubs_bounded_options,
41
+ is_available_now_stream_source,
42
+ is_bounded_stream_source,
43
+ is_eventhubs_stream_source,
44
+ is_kafka_stream_source,
45
+ kafka_bounded_options,
46
+ stream_source_format,
47
+ )
48
+ from contractforge_core.connectors.sharing import delta_share_options, is_delta_share_source
49
+ from contractforge_core.connectors.databases import JDBC_CONNECTORS, jdbc_common_options, validate_jdbc_source
50
+ from contractforge_core.connectors.native_passthrough import (
51
+ is_native_passthrough_source,
52
+ native_passthrough_descriptor,
53
+ redact_secret_fields,
54
+ )
55
+ from contractforge_core.connectors.metadata import (
56
+ diagnose_source_connectors,
57
+ list_source_connector_details,
58
+ source_capabilities,
59
+ source_connector_details,
60
+ source_metadata_from_contract,
61
+ source_metadata_from_mapping,
62
+ source_provider,
63
+ )
64
+ from contractforge_core.connectors.databases import (
65
+ generate_rds_iam_auth_token,
66
+ infer_aws_region_from_rds_host,
67
+ parse_jdbc_host_port,
68
+ rds_iam_review_options,
69
+ )
70
+ from contractforge_core.connectors.api.rest import (
71
+ REST_API_CONNECTORS,
72
+ is_rest_api_connector,
73
+ read_rest_api_records,
74
+ rest_api_descriptor,
75
+ rest_request_headers,
76
+ )
77
+
78
+ __all__ = [
79
+ "HTTP_FILE_TYPES",
80
+ "cleanup_http_file_downloads",
81
+ "download_http_file",
82
+ "read_rest_api_records",
83
+ "rest_request_headers",
84
+ "JDBC_CONNECTORS",
85
+ "CATALOG_SOURCE_TYPES",
86
+ "LogicalTableReference",
87
+ "TableRefResolver",
88
+ "AVAILABLE_NOW_STREAM_TYPES",
89
+ "BOUNDED_STREAM_TYPES",
90
+ "STREAM_SOURCE_TYPES",
91
+ "FILE_SOURCE_TYPES",
92
+ "OBJECT_STORAGE_TYPES",
93
+ "REST_API_CONNECTORS",
94
+ "catalog_source_query",
95
+ "catalog_source_table_or_path",
96
+ "delta_share_options",
97
+ "diagnose_source_connectors",
98
+ "eventhubs_bounded_options",
99
+ "file_reader_options",
100
+ "file_source_format",
101
+ "generate_rds_iam_auth_token",
102
+ "has_table_reference_placeholders",
103
+ "http_file_format",
104
+ "http_file_headers",
105
+ "http_file_params",
106
+ "http_file_reader_options",
107
+ "http_file_url",
108
+ "read_http_file_payload",
109
+ "infer_aws_region_from_rds_host",
110
+ "is_http_file_source",
111
+ "is_available_now_stream_source",
112
+ "is_bounded_stream_source",
113
+ "is_catalog_source",
114
+ "is_delta_share_source",
115
+ "is_eventhubs_stream_source",
116
+ "is_file_source",
117
+ "is_kafka_stream_source",
118
+ "stream_source_format",
119
+ "jdbc_common_options",
120
+ "list_source_connector_details",
121
+ "is_native_passthrough_source",
122
+ "is_rest_api_connector",
123
+ "kafka_bounded_options",
124
+ "native_passthrough_descriptor",
125
+ "parse_jdbc_host_port",
126
+ "parse_logical_table_reference",
127
+ "redact_secret_fields",
128
+ "render_table_reference_placeholders",
129
+ "rest_api_descriptor",
130
+ "normalize_file_format",
131
+ "object_storage_provider",
132
+ "rds_iam_review_options",
133
+ "source_capabilities",
134
+ "source_connector_details",
135
+ "source_metadata_from_contract",
136
+ "source_metadata_from_mapping",
137
+ "source_provider",
138
+ "source_logical_table_reference",
139
+ "validate_jdbc_source",
140
+ ]
@@ -0,0 +1,17 @@
1
+ """Facade for the API connector family."""
2
+
3
+ from contractforge_core.connectors.api.rest import (
4
+ REST_API_CONNECTORS,
5
+ is_rest_api_connector,
6
+ read_rest_api_records,
7
+ rest_api_descriptor,
8
+ rest_request_headers,
9
+ )
10
+
11
+ __all__ = [
12
+ "REST_API_CONNECTORS",
13
+ "is_rest_api_connector",
14
+ "read_rest_api_records",
15
+ "rest_api_descriptor",
16
+ "rest_request_headers",
17
+ ]
@@ -0,0 +1,51 @@
1
+ """Facade for the bounded REST API connector."""
2
+
3
+ from contractforge_core.connectors.api.rest.auth import rest_request_headers
4
+ from contractforge_core.connectors.api.rest.pagination import (
5
+ json_path,
6
+ link_header_next,
7
+ max_pages_for_source,
8
+ next_url,
9
+ page_urls,
10
+ pagination_type,
11
+ url_with_params,
12
+ )
13
+ from contractforge_core.connectors.api.rest.reader import read_rest_api_records
14
+ from contractforge_core.connectors.api.rest.retry import (
15
+ RETRYABLE_HTTP_STATUS,
16
+ is_retryable_http_error,
17
+ is_retryable_network_error,
18
+ sleep_retry_backoff,
19
+ )
20
+ from contractforge_core.connectors.api.rest.safety import (
21
+ ALLOW_PRIVATE_FLAG,
22
+ ALLOWED_SCHEMES,
23
+ validate_http_target,
24
+ )
25
+ from contractforge_core.connectors.api.rest.source import (
26
+ REST_API_CONNECTORS,
27
+ is_rest_api_connector,
28
+ rest_api_descriptor,
29
+ )
30
+
31
+ __all__ = [
32
+ "ALLOWED_SCHEMES",
33
+ "ALLOW_PRIVATE_FLAG",
34
+ "REST_API_CONNECTORS",
35
+ "RETRYABLE_HTTP_STATUS",
36
+ "is_retryable_http_error",
37
+ "is_retryable_network_error",
38
+ "is_rest_api_connector",
39
+ "json_path",
40
+ "link_header_next",
41
+ "max_pages_for_source",
42
+ "next_url",
43
+ "page_urls",
44
+ "pagination_type",
45
+ "read_rest_api_records",
46
+ "rest_api_descriptor",
47
+ "rest_request_headers",
48
+ "sleep_retry_backoff",
49
+ "url_with_params",
50
+ "validate_http_target",
51
+ ]
@@ -0,0 +1,104 @@
1
+ """Auth header construction for the platform-neutral bounded REST client.
2
+
3
+ This module operates on already-resolved values. Adapters are responsible for
4
+ resolving any secret placeholders (e.g. via their platform secret store) before
5
+ calling these helpers, so the core never depends on a platform secret backend.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import base64
11
+ import json
12
+ import urllib.parse
13
+ import urllib.request
14
+ from typing import Any
15
+
16
+ from contractforge_core.connectors.api.rest.safety import validate_http_target
17
+ from contractforge_core.connectors.api.rest.transport import open_request as _open_request
18
+
19
+
20
def rest_request_headers(
    source: dict[str, Any],
    incremental: dict[str, Any] | None = None,
    watermark: str | None = None,
) -> dict[str, str]:
    """Assemble the HTTP headers for a REST API source request.

    Starts from ``source["request"]["headers"]`` (stringified), then adds the
    header required by ``source["auth"]["type"]``: ``bearer_token``,
    ``api_key``, ``basic`` or ``oauth_client_credentials``. When a watermark
    and ``incremental["watermark_header"]`` are both present, the watermark is
    sent under that header name.

    Raises:
        ValueError: If a required auth field is missing, or ``auth`` is
            non-empty with an unsupported ``type``.
    """
    request_section = _dict(source.get("request"))
    headers = _string_dict(request_section.get("headers"))
    auth = _dict(source.get("auth"))
    auth_type = str(auth.get("type") or "").strip().lower()

    if auth_type == "bearer_token":
        token = auth.get("token")
        if not token:
            raise ValueError("REST API bearer_token auth requires auth.token")
        headers["Authorization"] = f"Bearer {token}"
    elif auth_type == "api_key":
        api_key = auth.get("value")
        if not api_key:
            raise ValueError("REST API api_key auth requires auth.value")
        header_name = str(auth.get("header") or "X-Api-Key")
        headers[header_name] = str(api_key)
    elif auth_type == "basic":
        username = auth.get("username")
        password = auth.get("password")
        if not (username and password):
            raise ValueError("REST API basic auth requires auth.username and auth.password")
        credentials = f"{username}:{password}".encode("utf-8")
        encoded = base64.b64encode(credentials).decode("ascii")
        headers["Authorization"] = "Basic " + encoded
    elif auth_type == "oauth_client_credentials":
        access_token = _oauth_client_credentials_token(auth, source)
        headers["Authorization"] = f"Bearer {access_token}"
    elif auth:
        # An auth mapping was supplied but its type is missing or unknown.
        raise ValueError(f"REST API auth.type={auth_type!r} is not supported")

    if watermark:
        watermark_header = (incremental or {}).get("watermark_header")
        if watermark_header:
            headers[str(watermark_header)] = watermark
    return headers
54
+
55
+
56
def _oauth_client_credentials_token(auth: dict[str, Any], source: dict[str, Any]) -> str:
    """Request an access token with the OAuth client-credentials grant.

    The token endpoint is ``auth["token_url"]``; when that is absent and
    ``auth["tenant_id"]`` is set, the Microsoft Entra ID v2.0 token endpoint
    for that tenant is used. The request is a form-encoded POST carrying
    ``grant_type``, ``client_id``, ``client_secret`` and, when configured,
    ``scope`` (from ``auth["scope"]`` or the space-joined ``auth["scopes"]``).

    Args:
        auth: Resolved auth settings of the source.
        source: Full source mapping; ``limits.timeout_seconds`` (default 60)
            is used as the request timeout.

    Returns:
        The ``access_token`` value of the token response, as a string.

    Raises:
        ValueError: If the endpoint or client credentials are missing, the
            response is not a JSON object, or it carries no ``access_token``.
    """
    token_url = str(auth.get("token_url") or "").strip()
    tenant_id = str(auth.get("tenant_id") or "").strip()
    if not token_url and tenant_id:
        token_url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token"
    client_id = auth.get("client_id")
    client_secret = auth.get("client_secret")
    if not token_url or not client_id or not client_secret:
        raise ValueError(
            "REST API OAuth client credentials auth requires auth.token_url, auth.client_id and auth.client_secret; "
            "auth.tenant_id can replace auth.token_url for Microsoft Entra ID."
        )
    fields = {
        "grant_type": "client_credentials",
        "client_id": str(client_id),
        "client_secret": str(client_secret),
    }
    scope = auth.get("scope")
    scopes = auth.get("scopes")
    if scope:
        scope_value = str(scope)
    elif isinstance(scopes, (list, tuple, set)):
        scope_value = " ".join(str(item) for item in scopes if str(item).strip())
    elif scopes:
        scope_value = str(scopes)
    else:
        scope_value = ""
    # Only send the parameter when there is something in it; an empty or
    # all-blank ``scopes`` collection used to produce a bare ``scope=`` field.
    if scope_value:
        fields["scope"] = scope_value
    timeout = int(_dict(source.get("limits")).get("timeout_seconds", 60))
    # The endpoint is validated before any credentials leave the process.
    validate_http_target(token_url, context="REST API OAuth token URL")
    request = urllib.request.Request(
        token_url,
        method="POST",
        data=urllib.parse.urlencode(fields).encode("utf-8"),
        headers={"Content-Type": "application/x-www-form-urlencoded"},
    )
    with _open_request(request, timeout=timeout) as response:
        raw = response.read()
        encoding = response.headers.get_content_charset() if hasattr(response.headers, "get_content_charset") else None
    payload = json.loads(raw.decode(encoding or "utf-8")) if raw else {}
    # A token response must be a JSON object; a list or scalar would otherwise
    # fail below with AttributeError instead of a clear ValueError.
    if not isinstance(payload, dict):
        raise ValueError("OAuth response is not a JSON object")
    token = payload.get("access_token")
    if not token:
        raise ValueError("OAuth response did not return access_token")
    return str(token)
97
+
98
+
99
+ def _dict(value: object) -> dict[str, Any]:
100
+ return dict(value) if isinstance(value, dict) else {}
101
+
102
+
103
+ def _string_dict(value: object) -> dict[str, str]:
104
+ return {str(key): str(item) for key, item in _dict(value).items()}