dmesh-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dmesh/sdk/__init__.py +39 -0
- dmesh/sdk/adapters/psycopg.py +3 -0
- dmesh/sdk/config.py +152 -0
- dmesh/sdk/core/enricher.py +58 -0
- dmesh/sdk/core/id_generator.py +44 -0
- dmesh/sdk/core/validator.py +102 -0
- dmesh/sdk/models/__init__.py +14 -0
- dmesh/sdk/models/core.py +32 -0
- dmesh/sdk/models/exceptions.py +13 -0
- dmesh/sdk/operations/data_contract.py +63 -0
- dmesh/sdk/operations/data_product.py +53 -0
- dmesh/sdk/operations/discover.py +20 -0
- dmesh/sdk/operations/utils.py +6 -0
- dmesh/sdk/persistency/factory.py +78 -0
- dmesh/sdk/persistency/in_memory.py +62 -0
- dmesh/sdk/persistency/postgres.py +153 -0
- dmesh/sdk/ports/repository.py +15 -0
- dmesh/sdk/schemas/__init__.py +0 -0
- dmesh/sdk/schemas/odcs-3.1.0.json +3265 -0
- dmesh/sdk/schemas/odps-1.0.0.json +588 -0
- dmesh/sdk/sdk.py +233 -0
- dmesh_sdk-0.1.0.dist-info/METADATA +153 -0
- dmesh_sdk-0.1.0.dist-info/RECORD +24 -0
- dmesh_sdk-0.1.0.dist-info/WHEEL +4 -0
dmesh/sdk/__init__.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from .sdk import AsyncSDK
|
|
2
|
+
from .models import (
|
|
3
|
+
DataProduct,
|
|
4
|
+
DataContract,
|
|
5
|
+
OpenDataMeshError,
|
|
6
|
+
DataProductValidationError,
|
|
7
|
+
DataContractValidationError,
|
|
8
|
+
)
|
|
9
|
+
from .operations.data_product import create_dp, update_dp, get_dp, list_dps, delete_dp
|
|
10
|
+
from .operations.data_contract import create_dc, update_dc, patch_dc, get_dc, list_dcs, delete_dc
|
|
11
|
+
from .operations.discover import discover
|
|
12
|
+
from .operations.utils import flush
|
|
13
|
+
from .persistency.factory import RepositoryFactory
|
|
14
|
+
from .config import get_settings
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"AsyncSDK",
|
|
19
|
+
"DataProduct",
|
|
20
|
+
"DataContract",
|
|
21
|
+
"OpenDataMeshError",
|
|
22
|
+
"DataProductValidationError",
|
|
23
|
+
"DataContractValidationError",
|
|
24
|
+
"create_dp",
|
|
25
|
+
"update_dp",
|
|
26
|
+
"get_dp",
|
|
27
|
+
"list_dps",
|
|
28
|
+
"delete_dp",
|
|
29
|
+
"create_dc",
|
|
30
|
+
"update_dc",
|
|
31
|
+
"patch_dc",
|
|
32
|
+
"get_dc",
|
|
33
|
+
"list_dcs",
|
|
34
|
+
"delete_dc",
|
|
35
|
+
"discover",
|
|
36
|
+
"flush",
|
|
37
|
+
"RepositoryFactory",
|
|
38
|
+
"get_settings",
|
|
39
|
+
]
|
dmesh/sdk/config.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import tomllib
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Dict, Optional, Tuple, Type
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, Field, ValidationError
|
|
8
|
+
from pydantic_settings import (
|
|
9
|
+
BaseSettings,
|
|
10
|
+
PydanticBaseSettingsSource,
|
|
11
|
+
SettingsConfigDict,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
# Deep merge for TOML config
|
|
15
|
+
def _deep_update(base_dict: Dict[str, Any], update_with: Dict[str, Any]):
    """Recursively merge ``update_with`` into ``base_dict`` in place.

    When both sides hold a dict under the same key the two dicts are merged
    key by key; in every other case the incoming value replaces (or adds)
    the entry in ``base_dict``.
    """
    for name, incoming in update_with.items():
        current = base_dict.get(name)
        if not (isinstance(incoming, dict) and isinstance(current, dict)):
            # Scalars, lists and type mismatches simply overwrite.
            base_dict[name] = incoming
            continue
        _deep_update(current, incoming)
|
|
21
|
+
|
|
22
|
+
class TomlConfigSettingsSource(PydanticBaseSettingsSource):
    """Settings source that reads its values from TOML files.

    ``config/base.toml`` is read first; ``config/{profile}.toml`` is then
    deep-merged on top of it, so profile values override base values.
    The ``config`` directory is resolved relative to the current working
    directory.
    """

    def __init__(self, settings_cls: Type[BaseSettings], profile: str = "development"):
        super().__init__(settings_cls)
        self.profile = profile

    def get_field_value(self, field: Any, field_name: str) -> Tuple[Any, str, bool]:
        # Per-field lookup is not used by this source: __call__ returns the
        # complete mapping instead.
        return None, field_name, False

    def __call__(self) -> Dict[str, Any]:
        """Return the merged TOML configuration as a plain dict.

        Missing files are skipped. A file that cannot be read or parsed is
        reported with ``print`` and otherwise ignored.
        """
        merged: Dict[str, Any] = {}
        config_dir = Path("config")

        # Ordered from lowest to highest priority within this source.
        layers = (
            (config_dir / "base.toml", "base config"),
            (config_dir / f"{self.profile}.toml", f"profile config '{self.profile}'"),
        )
        for toml_path, label in layers:
            if not toml_path.exists():
                continue
            try:
                with toml_path.open("rb") as handle:
                    _deep_update(merged, tomllib.load(handle))
            except Exception as exc:
                print(f"Error loading {label}: {exc}")

        return merged
|
|
58
|
+
|
|
59
|
+
class DatabaseSettings(BaseModel):
    """Database connection parameters.

    Every field except ``password`` has a default; ``password`` must be
    supplied and must be a non-empty string.
    """

    host: str = "localhost"
    port: int = 5432
    user: str = "postgres"
    # Required secret: no default, at least one character.
    password: str = Field(..., min_length=1)
    name: str = "postgres"
|
|
65
|
+
|
|
66
|
+
class ApiSettings(BaseModel):
    """API server parameters; every field has a default."""

    # NOTE(review): presumably the bind address of the API server - confirm.
    host: str = "0.0.0.0"
    port: int = 8000
    debug: bool = False
|
|
70
|
+
|
|
71
|
+
class Settings(BaseSettings):
    """Top-level application settings.

    Values are resolved from the sources returned by
    ``settings_customise_sources``; environment variables use the
    ``DMESH_`` prefix and ``__`` as the nesting delimiter.
    """

    model_config = SettingsConfigDict(
        env_prefix="DMESH_",
        env_nested_delimiter="__",
        # .env files order defined in get_settings to handle dynamic APP_ENV
    )

    # Field names match the nesting in TOML and env vars
    db: DatabaseSettings = Field(default_factory=DatabaseSettings)
    api: ApiSettings = Field(default_factory=ApiSettings)

    @property
    def profile(self) -> str:
        """Name of the active profile, read from ``APP_ENV`` on each access."""
        return os.getenv("APP_ENV", "development")

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: Type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> Tuple[PydanticBaseSettingsSource, ...]:
        """Return the settings sources, highest priority first.

        ``file_secret_settings`` is intentionally left out of the result.
        """
        active_profile = os.getenv("APP_ENV", "development")
        # 5 & 6. config/{profile}.toml and config/base.toml
        toml_source = TomlConfigSettingsSource(settings_cls, profile=active_profile)
        return (
            init_settings,    # 1. CLI flags / runtime args (highest priority)
            env_settings,     # 2. Environment variables
            dotenv_settings,  # 3 & 4. .env.{profile} and .env files
            toml_source,
            # Code defaults (last / lowest priority, implicitly handled by Pydantic)
        )
|
|
103
|
+
|
|
104
|
+
_settings: Optional[Settings] = None
|
|
105
|
+
|
|
106
|
+
def get_settings(**kwargs) -> Settings:
    """Initialize and return the settings instance.

    The profile is chosen via the APP_ENV environment variable
    (default: development). Values are resolved with this priority stack:

    1. CLI flags / runtime args (passed to get_settings)
    2. Environment variables (DMESH_ prefix)
    3. .env.{APP_ENV}
    4. .env
    5. config/{APP_ENV}.toml
    6. config/base.toml
    7. Code defaults

    Calling without arguments builds the settings once and returns the
    cached instance afterwards. Calling with keyword overrides always
    builds a fresh, uncached instance.

    On a validation error the details are written to stderr and the
    process exits with status 1.
    """
    global _settings

    # Fast path: reuse the cached default settings.
    if _settings is not None and not kwargs:
        return _settings

    profile = os.getenv("APP_ENV", "development")

    # We dynamically set the env_file list to respect profile priority.
    # Pydantic-settings: later files take precedence.
    env_files = [".env", f".env.{profile}"]

    class RuntimeSettings(Settings):
        model_config = SettingsConfigDict(
            env_prefix="DMESH_",
            env_nested_delimiter="__",
            env_file=env_files,
            env_file_encoding="utf-8",
            extra="ignore",
        )

    try:
        settings_obj = RuntimeSettings(**kwargs)
    except ValidationError as e:
        # Diagnostics belong on stderr so they do not mix with program output.
        print(f"Invalid configuration:\n{e}", file=sys.stderr)
        sys.exit(1)

    if not kwargs:  # Only cache if it's the default global settings
        _settings = settings_obj
    return settings_obj
|
|
146
|
+
|
|
147
|
+
if __name__ == "__main__":
    # Manual smoke test: load the settings and show two resolved values.
    print("Testing configuration loading...")
    settings = get_settings()
    active_profile = settings.profile
    print(f"Profile: {active_profile}")
    db_host = settings.db.host
    print(f"DB Host: {db_host}")
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Enriches an ODPS spec dict before persistence."""
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from dmesh.sdk.core.id_generator import make_dp_id
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def enrich_dp_spec(spec: dict[str, Any]) -> dict[str, Any]:
    """Build an enriched copy of a data product spec.

    Missing ``apiVersion``, ``kind``, ``version`` and ``status`` keys get
    default values, every dict entry of ``outputPorts`` without a
    ``version`` gets ``"v1"``, and ``id`` is set to the value returned by
    ``make_dp_id`` for the spec's domain, name and version.

    The input dict and its output port dicts are copied, not modified.
    """
    result = {**spec}

    # Apply defaults if missing
    for key, fallback in (
        ("apiVersion", "v1.0.0"),
        ("kind", "DataProduct"),
        ("version", "v1.0.0"),
        ("status", "draft"),
    ):
        result.setdefault(key, fallback)

    # Default outputPorts version to "v1" if missing
    ports = result.get("outputPorts")
    if isinstance(ports, list):
        result["outputPorts"] = [
            port
            if not isinstance(port, dict)
            else ({**port} if "version" in port else {**port, "version": "v1"})
            for port in ports
        ]

    domain = result.get("domain", "")
    name = result.get("name", "")
    version = result.get("version")
    result["id"] = make_dp_id(domain, name, version)
    return result
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def enrich_dc_spec(spec: dict[str, Any], dp_spec: dict[str, Any] | None = None) -> dict[str, Any]:
|
|
41
|
+
"""Return a new DataContract spec with defaults applied.
|
|
42
|
+
|
|
43
|
+
If a parent Data Product spec is provided, inherit common contextual values.
|
|
44
|
+
Does not mutate the input dict.
|
|
45
|
+
"""
|
|
46
|
+
enriched = dict(spec)
|
|
47
|
+
enriched.setdefault("apiVersion", "v3.1.0")
|
|
48
|
+
enriched.setdefault("kind", "DataContract")
|
|
49
|
+
enriched.setdefault("version", "v1.0.0")
|
|
50
|
+
enriched.setdefault("status", "draft")
|
|
51
|
+
|
|
52
|
+
if dp_spec is not None:
|
|
53
|
+
if "dataProduct" not in enriched and dp_spec.get("name"):
|
|
54
|
+
enriched["dataProduct"] = dp_spec["name"]
|
|
55
|
+
if "domain" not in enriched and dp_spec.get("domain"):
|
|
56
|
+
enriched["domain"] = dp_spec["domain"]
|
|
57
|
+
|
|
58
|
+
return enriched
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Deterministic UUID5 generation for data products and data contracts."""
|
|
2
|
+
import os
|
|
3
|
+
import uuid
|
|
4
|
+
|
|
5
|
+
# Fixed namespace for all dmesh IDs
|
|
6
|
+
_NAMESPACE = uuid.UUID("6ba7b810-9dad-11d1-80b4-00c04fd430c8")  # NAMESPACE_DNS

DEFAULT_DP_SCHEME = "DataProduct/{domain}/{name}/{version}"
DEFAULT_DC_SCHEME = "DataContract/{domain}/{name}/{version}/{dc_index}"

# Everything str.format() can raise for a bad template: unknown field name
# (KeyError), positional field (IndexError), unbalanced braces or a bad
# format spec (ValueError), bad attribute/index access on a field
# (AttributeError / TypeError).
_SCHEME_FORMAT_ERRORS = (KeyError, IndexError, ValueError, AttributeError, TypeError)


def _scheme(env_var: str, default: str) -> str:
    """Return the scheme template stored in ``env_var``, or ``default`` if unset."""
    return os.environ.get(env_var, default)


def _render_key(env_var: str, default: str, **fields: object) -> str:
    """Format the configured scheme with ``fields``.

    Falls back to the built-in ``default`` template when the configured
    scheme cannot be formatted with the available fields.
    """
    try:
        return _scheme(env_var, default).format(**fields)
    except _SCHEME_FORMAT_ERRORS:
        # Fallback if scheme is malformed
        return default.format(**fields)


def make_dp_id(domain: str, name: str, version: str) -> str:
    """Generate a deterministic UUID5 for a data product.

    The UUID name is built from the ``DP_ID_SCHEME`` environment variable
    (default ``DEFAULT_DP_SCHEME``), which may reference ``{domain}``,
    ``{name}`` and ``{version}``. A scheme that cannot be formatted is
    ignored in favour of the default.
    """
    key = _render_key(
        "DP_ID_SCHEME",
        DEFAULT_DP_SCHEME,
        domain=domain,
        name=name,
        version=version,
    )
    return str(uuid.uuid5(_NAMESPACE, key))


def make_dc_id(dp_domain: str, dp_name: str, dp_version: str, dc_index: int) -> str:
    """Generate a deterministic UUID5 for a data contract.

    dc_index is the count of existing data contracts for the parent DP
    at creation time (0-based: first DC gets index 0).

    The UUID name is built from the ``DC_ID_SCHEME`` environment variable
    (default ``DEFAULT_DC_SCHEME``), which may reference ``{domain}``,
    ``{name}``, ``{version}`` and ``{dc_index}``. A scheme that cannot be
    formatted is ignored in favour of the default.
    """
    key = _render_key(
        "DC_ID_SCHEME",
        DEFAULT_DC_SCHEME,
        domain=dp_domain,
        name=dp_name,
        version=dp_version,
        dc_index=dc_index,
    )
    return str(uuid.uuid5(_NAMESPACE, key))
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import importlib.resources
|
|
2
|
+
import json
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import jsonschema
|
|
8
|
+
import requests
|
|
9
|
+
|
|
10
|
+
SCHEMA_URLS = {
|
|
11
|
+
"DataProduct": "https://raw.githubusercontent.com/bitol-io/open-data-product-standard/refs/heads/main/schema/odps-json-schema-{api_version}.json",
|
|
12
|
+
"DataContract": "https://raw.githubusercontent.com/bitol-io/open-data-contract-standard/refs/heads/main/schema/odcs-json-schema-{api_version}.json",
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
SCHEMA_MAP = {
|
|
16
|
+
"DataProduct": "odps-{api_version}.json",
|
|
17
|
+
"DataContract": "odcs-{api_version}.json",
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class SchemaFetchError(Exception):
    """Signals that the Bitol JSON Schema could not be fetched."""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def validate_spec(spec: dict[str, Any]) -> None:
    """Validate spec against the versioned Bitol JSON Schema.

    Prioritizes local schemas in the package over external GitHub URLs.
    When ``kind`` is absent it is inferred from the keys present in the
    spec, defaulting to ``DataProduct``.

    Raises:
        ValueError: if apiVersion is missing or is not a string of the
            form ``vX.Y.Z``.
        SchemaFetchError: if the schema cannot be fetched locally or remotely.
        jsonschema.ValidationError: if the spec is invalid.
    """
    api_version = spec.get("apiVersion")
    kind = spec.get("kind")
    if not api_version:
        raise ValueError("apiVersion is required for schema validation")
    # fullmatch (rather than match + "$") also rejects a trailing newline,
    # which would otherwise end up in the schema file name and URL.
    if not isinstance(api_version, str) or not re.fullmatch(r"v\d+\.\d+\.\d+", api_version):
        raise ValueError(f'invalid apiVersion input "{api_version}" expected format: vX.Y.Z')

    if not kind:
        # Heuristic to detect kind if not specified
        if "domain" in spec or "payload" in spec:
            kind = "DataProduct"
        elif "specification" in spec or "info" in spec:
            kind = "DataContract"
        else:
            # Default to DataProduct if ambiguous
            kind = "DataProduct"

    # Local schema files are named without the 'v' prefix; the check above
    # guarantees the prefix is present.
    clean_version = api_version[1:]

    # 1. Try local lookup
    local_name_template = SCHEMA_MAP.get(kind, "odps-{api_version}.json")
    local_filename = local_name_template.format(api_version=clean_version)

    try:
        # Modern importlib.resources API (files() instead of is_resource/open_text)
        schema_file = importlib.resources.files("dmesh.sdk.schemas") / local_filename
        if schema_file.is_file():
            with schema_file.open("r", encoding="utf-8") as f:
                schema = json.load(f)
            jsonschema.validate(spec, schema)
            return
    except jsonschema.ValidationError:
        # A validation failure against a local schema is final: the bundled
        # schema is the strict source of truth, so do not try the remote one.
        raise
    except Exception:
        # Any other failure (file missing, unreadable or malformed) falls
        # through to the remote lookup.
        pass

    # 2. Try remote lookup (fallback): first with the 'v' prefix, then without.
    template = SCHEMA_URLS.get(kind, SCHEMA_URLS["DataProduct"])

    last_error = None
    for v in (api_version, clean_version):
        url = template.format(api_version=v)
        try:
            response = requests.get(url, timeout=10)
            if response.status_code == 200:
                schema = response.json()
                jsonschema.validate(spec, schema)
                return
            last_error = f"HTTP {response.status_code} at {url}"
        except requests.RequestException as e:
            last_error = str(e)

    raise SchemaFetchError(
        f"Schema not found for {kind} apiVersion={api_version} ({last_error})"
    )
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from .core import DataProduct, DataContract
|
|
2
|
+
from .exceptions import (
|
|
3
|
+
OpenDataMeshError,
|
|
4
|
+
DataProductValidationError,
|
|
5
|
+
DataContractValidationError,
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"DataProduct",
|
|
10
|
+
"DataContract",
|
|
11
|
+
"OpenDataMeshError",
|
|
12
|
+
"DataProductValidationError",
|
|
13
|
+
"DataContractValidationError",
|
|
14
|
+
]
|
dmesh/sdk/models/core.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Any, Dict, Optional
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class DataProduct:
    """A data product: its id, its specification dict and optional timestamps.

    ``domain``, ``name`` and ``version`` are read from ``specification``
    on every access.
    """

    id: str
    specification: Dict[str, Any]
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    def _spec_value(self, key: str, fallback: str) -> str:
        # Shared lookup used by the read-only properties below.
        return self.specification.get(key, fallback)

    @property
    def domain(self) -> str:
        """The spec's ``domain``, or an empty string when absent."""
        return self._spec_value("domain", "")

    @property
    def name(self) -> str:
        """The spec's ``name``, or an empty string when absent."""
        return self._spec_value("name", "")

    @property
    def version(self) -> str:
        """The spec's ``version``, or ``"v1.0.0"`` when absent."""
        return self._spec_value("version", "v1.0.0")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class DataContract:
    """A data contract: its id, the id of its data product, its specification dict and optional timestamps."""

    id: str
    data_product_id: str
    specification: Dict[str, Any]
    # Both timestamps default to None when not supplied.
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
class OpenDataMeshError(Exception):
    """Root of the SDK's exception hierarchy."""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DataProductValidationError(OpenDataMeshError):
    """Signals a Data Product specification that is invalid according to the ODPS schema."""
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DataContractValidationError(OpenDataMeshError):
    """Signals a Data Contract specification that is invalid according to the ODCS schema."""
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
from typing import Any, List, Optional, Union
|
|
2
|
+
from dmesh.sdk.models import DataContract
|
|
3
|
+
from dmesh.sdk.ports.repository import DataContractRepository, DataProductRepository
|
|
4
|
+
from dmesh.sdk.sdk import AsyncSDK, _RepoWrapper
|
|
5
|
+
|
|
6
|
+
async def create_dc(
    repo: DataContractRepository,
    dp_repo: DataProductRepository,
    spec: dict[str, Any],
    dp_id: str,
    include_metadata: Optional[bool] = False
) -> Union[dict, DataContract]:
    """Create a data contract under the data product ``dp_id``.

    Delegates to ``AsyncSDK.put_data_contract`` on an SDK built from both
    repositories.
    """
    repositories = _RepoWrapper(dp_repo=dp_repo, dc_repo=repo)
    sdk = AsyncSDK(repositories)
    return await sdk.put_data_contract(
        spec,
        dp_id=dp_id,
        include_metadata=include_metadata,
    )
|
|
17
|
+
|
|
18
|
+
async def update_dc(
    repo: DataContractRepository,
    spec: dict[str, Any],
    include_metadata: Optional[bool] = False
) -> Union[dict, DataContract]:
    """Update an existing data contract via ``AsyncSDK.put_data_contract``.

    Raises:
        ValueError: if ``spec`` has no truthy ``id``.
    """
    contract_id = spec.get("id")
    if not contract_id:
        raise ValueError("Data contract id is required for update")

    sdk = AsyncSDK(_RepoWrapper(dc_repo=repo))
    return await sdk.put_data_contract(spec, include_metadata=include_metadata)
|
|
29
|
+
|
|
30
|
+
async def patch_dc(
    repo: DataContractRepository,
    spec: dict[str, Any],
    include_metadata: Optional[bool] = False
) -> Union[dict, DataContract]:
    """Patch an existing data contract via ``AsyncSDK.patch_data_contract``.

    The contract to patch is identified by ``spec["id"]``.

    Raises:
        ValueError: if ``spec`` has no truthy ``id``.
    """
    dc_id = spec.get("id")
    if dc_id:
        sdk = AsyncSDK(_RepoWrapper(dc_repo=repo))
        return await sdk.patch_data_contract(
            spec,
            id=dc_id,
            include_metadata=include_metadata,
        )
    raise ValueError("Data contract id is required for patch")
|
|
42
|
+
|
|
43
|
+
async def get_dc(
    repo: DataContractRepository,
    id: str,
    include_metadata: Optional[bool] = False
) -> Optional[Union[dict, DataContract]]:
    """Look up one data contract by ID via ``AsyncSDK.get_data_contract``."""
    sdk = AsyncSDK(_RepoWrapper(dc_repo=repo))
    return await sdk.get_data_contract(id, include_metadata=include_metadata)
|
|
50
|
+
|
|
51
|
+
async def list_dcs(
    repo: DataContractRepository,
    dp_id: Optional[str] = None,
    include_metadata: Optional[bool] = False
) -> List[Union[dict, DataContract]]:
    """List data contracts via ``AsyncSDK.list_data_contracts``.

    ``dp_id`` optionally restricts the listing to one parent data product.
    """
    sdk = AsyncSDK(_RepoWrapper(dc_repo=repo))
    return await sdk.list_data_contracts(
        dp_id=dp_id,
        include_metadata=include_metadata,
    )
|
|
60
|
+
|
|
61
|
+
async def delete_dc(repo: DataContractRepository, id: str) -> bool:
    """Remove the data contract with the given ID via ``AsyncSDK.delete_data_contract``."""
    sdk = AsyncSDK(_RepoWrapper(dc_repo=repo))
    return await sdk.delete_data_contract(id)
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from typing import Any, List, Optional, Union
|
|
2
|
+
from dmesh.sdk.models import DataProduct
|
|
3
|
+
from dmesh.sdk.ports.repository import DataProductRepository
|
|
4
|
+
from dmesh.sdk.sdk import AsyncSDK, _RepoWrapper
|
|
5
|
+
|
|
6
|
+
async def create_dp(
    repo: DataProductRepository,
    spec: dict[str, Any],
    domain: Optional[str] = None,
    name: Optional[str] = None,
    include_metadata: Optional[bool] = False
) -> Union[dict, DataProduct]:
    """Create a data product via ``AsyncSDK.put_data_product``.

    ``domain`` and ``name`` are forwarded to the SDK unchanged.
    """
    sdk = AsyncSDK(_RepoWrapper(dp_repo=repo))
    return await sdk.put_data_product(
        spec,
        domain=domain,
        name=name,
        include_metadata=include_metadata,
    )
|
|
17
|
+
|
|
18
|
+
async def update_dp(
    repo: DataProductRepository,
    spec: dict[str, Any],
    include_metadata: Optional[bool] = False
) -> Union[dict, DataProduct]:
    """Update an existing data product via ``AsyncSDK.put_data_product``.

    Raises:
        ValueError: if ``spec`` has no truthy ``id``.
    """
    product_id = spec.get("id")
    if not product_id:
        raise ValueError("Data product id is required for update")

    sdk = AsyncSDK(_RepoWrapper(dp_repo=repo))
    return await sdk.put_data_product(spec, include_metadata=include_metadata)
|
|
29
|
+
|
|
30
|
+
async def get_dp(
    repo: DataProductRepository,
    id: Optional[str] = None,
    include_metadata: bool = False
) -> Optional[Union[dict, DataProduct]]:
    """Look up one data product by ID via ``AsyncSDK.get_data_product``.

    Returns ``None`` without touching the repository when ``id`` is
    missing or empty.
    """
    if id:
        sdk = AsyncSDK(_RepoWrapper(dp_repo=repo))
        return await sdk.get_data_product(id, include_metadata=include_metadata)
    return None
|
|
39
|
+
|
|
40
|
+
async def list_dps(
    repo: DataProductRepository,
    domain: Optional[str] = None,
    name: Optional[str] = None,
    include_metadata: Optional[bool] = False
) -> List[Union[dict, DataProduct]]:
    """List data products via ``AsyncSDK.list_data_products``.

    ``domain`` and ``name`` are forwarded as optional filters.
    """
    sdk = AsyncSDK(_RepoWrapper(dp_repo=repo))
    return await sdk.list_data_products(
        domain=domain,
        name=name,
        include_metadata=include_metadata,
    )
|
|
50
|
+
|
|
51
|
+
async def delete_dp(repo: DataProductRepository, id: str) -> bool:
    """Remove the data product with the given ID via ``AsyncSDK.delete_data_product``."""
    sdk = AsyncSDK(_RepoWrapper(dp_repo=repo))
    return await sdk.delete_data_product(id)
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from typing import Any, List, Optional, Union
|
|
2
|
+
from dmesh.sdk.models import DataProduct, DataContract
|
|
3
|
+
from dmesh.sdk.ports.repository import DataProductRepository, DataContractRepository
|
|
4
|
+
from dmesh.sdk.sdk import AsyncSDK, _RepoWrapper
|
|
5
|
+
|
|
6
|
+
async def discover(
    dp_repo: DataProductRepository,
    dc_repo: DataContractRepository,
    dp_id: Optional[str] = None,
    domain: Optional[str] = None,
    name: Optional[str] = None,
    include_metadata: Optional[bool] = False,
    include_metadata_in_response: Optional[bool] = False
) -> List[Union[dict, DataProduct, DataContract]]:
    """Discover by ID, or by domain and name, via ``AsyncSDK.discover``.

    Returns a flat list of DataProduct and DataContract objects.
    ``include_metadata_in_response`` is an alias for ``include_metadata``:
    the SDK receives the logical OR of the two flags.
    """
    want_metadata = include_metadata or include_metadata_in_response
    sdk = AsyncSDK(_RepoWrapper(dp_repo=dp_repo, dc_repo=dc_repo))
    return await sdk.discover(
        dp_id=dp_id,
        domain=domain,
        name=name,
        include_metadata=want_metadata,
    )
|