schema-guard-core 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,76 @@
1
+ Metadata-Version: 2.4
2
+ Name: schema-guard-core
3
+ Version: 0.1.0
4
+ Summary: Structured Output Validator & Schema Enforcer SDK
5
+ Project-URL: Homepage, https://github.com/NayanSrivastav/schema-guard
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: jsonschema>=4.0.0
9
+ Requires-Dist: pydantic>=2.0.0
10
+ Requires-Dist: requests>=2.25.0
11
+
12
+ # SchemaGuard Python SDK
13
+
14
+ The official Python client for SchemaGuard — a runtime validation, monitoring, and coercion engine for protecting your LLM pipelines from bad JSON schemas.
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ pip install schema-guard-core
20
+ ```
21
+
22
+ ## Quick Start (Offline Mode)
23
+
24
+ If you don't want to run the core Go server, use `LocalValidator` to validate LLM output against a JSON Schema directly inside your Python process.
25
+
26
+ ```python
27
+ from schemaguard import LocalValidator
28
+
29
+ schema = {
30
+ "type": "object",
31
+ "properties": { "id": {"type": "integer"} },
32
+ "required": ["id"]
33
+ }
34
+
35
+ validator = LocalValidator(schema)
36
+ llm_response = '```json\n{"id": 123}\n```'
37
+
38
+ is_valid, data, errors = validator.validate(llm_response)
39
+ if is_valid:
40
+ print("Clean format!", data["id"])
41
+ else:
42
+ print("Errors:", errors)
43
+ ```
44
+
45
+ ## Production Mode (API Client)
46
+
47
+ In production, the SchemaGuard Go engine — running locally or on your private cluster — handles schema distribution and caching, OpenTelemetry cost tracking, and circuit breaking.
48
+
49
+ ```python
50
+ from schemaguard import SchemaGuardClient
51
+
52
+ client = SchemaGuardClient(api_key="sg_...", base_url="http://localhost:8080/v1")
53
+
54
+ res = client.validate(schema_name="invoice_schema", version="latest", payload='{"total": "-10"}')
55
+ if res.get("status") != "PASS":
56
+ print(res["errors"]) # Maps dynamically cleanly for prompt re-injection strategies
57
+ ```
58
+
59
+ ## LangChain Integration
60
+
61
+ The `SchemaGuardOutputParser` integrates with LangChain's existing retrying output parsers without any additional configuration.
62
+
63
+ ```python
64
+ from schemaguard_parser import SchemaGuardOutputParser
65
+
66
+ parser = SchemaGuardOutputParser(schema_dict=schema)
67
+ prompt = PromptTemplate(
68
+ template="Extract invoice details.\n{format_instructions}\n{context}",
69
+ input_variables=["context"],
70
+ partial_variables={"format_instructions": parser.get_format_instructions()},
71
+ )
72
+
73
+ chain = prompt | llm | parser
74
+
75
+ # Automatically drops schema bounds into the prompt, extracts response payloads, validates constraints, and natively rejects outputs throwing `OutputParserException`.
76
+ ```
@@ -0,0 +1,65 @@
1
+ # SchemaGuard Python SDK
2
+
3
+ The official Python client for SchemaGuard — a runtime validation, monitoring, and coercion engine for protecting your LLM pipelines from bad JSON schemas.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install schema-guard-core
9
+ ```
10
+
11
+ ## Quick Start (Offline Mode)
12
+
13
+ If you don't want to run the core Go server, use `LocalValidator` to validate LLM output against a JSON Schema directly inside your Python process.
14
+
15
+ ```python
16
+ from schemaguard import LocalValidator
17
+
18
+ schema = {
19
+ "type": "object",
20
+ "properties": { "id": {"type": "integer"} },
21
+ "required": ["id"]
22
+ }
23
+
24
+ validator = LocalValidator(schema)
25
+ llm_response = '```json\n{"id": 123}\n```'
26
+
27
+ is_valid, data, errors = validator.validate(llm_response)
28
+ if is_valid:
29
+ print("Clean format!", data["id"])
30
+ else:
31
+ print("Errors:", errors)
32
+ ```
33
+
34
+ ## Production Mode (API Client)
35
+
36
+ In production, the SchemaGuard Go engine — running locally or on your private cluster — handles schema distribution and caching, OpenTelemetry cost tracking, and circuit breaking.
37
+
38
+ ```python
39
+ from schemaguard import SchemaGuardClient
40
+
41
+ client = SchemaGuardClient(api_key="sg_...", base_url="http://localhost:8080/v1")
42
+
43
+ res = client.validate(schema_name="invoice_schema", version="latest", payload='{"total": "-10"}')
44
+ if res.get("status") != "PASS":
45
+ print(res["errors"]) # Maps dynamically cleanly for prompt re-injection strategies
46
+ ```
47
+
48
+ ## LangChain Integration
49
+
50
+ The `SchemaGuardOutputParser` integrates with LangChain's existing retrying output parsers without any additional configuration.
51
+
52
+ ```python
53
+ from schemaguard_parser import SchemaGuardOutputParser
54
+
55
+ parser = SchemaGuardOutputParser(schema_dict=schema)
56
+ prompt = PromptTemplate(
57
+ template="Extract invoice details.\n{format_instructions}\n{context}",
58
+ input_variables=["context"],
59
+ partial_variables={"format_instructions": parser.get_format_instructions()},
60
+ )
61
+
62
+ chain = prompt | llm | parser
63
+
64
+ # Automatically drops schema bounds into the prompt, extracts response payloads, validates constraints, and natively rejects outputs throwing `OutputParserException`.
65
+ ```
@@ -0,0 +1,18 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "schema-guard-core"
7
+ version = "0.1.0"
8
+ description = "Structured Output Validator & Schema Enforcer SDK"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ dependencies = [
12
+ "jsonschema>=4.0.0",
13
+ "pydantic>=2.0.0",
14
+ "requests>=2.25.0"
15
+ ]
16
+
17
+ [project.urls]
18
+ Homepage = "https://github.com/NayanSrivastav/schema-guard"
@@ -0,0 +1,76 @@
1
+ Metadata-Version: 2.4
2
+ Name: schema-guard-core
3
+ Version: 0.1.0
4
+ Summary: Structured Output Validator & Schema Enforcer SDK
5
+ Project-URL: Homepage, https://github.com/NayanSrivastav/schema-guard
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: jsonschema>=4.0.0
9
+ Requires-Dist: pydantic>=2.0.0
10
+ Requires-Dist: requests>=2.25.0
11
+
12
+ # SchemaGuard Python SDK
13
+
14
+ The official Python client for SchemaGuard — a runtime validation, monitoring, and coercion engine for protecting your LLM pipelines from bad JSON schemas.
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ pip install schema-guard-core
20
+ ```
21
+
22
+ ## Quick Start (Offline Mode)
23
+
24
+ If you don't want to run the core Go server, use `LocalValidator` to validate LLM output against a JSON Schema directly inside your Python process.
25
+
26
+ ```python
27
+ from schemaguard import LocalValidator
28
+
29
+ schema = {
30
+ "type": "object",
31
+ "properties": { "id": {"type": "integer"} },
32
+ "required": ["id"]
33
+ }
34
+
35
+ validator = LocalValidator(schema)
36
+ llm_response = '```json\n{"id": 123}\n```'
37
+
38
+ is_valid, data, errors = validator.validate(llm_response)
39
+ if is_valid:
40
+ print("Clean format!", data["id"])
41
+ else:
42
+ print("Errors:", errors)
43
+ ```
44
+
45
+ ## Production Mode (API Client)
46
+
47
+ In production, the SchemaGuard Go engine — running locally or on your private cluster — handles schema distribution and caching, OpenTelemetry cost tracking, and circuit breaking.
48
+
49
+ ```python
50
+ from schemaguard import SchemaGuardClient
51
+
52
+ client = SchemaGuardClient(api_key="sg_...", base_url="http://localhost:8080/v1")
53
+
54
+ res = client.validate(schema_name="invoice_schema", version="latest", payload='{"total": "-10"}')
55
+ if res.get("status") != "PASS":
56
+ print(res["errors"]) # Maps dynamically cleanly for prompt re-injection strategies
57
+ ```
58
+
59
+ ## LangChain Integration
60
+
61
+ The `SchemaGuardOutputParser` integrates with LangChain's existing retrying output parsers without any additional configuration.
62
+
63
+ ```python
64
+ from schemaguard_parser import SchemaGuardOutputParser
65
+
66
+ parser = SchemaGuardOutputParser(schema_dict=schema)
67
+ prompt = PromptTemplate(
68
+ template="Extract invoice details.\n{format_instructions}\n{context}",
69
+ input_variables=["context"],
70
+ partial_variables={"format_instructions": parser.get_format_instructions()},
71
+ )
72
+
73
+ chain = prompt | llm | parser
74
+
75
+ # Automatically drops schema bounds into the prompt, extracts response payloads, validates constraints, and natively rejects outputs throwing `OutputParserException`.
76
+ ```
@@ -0,0 +1,11 @@
1
+ README.md
2
+ pyproject.toml
3
+ schema_guard_core.egg-info/PKG-INFO
4
+ schema_guard_core.egg-info/SOURCES.txt
5
+ schema_guard_core.egg-info/dependency_links.txt
6
+ schema_guard_core.egg-info/requires.txt
7
+ schema_guard_core.egg-info/top_level.txt
8
+ schemaguard/__init__.py
9
+ schemaguard/client.py
10
+ schemaguard/local.py
11
+ tests/test_local_validator_bdd.py
@@ -0,0 +1,3 @@
1
+ jsonschema>=4.0.0
2
+ pydantic>=2.0.0
3
+ requests>=2.25.0
@@ -0,0 +1,4 @@
1
+ from .client import SchemaGuardClient
2
+ from .local import LocalValidator
3
+
4
+ __all__ = ["SchemaGuardClient", "LocalValidator"]
@@ -0,0 +1,24 @@
1
+ import requests
2
+ from typing import Dict, Any, Optional
3
+
4
class SchemaGuardClient:
    """
    Remote client that delegates LLM response validation to the SchemaGuard API.

    One ``requests.Session`` is created per client; it sends the
    ``Authorization: Bearer <api_key>`` header with every request.
    """

    # requests applies no timeout unless one is passed explicitly, so without
    # this an unresponsive server would block the caller indefinitely.
    DEFAULT_TIMEOUT = 30.0

    def __init__(
        self,
        api_key: str,
        base_url: str = "http://localhost:8080/v1",
        timeout: Optional[float] = DEFAULT_TIMEOUT,
    ):
        """
        Args:
            api_key: Token sent as the bearer credential.
            base_url: Root URL of the API; a trailing slash is stripped.
            timeout: Seconds to wait for the server on each request. Pass
                ``None`` to wait forever (the behaviour before this
                parameter existed).
        """
        self.api_key = api_key
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({"Authorization": f"Bearer {self.api_key}"})

    def validate(self, schema_name: str, payload: str, version: Optional[str] = "latest") -> Dict[str, Any]:
        """
        POST the raw LLM payload to ``{base_url}/validate`` and return the decoded JSON reply.

        Args:
            schema_name: Name of the schema to validate against.
            payload: Raw LLM output text.
            version: Schema version to use; defaults to ``"latest"``.

        Returns:
            The response body decoded from JSON.

        Raises:
            requests.HTTPError: The server answered with a 4xx/5xx status.
            requests.Timeout: No response arrived within ``timeout`` seconds.
        """
        response = self.session.post(
            f"{self.base_url}/validate",
            json={"schema_name": schema_name, "version": version, "payload": payload},
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()
@@ -0,0 +1,73 @@
1
+ import json
2
+ import jsonschema
3
+ from typing import Dict, Any, Tuple
4
+
5
class LocalValidator:
    """
    Offline validator that checks LLM output against a JSON Schema in-process.

    Intended for CI pipelines and quick local checks where the Go server is
    not running.
    """

    @staticmethod
    def coerce_heuristics(val: Any) -> Any:
        """
        Recursively convert string values that look like other JSON types.

        Strings are tried, in order, as an integer, a float, a boolean
        (``"true"``/``"false"``, case-insensitive) and an embedded JSON
        object/array. Dicts and lists are walked recursively. Anything else
        is returned unchanged.

        NOTE: the coercion does not consult the schema, so a numeric-looking
        string is converted even where the schema expects a string.
        """
        if isinstance(val, str):
            if val.isdigit():
                # str.isdigit() also accepts characters such as "²" or "①"
                # that int() rejects; a failed conversion must fall through
                # to the remaining heuristics instead of raising.
                try:
                    return int(val)
                except ValueError:
                    pass
            try:
                return float(val)
            except ValueError:
                pass
            lowered = val.lower()
            if lowered == "true":
                return True
            if lowered == "false":
                return False
            if (val.startswith('{') and val.endswith('}')) or (val.startswith('[') and val.endswith(']')):
                # Best effort: a string that merely looks like JSON, or one
                # nested too deeply to process, is kept as a plain string.
                try:
                    parsed = json.loads(val)
                    return LocalValidator.coerce_heuristics(parsed)
                except (ValueError, RecursionError):
                    pass
            return val
        elif isinstance(val, dict):
            return {k: LocalValidator.coerce_heuristics(v) for k, v in val.items()}
        elif isinstance(val, list):
            return [LocalValidator.coerce_heuristics(v) for v in val]
        return val

    def __init__(self, schema: Dict[str, Any]):
        """Store the JSON Schema that every payload is validated against."""
        self.schema = schema

    def validate(self, payload: str) -> Tuple[bool, Any, list]:
        """
        Extract, parse, coerce and validate an LLM response.

        Returns:
            ``(is_valid, data, errors)``. ``data`` is the coerced JSON value,
            or the raw extracted text when it could not be parsed as JSON.
            ``errors`` is empty on success and holds one message on failure.

        NOTE: only ``ValidationError`` is handled; per the jsonschema docs,
        ``jsonschema.validate`` raises ``SchemaError`` for an invalid schema,
        and that propagates to the caller.
        """
        raw = payload

        # 1. Pull the JSON out of the first Markdown code fence, if any.
        #    The membership checks guarantee index [1] exists.
        if "```json" in payload:
            raw = payload.split("```json")[1].split("```")[0].strip()
        elif "```" in payload:
            raw = payload.split("```")[1].strip()

        # 2. Syntax check
        try:
            data = json.loads(raw)
        except json.JSONDecodeError as e:
            return False, raw, [f"Invalid JSON Format (Root Engine): {str(e)}"]

        # 3. Coerce string values before the schema is applied
        data = self.coerce_heuristics(data)

        # 4. Schema validation
        try:
            jsonschema.validate(instance=data, schema=self.schema)
            return True, data, []
        except jsonschema.exceptions.ValidationError as e:
            # Dotted path to the offending value (e.g. "metadata.views");
            # "root" when the error has no path.
            path = ".".join([str(p) for p in e.path]) if e.path else "root"
            return False, data, [f"Field '{path}': {e.message}"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,64 @@
1
+ import pytest
2
+ from schemaguard.local import LocalValidator
3
+
4
class TestSchemaGuardLocalValidation_BDD:
    """Feature: offline schema verification and Markdown extraction via the Python SDK."""

    def setup_method(self):
        # GIVEN a validator built from a schema that requires an integer "age"
        age_schema = {
            "type": "object",
            "properties": {
                "age": {"type": "integer"}
            },
            "required": ["age"]
        }
        self.schema = age_schema
        self.validator = LocalValidator(age_schema)

    # Scenario 1: a well-formed JSON payload passes validation.
    def test_scenario_validating_perfect_json_payload(self):
        # GIVEN a response that is already valid JSON of the right shape
        valid_llm_response = '{"age": 25}'

        # WHEN it is validated locally
        outcome = self.validator.validate(valid_llm_response)
        is_valid, data, errors = outcome

        # THEN validation passes and the integer is available unchanged
        assert is_valid is True, "Expected valid execution natively mapped truthfully"
        assert data["age"] == 25
        assert errors == []

    # Scenario 2: JSON wrapped in Markdown, with a string where an integer belongs.
    def test_scenario_fixing_hallucinated_markdown(self):
        # GIVEN chatty output with a fenced JSON block whose "age" is the string "25"
        hallucinated_response = "Here is the extracted information requested:\n```json\n{\"age\": \"25\"}\n```\nLet me know if you need anything else!"

        # WHEN the validator extracts the fenced block and coerces its values
        outcome = self.validator.validate(hallucinated_response)
        is_valid, data, errors = outcome

        # THEN the string "25" has been coerced to the integer 25
        assert is_valid is True, f"Expected Coerced String mapping natively internally structurally. Errors: {errors}"
        assert data["age"] == 25, "Expected AST translation mapping the string inherently cleanly!"
        assert errors == []

    # Scenario 3: a payload missing the required property is rejected.
    def test_scenario_rejecting_missing_parameters(self):
        # GIVEN output that omits the required "age" property
        broken_response = '{"name": "Enterprise Engineer"}'

        # WHEN it is validated locally
        outcome = self.validator.validate(broken_response)
        is_valid, data, errors = outcome

        # THEN validation fails with at least one error
        assert is_valid is False
        assert len(errors) > 0

        # AND one of the error messages names the missing "age" property
        assert any("age" in err for err in errors) is True, "Expected specific error message dictating 'age' explicitly!"