ospac 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ospac might be problematic. Click here for more details.

@@ -0,0 +1,283 @@
1
+ """
2
+ SPDX license dataset processor.
3
+ Downloads and processes the official SPDX license list.
4
+ """
5
+
6
+ import json
7
+ import logging
8
+ from pathlib import Path
9
+ from typing import Dict, List, Any, Optional
10
+ import requests
11
+ import yaml
12
+ from datetime import datetime
13
+
14
logger = logging.getLogger(__name__)


class SPDXProcessor:
    """Download, cache and post-process the official SPDX license list.

    The raw ``licenses.json`` / ``exceptions.json`` documents are cached
    under ``cache_dir``; individual license texts are cached under
    ``cache_dir / "texts"``.
    """

    SPDX_LICENSE_URL = "https://raw.githubusercontent.com/spdx/license-list-data/main/json/licenses.json"
    SPDX_EXCEPTIONS_URL = "https://raw.githubusercontent.com/spdx/license-list-data/main/json/exceptions.json"

    # Seconds to wait on the network before giving up. Without a timeout
    # ``requests`` may block forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    # Exact SPDX identifier -> category.
    _CATEGORY_BY_ID = {
        # Permissive
        "MIT": "permissive",
        "Apache-2.0": "permissive",
        "BSD-2-Clause": "permissive",
        "BSD-3-Clause": "permissive",
        "ISC": "permissive",
        "0BSD": "permissive",
        "Unlicense": "public_domain",
        "CC0-1.0": "public_domain",
        # Weak copyleft
        "LGPL-2.1": "copyleft_weak",
        "LGPL-3.0": "copyleft_weak",
        "MPL-2.0": "copyleft_weak",
        "EPL-2.0": "copyleft_weak",
        "CDDL-1.0": "copyleft_weak",
        # Strong copyleft
        "GPL-2.0": "copyleft_strong",
        "GPL-3.0": "copyleft_strong",
        "AGPL-3.0": "copyleft_strong",
        # Proprietary/Commercial
        "Proprietary": "proprietary",
        "Commercial": "proprietary",
    }

    # Identifier prefixes -> category, consulted when there is no exact
    # match. "CDDL" is listed so that every CDDL version is categorized
    # like the exact "CDDL-1.0" entry above.
    _CATEGORY_BY_PREFIX = (
        (("MIT", "BSD", "Apache"), "permissive"),
        (("GPL", "AGPL"), "copyleft_strong"),
        (("LGPL", "MPL", "EPL", "CDDL"), "copyleft_weak"),
    )

    def __init__(self, cache_dir: Optional[Path] = None):
        """Initialize SPDX processor.

        Args:
            cache_dir: Directory used for cached downloads. Defaults to
                ``~/.cache/ospac/spdx``. The directory is created if needed.
        """
        self.cache_dir = Path(cache_dir) if cache_dir else Path.home() / ".cache" / "ospac" / "spdx"
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        # Both are lists of dict entries once download_spdx_data() has run.
        self.licenses: List[Dict[str, Any]] = []
        self.exceptions: List[Dict[str, Any]] = []
        # Top-level "licenseListVersion" of the loaded licenses document.
        self.license_list_version: Optional[str] = None

    @staticmethod
    def _read_json(path: Path) -> Any:
        """Parse the JSON document stored at *path*."""
        with open(path, encoding="utf-8") as f:
            return json.load(f)

    @staticmethod
    def _write_json(path: Path, payload: Any) -> None:
        """Write *payload* to *path* as indented JSON."""
        with open(path, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2)

    def _fetch_json(self, url: str) -> Any:
        """GET *url* and return the decoded JSON body; raises on HTTP errors."""
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def download_spdx_data(self, force: bool = False) -> Dict[str, Any]:
        """
        Download SPDX license data.

        Args:
            force: Force re-download even if cached

        Returns:
            Dictionary with licenses and exceptions, plus the list
            ``version`` and ``release_date`` taken from the licenses document.

        Raises:
            requests.RequestException: if a download fails.
        """
        licenses_cache = self.cache_dir / "licenses.json"
        exceptions_cache = self.cache_dir / "exceptions.json"

        licenses = exceptions = None

        # Check cache
        if not force and licenses_cache.exists() and exceptions_cache.exists():
            logger.info("Loading SPDX data from cache")
            try:
                licenses = self._read_json(licenses_cache)
                exceptions = self._read_json(exceptions_cache)
            except (OSError, ValueError) as exc:
                # A truncated or corrupt cache should not be fatal:
                # fall back to a fresh download.
                logger.warning(f"Ignoring unreadable SPDX cache in {self.cache_dir}: {exc}")
                licenses = exceptions = None

        if licenses is None or exceptions is None:
            logger.info("Downloading SPDX license data")

            # Fetch both documents before writing either, so a failed
            # second download never leaves a half-updated cache.
            licenses = self._fetch_json(self.SPDX_LICENSE_URL)
            exceptions = self._fetch_json(self.SPDX_EXCEPTIONS_URL)

            # Cache the data
            self._write_json(licenses_cache, licenses)
            self._write_json(exceptions_cache, exceptions)

            logger.info(f"Cached SPDX data to {self.cache_dir}")

        self.licenses = licenses.get("licenses", [])
        self.exceptions = exceptions.get("exceptions", [])
        # The version lives on the document, not on the individual license
        # entries, so remember it here for save_processed_data().
        self.license_list_version = licenses.get("licenseListVersion")

        logger.info(f"Loaded {len(self.licenses)} licenses and {len(self.exceptions)} exceptions")

        return {
            "licenses": self.licenses,
            "exceptions": self.exceptions,
            "version": self.license_list_version,
            "release_date": licenses.get("releaseDate"),
        }

    def get_license_text(self, license_id: str) -> Optional[str]:
        """
        Get the full text of a license.

        The text is served from the on-disk cache when present; otherwise it
        is fetched from the ``detailsUrl`` of the matching entry in
        ``self.licenses`` and cached.

        Args:
            license_id: SPDX license identifier

        Returns:
            License text or None if not found (unknown identifier, fetch
            failure, or an empty text in the details document)
        """
        text_cache = self.cache_dir / "texts" / f"{license_id}.txt"

        if text_cache.exists():
            try:
                # Explicit UTF-8: license texts contain non-ASCII characters
                # and the platform default encoding may not handle them.
                return text_cache.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError) as exc:
                logger.warning(f"Ignoring unreadable cached text for {license_id}: {exc}")

        # Find license details URL
        for license_data in self.licenses:
            if license_data.get("licenseId") != license_id:
                continue
            details_url = license_data.get("detailsUrl")
            if not details_url:
                continue

            try:
                details = self._fetch_json(details_url)
                license_text = details.get("licenseText", "")
            except Exception as e:
                # Deliberately best-effort: one failing license must not
                # abort processing of the whole list.
                logger.error(f"Failed to fetch license text for {license_id}: {e}")
                continue

            if not license_text:
                # Do not cache an empty text; it would mask a later fix.
                return None

            # Cache the text; a cache write failure should not lose the
            # text that was just fetched.
            try:
                text_cache.parent.mkdir(parents=True, exist_ok=True)
                text_cache.write_text(license_text, encoding="utf-8")
            except OSError as e:
                logger.warning(f"Could not cache license text for {license_id}: {e}")

            return license_text

        return None

    def extract_basic_info(self, license_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Extract basic information from SPDX license data.

        Args:
            license_data: SPDX license data

        Returns:
            Extracted information with snake_case keys; boolean flags default
            to False and ``see_also`` to an empty list when absent.
        """
        return {
            "id": license_data.get("licenseId"),
            "name": license_data.get("name"),
            "reference": license_data.get("reference"),
            "is_deprecated": license_data.get("isDeprecatedLicenseId", False),
            "is_osi_approved": license_data.get("isOsiApproved", False),
            "is_fsf_libre": license_data.get("isFsfLibre", False),
            "see_also": license_data.get("seeAlso", []),
        }

    def categorize_license(self, license_id: str, license_text: Optional[str] = None) -> str:
        """
        Categorize a license based on its characteristics.

        Lookup order: exact identifier, identifier prefix, then a substring
        check for CC0 / Unlicense.

        Args:
            license_id: SPDX license identifier
            license_text: Optional license text (currently not consulted)

        Returns:
            License category; unknown identifiers default to "permissive"
        """
        # Check exact match
        exact = self._CATEGORY_BY_ID.get(license_id)
        if exact is not None:
            return exact

        # Check patterns
        for prefixes, category in self._CATEGORY_BY_PREFIX:
            if license_id.startswith(prefixes):
                return category

        if "CC0" in license_id or "Unlicense" in license_id:
            return "public_domain"

        # Default to permissive for unknown
        return "permissive"

    def process_all_licenses(self) -> List[Dict[str, Any]]:
        """
        Process all SPDX licenses.

        Entries without a ``licenseId`` are skipped.

        Returns:
            List of processed license data
        """
        processed = []

        for license_data in self.licenses:
            license_id = license_data.get("licenseId")
            if not license_id:
                continue

            logger.info(f"Processing {license_id}")

            info = self.extract_basic_info(license_data)
            license_text = self.get_license_text(license_id)
            info["category"] = self.categorize_license(license_id, license_text)

            # Record text availability (and size when available)
            info["has_full_text"] = bool(license_text)
            if license_text:
                info["text_length"] = len(license_text)

            processed.append(info)

        return processed

    def save_processed_data(self, data: List[Dict[str, Any]], output_dir: Path) -> None:
        """
        Save processed license data to files.

        Writes ``spdx_processed.json`` (the data plus metadata) and
        ``spdx_stats.yaml`` (summary counts) into ``output_dir``.

        Args:
            data: Processed license data
            output_dir: Output directory (created if missing)
        """
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Prefer the document-level version captured at load time; the
        # per-entry lookup is kept only as a fallback for callers that
        # populated self.licenses by hand.
        version = self.license_list_version
        if version is None and self.licenses:
            version = self.licenses[0].get("licenseListVersion")

        # Save as JSON
        json_file = output_dir / "spdx_processed.json"
        self._write_json(json_file, {
            "licenses": data,
            "total": len(data),
            "generated": datetime.now().isoformat(),
            "version": version,
        })

        logger.info(f"Saved processed data to {json_file}")

        # Generate summary statistics
        categories: Dict[str, int] = {}
        for license_info in data:
            category = license_info.get("category", "unknown")
            categories[category] = categories.get(category, 0) + 1

        stats = {
            "total_licenses": len(data),
            "categories": categories,
            "osi_approved": sum(1 for entry in data if entry.get("is_osi_approved")),
            "fsf_libre": sum(1 for entry in data if entry.get("is_fsf_libre")),
            "deprecated": sum(1 for entry in data if entry.get("is_deprecated")),
            "with_full_text": sum(1 for entry in data if entry.get("has_full_text")),
        }

        stats_file = output_dir / "spdx_stats.yaml"
        with open(stats_file, "w", encoding="utf-8") as f:
            yaml.dump(stats, f, default_flow_style=False)

        logger.info(f"Saved statistics to {stats_file}")
@@ -0,0 +1,11 @@
1
+ """Policy runtime engine."""
2
+
3
+ from ospac.runtime.engine import PolicyRuntime
4
+ from ospac.runtime.evaluator import RuleEvaluator
5
+ from ospac.runtime.loader import PolicyLoader
6
+
7
# Public API re-exported by the runtime package.
__all__ = ["PolicyRuntime", "RuleEvaluator", "PolicyLoader"]
@@ -0,0 +1,127 @@
1
+ """
2
+ Policy execution runtime engine.
3
+ """
4
+
5
+ from typing import Dict, List, Any, Optional
6
+ from pathlib import Path
7
+
8
+ from ospac.runtime.loader import PolicyLoader
9
+ from ospac.runtime.evaluator import RuleEvaluator
10
+ from ospac.models.compliance import ComplianceResult, PolicyResult, ActionType
11
+
12
+
13
class PolicyRuntime:
    """
    Main policy execution runtime.
    All logic is driven by policy files, not hardcoded.
    """

    def __init__(self, policy_path: Optional[str] = None):
        """Initialize the policy runtime with policy definitions.

        Args:
            policy_path: Optional file or directory to load policies from.
                When omitted, call load_policies() before evaluate().
        """
        self.policies = {}
        self.evaluator = None

        if policy_path:
            self.load_policies(policy_path)

    def load_policies(self, policy_path: str) -> None:
        """Load all policy definitions from the specified path."""
        loader = PolicyLoader()
        self.policies = loader.load_all(policy_path)
        self.evaluator = RuleEvaluator(self.policies)

    @classmethod
    def from_path(cls, policy_path: str) -> "PolicyRuntime":
        """Create a PolicyRuntime instance from a policy directory."""
        return cls(policy_path)

    @staticmethod
    def _resolve_action_and_severity(result: Dict[str, Any]) -> tuple:
        """Return ``(ACTION_NAME, severity)`` for an evaluator result.

        The evaluator always emits the "action" and "severity" keys and sets
        them to None for rules without a "then" clause, so a plain
        ``dict.get(key, default)`` would never apply the default. Fall back
        on any falsy value instead.
        """
        action_name = (result.get("action") or "allow").upper()
        severity = result.get("severity") or "info"
        return action_name, severity

    def evaluate(self, context: Dict[str, Any]) -> "PolicyResult":
        """
        Evaluate context against all loaded policies.
        No business logic here - just policy execution.

        Raises:
            RuntimeError: if no policies have been loaded.
            KeyError: if a rule names an action that is not an ActionType member.
        """
        if self.evaluator is None:
            raise RuntimeError("No policies loaded. Call load_policies() first.")

        results = []

        for rule in self._find_applicable_rules(context):
            result = self.evaluator.evaluate_rule(rule, context)
            action_name, severity = self._resolve_action_and_severity(result)
            # Convert dict result to PolicyResult
            results.append(PolicyResult(
                rule_id=result.get("rule_id", "unknown"),
                action=ActionType[action_name],
                severity=severity,
                message=result.get("message"),
                requirements=result.get("requirements", []),
                remediation=result.get("remediation"),
            ))

        return PolicyResult.aggregate(results)

    def _find_applicable_rules(self, context: Dict[str, Any]) -> List[Dict]:
        """Find all rules that apply to the given context."""
        applicable = []

        for policy in self.policies.values():
            # A policy file may load as a non-mapping (e.g. None for an
            # empty YAML document); such entries carry no rules.
            if not isinstance(policy, dict):
                continue
            for rule in policy.get("rules") or []:
                if self._rule_applies(rule, context):
                    applicable.append(rule)

        return applicable

    def _rule_applies(self, rule: Dict, context: Dict) -> bool:
        """Check if a rule applies to the given context.

        A rule without a "when" clause always applies; otherwise every
        condition in "when" (a single mapping or a list of them) must hold.
        """
        if "when" not in rule:
            return True

        conditions = rule["when"]
        if not isinstance(conditions, list):
            conditions = [conditions]

        return all(self._check_condition(condition, context) for condition in conditions)

    def _check_condition(self, condition: Dict, context: Dict) -> bool:
        """Check if a single condition is met.

        Every key must be present in the context. A list value means
        "context value is one of these"; any other value must compare equal.
        """
        for key, value in condition.items():
            if key not in context:
                return False

            if isinstance(value, list):
                if context[key] not in value:
                    return False
            elif context[key] != value:
                return False

        return True

    def check_compatibility(self, license1: str, license2: str,
                            context: str = "general") -> "ComplianceResult":
        """Check if two licenses are compatible.

        Builds an evaluation context with the keys "license1", "license2"
        and "compatibility_context" and evaluates it against the policies.
        """
        eval_context = {
            "license1": license1,
            "license2": license2,
            "compatibility_context": context
        }

        result = self.evaluate(eval_context)
        return ComplianceResult.from_policy_result(result)

    def get_obligations(self, licenses: List[str]) -> Dict[str, Any]:
        """Get all obligations for the given licenses.

        Looks each license up in the policy named "obligations"; licenses
        with no (or empty) obligations are omitted from the result.
        """
        # Hoisted out of the loop: the table does not depend on the license.
        obligation_table = self.policies.get("obligations") or {}
        obligations = {}

        for license_id in licenses:
            license_obligations = obligation_table.get(license_id, {})
            if license_obligations:
                obligations[license_id] = license_obligations

        return obligations
@@ -0,0 +1,72 @@
1
+ """
2
+ Rule evaluation engine.
3
+ """
4
+
5
+ from typing import Dict, Any, List, Optional
6
+
7
+
8
class RuleEvaluator:
    """Evaluate rules against context."""

    def __init__(self, policies: Dict[str, Any]):
        """Initialize with loaded policies."""
        self.policies = policies

    def evaluate_rule(self, rule: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
        """Evaluate a single rule against the context.

        Args:
            rule: Rule mapping; its optional "then" clause supplies the
                action, severity, message, requirements and remediation.
            context: Values available for message formatting.

        Returns:
            Result dict that always contains "rule_id", "description",
            "matched", "action", "severity" and "message". The last three
            stay None when the rule has no "then" clause. "requirements" and
            "remediation" are present only if the clause defines them.
        """
        result = {
            "rule_id": rule.get("id", "unknown"),
            "description": rule.get("description", ""),
            "matched": True,
            "action": None,
            "severity": None,
            "message": None,
        }

        if "then" not in rule:
            return result

        # Execute the "then" clause. A key with a null value (e.g. a bare
        # "then:" in YAML) is treated like an empty clause.
        then_clause = rule["then"] or {}
        result["action"] = then_clause.get("action", "allow")
        result["severity"] = then_clause.get("severity", "info")

        # Format message with context
        if "message" in then_clause:
            result["message"] = self._format_message(then_clause["message"], context)

        # Copy optional fields through unchanged
        for optional_key in ("requirements", "remediation"):
            if optional_key in then_clause:
                result[optional_key] = then_clause[optional_key]

        return result

    @staticmethod
    def _format_message(template: Any, context: Dict[str, Any]) -> Any:
        """Interpolate *context* into *template*, or return it unformatted.

        ``str.format`` raises more than KeyError: stray braces give
        ValueError, positional fields give IndexError, attribute lookups in
        a field give AttributeError (as does a non-string template), and
        non-string context keys give TypeError. In all of these cases the
        raw template is the best available message.
        """
        try:
            return template.format(**context)
        except (KeyError, IndexError, ValueError, AttributeError, TypeError):
            return template

    def evaluate_decision_tree(self, tree: List[Dict], context: Dict[str, Any]) -> Optional[Dict]:
        """Evaluate a decision tree against context.

        Returns the "then" value (default ``{}``) of the first node whose
        "if" condition matches, or None when no node matches. A node
        without an "if" key matches unconditionally.
        """
        for node in tree:
            if self._matches_condition(node.get("if", {}), context):
                return node.get("then", {})

        return None

    def _matches_condition(self, condition: Dict, context: Dict) -> bool:
        """Check if a condition matches the context.

        Every key must map to a non-None context value. A list expectation
        means "one of these"; anything else must compare equal.
        """
        for key, expected_value in condition.items():
            actual_value = context.get(key)

            # A missing key and an explicit None are both treated as no match.
            if actual_value is None:
                return False

            if isinstance(expected_value, list):
                if actual_value not in expected_value:
                    return False
            elif actual_value != expected_value:
                return False

        return True
@@ -0,0 +1,54 @@
1
+ """
2
+ Policy file loader.
3
+ """
4
+
5
+ import yaml
6
+ import json
7
+ from pathlib import Path
8
+ from typing import Dict, Any, List
9
+
10
+
11
class PolicyLoader:
    """Load and parse policy definitions from files."""

    SUPPORTED_EXTENSIONS = {".yaml", ".yml", ".json"}

    def load_all(self, policy_path: str) -> Dict[str, Any]:
        """Load all policy files from the specified directory.

        Args:
            policy_path: A single policy file, or a directory that is
                searched recursively for files with a supported extension.

        Returns:
            Mapping of policy name to parsed content. For a single file the
            name is the file stem; for a directory it is the path relative
            to that directory, without the extension.

        Raises:
            FileNotFoundError: if ``policy_path`` does not exist.
        """
        path = Path(policy_path)
        if not path.exists():
            raise FileNotFoundError(f"Policy path not found: {policy_path}")

        if path.is_file():
            return {path.stem: self.load_file(str(path))}

        policies = {}
        # Sorted so that the load order is deterministic. This matters when
        # two files differ only by extension and map to the same name: the
        # later one in sort order wins.
        for file_path in sorted(path.rglob("*")):
            # rglob also yields directories; one named like "x.yaml" must
            # not be opened as a policy file.
            if not file_path.is_file():
                continue
            if file_path.suffix.lower() not in self.SUPPORTED_EXTENSIONS:
                continue
            relative_name = file_path.relative_to(path).with_suffix("")
            policies[str(relative_name)] = self.load_file(str(file_path))

        return policies

    def load_file(self, file_path: str) -> Dict[str, Any]:
        """Load a single policy file.

        Returns:
            The parsed document; an empty document (YAML with no content,
            JSON ``null``) is returned as ``{}``.

        Raises:
            FileNotFoundError: if the file does not exist.
            ValueError: if the extension is neither JSON nor YAML.
        """
        path = Path(file_path)

        if not path.exists():
            raise FileNotFoundError(f"Policy file not found: {file_path}")

        suffix = path.suffix.lower()
        if suffix == ".json":
            parse = json.load
        elif suffix in {".yaml", ".yml"}:
            parse = yaml.safe_load
        else:
            raise ValueError(f"Unsupported file format: {path.suffix}")

        with open(path, "r", encoding="utf-8") as f:
            data = parse(f)

        # safe_load returns None for an empty document; honour the
        # Dict return contract instead of leaking None to callers.
        return {} if data is None else data

    def validate_policy(self, policy: Dict[str, Any]) -> bool:
        """Validate a policy against the schema.

        Currently only checks that ``policy`` is a mapping containing the
        "version" and "rules" keys.
        """
        # TODO: Implement schema validation using jsonschema
        if not isinstance(policy, dict):
            return False
        required_fields = {"version", "rules"}
        return all(field in policy for field in required_fields)
@@ -0,0 +1,3 @@
1
+ """
2
+ OSPAC utility functions.
3
+ """