ospac 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ospac might be problematic. Click here for more details.
- ospac/__init__.py +19 -0
- ospac/cli/__init__.py +5 -0
- ospac/cli/commands.py +554 -0
- ospac/core/compatibility_matrix.py +332 -0
- ospac/models/__init__.py +12 -0
- ospac/models/compliance.py +161 -0
- ospac/models/license.py +82 -0
- ospac/models/policy.py +97 -0
- ospac/pipeline/__init__.py +14 -0
- ospac/pipeline/data_generator.py +530 -0
- ospac/pipeline/llm_analyzer.py +338 -0
- ospac/pipeline/llm_providers.py +463 -0
- ospac/pipeline/spdx_processor.py +283 -0
- ospac/runtime/__init__.py +11 -0
- ospac/runtime/engine.py +127 -0
- ospac/runtime/evaluator.py +72 -0
- ospac/runtime/loader.py +54 -0
- ospac/utils/__init__.py +3 -0
- ospac-0.1.0.dist-info/METADATA +269 -0
- ospac-0.1.0.dist-info/RECORD +25 -0
- ospac-0.1.0.dist-info/WHEEL +5 -0
- ospac-0.1.0.dist-info/entry_points.txt +2 -0
- ospac-0.1.0.dist-info/licenses/AUTHORS.md +9 -0
- ospac-0.1.0.dist-info/licenses/LICENSE +201 -0
- ospac-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LLM-based license analyzer with configurable providers.
|
|
3
|
+
Supports OpenAI, Anthropic Claude, and local Ollama.
|
|
4
|
+
Analyzes licenses to extract obligations, compatibility rules, and classifications.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from typing import Dict, List, Any, Optional
|
|
9
|
+
import asyncio
|
|
10
|
+
import os
|
|
11
|
+
|
|
12
|
+
from ospac.pipeline.llm_providers import (
|
|
13
|
+
LLMConfig,
|
|
14
|
+
LLMProvider,
|
|
15
|
+
create_llm_provider
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class LicenseAnalyzer:
    """
    Analyze licenses using configurable LLM providers.

    Supports OpenAI, Anthropic Claude, and local Ollama. When the provider
    cannot be created, the analyzer stays usable and answers from built-in,
    pattern-based fallbacks instead of calling an LLM.
    """

    def __init__(self, provider: str = "ollama", model: Optional[str] = None,
                 api_key: Optional[str] = None, **kwargs):
        """
        Initialize the license analyzer with specified provider.

        Args:
            provider: LLM provider ("openai", "claude", "ollama"); lower-cased
                before use.
            model: Model name (auto-selected per provider if not provided)
            api_key: API key for cloud providers (read from the environment
                if not provided)
            **kwargs: Additional provider-specific configuration, forwarded
                to LLMConfig unchanged
        """
        self.provider_name = provider.lower()

        # Auto-select models if not provided
        if not model:
            model = self._get_default_model(self.provider_name)

        # Get API key from environment if not provided
        if not api_key:
            api_key = self._get_api_key_from_env(self.provider_name)

        # Create configuration
        self.config = LLMConfig(
            provider=self.provider_name,
            model=model,
            api_key=api_key,
            **kwargs
        )

        # Initialize provider. A failure here is deliberately non-fatal:
        # llm_provider is set to None and the public methods fall back to
        # the built-in defaults.
        try:
            self.llm_provider = create_llm_provider(self.config)
            logger.info(f"Initialized {self.provider_name} provider with model {model}")
        except Exception as e:
            logger.error(f"Failed to initialize {self.provider_name} provider: {e}")
            self.llm_provider = None

    def _get_default_model(self, provider: str) -> str:
        """
        Get default model for each provider.

        Args:
            provider: Lower-cased provider name

        Returns:
            The default model for the provider; unknown providers get
            "gpt-4o-mini".
        """
        defaults = {
            "openai": "gpt-4o-mini",
            "claude": "claude-3-haiku-20240307",
            "ollama": "llama3:latest"
        }
        return defaults.get(provider, "gpt-4o-mini")

    def _get_api_key_from_env(self, provider: str) -> Optional[str]:
        """
        Get API key from environment variables.

        Args:
            provider: Lower-cased provider name

        Returns:
            The value of the provider's environment variable, or None when
            the provider has no associated variable or the variable is unset
            (a warning is logged in the latter case).
        """
        env_vars = {
            "openai": "OPENAI_API_KEY",
            "claude": "ANTHROPIC_API_KEY",
            "ollama": None  # No API key needed for local Ollama
        }

        env_var = env_vars.get(provider)
        if env_var:
            api_key = os.getenv(env_var)
            if not api_key:
                logger.warning(f"No API key found in environment variable {env_var}")
            return api_key
        return None

    async def analyze_license(self, license_id: str, license_text: str) -> Dict[str, Any]:
        """
        Analyze a license using the configured LLM provider.

        Args:
            license_id: SPDX license identifier
            license_text: Full license text

        Returns:
            Analysis results from the provider, or the pattern-based fallback
            analysis when no provider is available.
        """
        if not self.llm_provider:
            logger.warning(f"LLM provider not available, returning fallback for {license_id}")
            return self._get_fallback_analysis(license_id)

        return await self.llm_provider.analyze_license(license_id, license_text)

    def _get_fallback_analysis(self, license_id: str) -> Dict[str, Any]:
        """
        Get fallback analysis based on known license patterns.

        The identifier is matched by case-sensitive substring. "LGPL" and
        "AGPL" both contain "GPL", so LGPL is tested before GPL and AGPL is
        handled as a refinement inside the GPL branch; otherwise neither
        could ever be reached.

        Args:
            license_id: SPDX license identifier

        Returns:
            Basic analysis results. Identifiers matching no known pattern
            get the permissive defaults.
        """
        # Default analysis structure
        analysis = {
            "license_id": license_id,
            "category": "permissive",
            "permissions": {
                "commercial_use": True,
                "distribution": True,
                "modification": True,
                "patent_grant": False,
                "private_use": True
            },
            "conditions": {
                "disclose_source": False,
                "include_license": True,
                "include_copyright": True,
                "include_notice": False,
                "state_changes": False,
                "same_license": False,
                "network_use_disclosure": False
            },
            "limitations": {
                "liability": True,
                "warranty": True,
                "trademark_use": False
            },
            "compatibility": {
                "can_combine_with_permissive": True,
                "can_combine_with_weak_copyleft": True,
                "can_combine_with_strong_copyleft": False,
                "static_linking_restrictions": "none",
                "dynamic_linking_restrictions": "none"
            },
            "obligations": ["Include license text", "Include copyright notice"],
            "key_requirements": ["Attribution required"]
        }

        # Customize based on known patterns (most specific name first)
        if "LGPL" in license_id:
            analysis["category"] = "copyleft_weak"
            analysis["conditions"]["disclose_source"] = True
            analysis["compatibility"]["static_linking_restrictions"] = "weak"
            analysis["obligations"] = [
                "Disclose source of LGPL components",
                "Allow relinking",
                "Include license text"
            ]

        elif "GPL" in license_id:
            # Covers both GPL and AGPL identifiers.
            analysis["category"] = "copyleft_strong"
            analysis["conditions"]["disclose_source"] = True
            analysis["conditions"]["same_license"] = True
            analysis["compatibility"]["can_combine_with_strong_copyleft"] = True
            analysis["compatibility"]["can_combine_with_permissive"] = False
            analysis["compatibility"]["static_linking_restrictions"] = "strong"
            analysis["obligations"] = [
                "Disclose source code",
                "Include license text",
                "State changes",
                "Use same license for derivatives"
            ]
            if "AGPL" in license_id:
                # AGPL keeps every strong-copyleft setting above and adds
                # the network-use disclosure condition.
                analysis["conditions"]["network_use_disclosure"] = True

        elif "Apache" in license_id:
            analysis["category"] = "permissive"
            analysis["permissions"]["patent_grant"] = True
            analysis["conditions"]["include_notice"] = True
            analysis["conditions"]["state_changes"] = True

        elif "MIT" in license_id or "BSD" in license_id or "ISC" in license_id:
            analysis["category"] = "permissive"

        elif "CC0" in license_id or "Unlicense" in license_id:
            analysis["category"] = "public_domain"
            analysis["conditions"]["include_license"] = False
            analysis["conditions"]["include_copyright"] = False
            analysis["obligations"] = []

        return analysis

    async def extract_compatibility_rules(self, license_id: str, analysis: Dict[str, Any]) -> Dict[str, Any]:
        """
        Extract detailed compatibility rules for a license.

        Args:
            license_id: SPDX license identifier
            analysis: License analysis results

        Returns:
            Compatibility rules from the provider, or category-based default
            rules when no provider is available.
        """
        if not self.llm_provider:
            return self._get_default_compatibility_rules(license_id, analysis)

        return await self.llm_provider.extract_compatibility_rules(license_id, analysis)

    def _get_default_compatibility_rules(self, license_id: str, analysis: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get default compatibility rules based on license category.

        Args:
            license_id: SPDX license identifier, listed as compatible with
                itself for the copyleft categories
            analysis: License analysis results; only the "category" key is
                read, defaulting to "permissive" when absent

        Returns:
            Rules for static linking, dynamic linking and distribution, plus
            "contamination_effect" and "notes". Categories other than
            "permissive", "copyleft_strong" and "copyleft_weak" get
            unrestricted default rules.
        """
        category = analysis.get("category", "permissive")

        if category == "permissive":
            return {
                "static_linking": {
                    "compatible_with": ["category:any"],
                    "incompatible_with": [],
                    "requires_review": []
                },
                "dynamic_linking": {
                    "compatible_with": ["category:any"],
                    "incompatible_with": [],
                    "requires_review": []
                },
                "distribution": {
                    "can_distribute_with": ["category:any"],
                    "cannot_distribute_with": [],
                    "special_requirements": ["Include license and copyright notice"]
                },
                "contamination_effect": "none",
                "notes": "Permissive license with minimal restrictions"
            }

        elif category == "copyleft_strong":
            return {
                "static_linking": {
                    "compatible_with": [license_id, "category:copyleft_strong"],
                    "incompatible_with": ["category:permissive", "category:proprietary"],
                    "requires_review": ["category:copyleft_weak"]
                },
                "dynamic_linking": {
                    "compatible_with": ["category:any"],
                    "incompatible_with": [],
                    "requires_review": ["category:proprietary"]
                },
                "distribution": {
                    "can_distribute_with": [license_id],
                    "cannot_distribute_with": ["category:proprietary"],
                    "special_requirements": ["Source code must be provided", "Same license required"]
                },
                "contamination_effect": "full",
                "notes": "Strong copyleft with viral effect"
            }

        elif category == "copyleft_weak":
            return {
                "static_linking": {
                    "compatible_with": ["category:permissive", license_id],
                    "incompatible_with": [],
                    "requires_review": ["category:copyleft_strong"]
                },
                "dynamic_linking": {
                    "compatible_with": ["category:any"],
                    "incompatible_with": [],
                    "requires_review": []
                },
                "distribution": {
                    "can_distribute_with": ["category:any"],
                    "cannot_distribute_with": [],
                    "special_requirements": ["Allow relinking", "Provide LGPL source"]
                },
                "contamination_effect": "module",
                "notes": "Weak copyleft affecting only the library itself"
            }

        else:
            return {
                "static_linking": {
                    "compatible_with": ["category:any"],
                    "incompatible_with": [],
                    "requires_review": []
                },
                "dynamic_linking": {
                    "compatible_with": ["category:any"],
                    "incompatible_with": [],
                    "requires_review": []
                },
                "distribution": {
                    "can_distribute_with": ["category:any"],
                    "cannot_distribute_with": [],
                    "special_requirements": []
                },
                "contamination_effect": "none",
                "notes": "Default compatibility rules"
            }

    async def batch_analyze(self, licenses: List[Dict[str, Any]], max_concurrent: int = 5) -> List[Dict[str, Any]]:
        """
        Analyze multiple licenses concurrently.

        Each license is analyzed, its compatibility rules are extracted, and
        the rules are stored on the analysis under "compatibility_rules".

        Args:
            licenses: List of license data; each entry is read via its "id"
                and "text" keys ("text" defaults to an empty string)
            max_concurrent: Maximum concurrent analyses (must be >= 1)

        Returns:
            List of analysis results, in the same order as ``licenses``.

        Raises:
            ValueError: If ``max_concurrent`` is less than 1. A zero-valued
                semaphore could never be acquired and would hang forever.
        """
        if max_concurrent < 1:
            raise ValueError("max_concurrent must be at least 1")

        semaphore = asyncio.Semaphore(max_concurrent)

        async def analyze_with_semaphore(license_data):
            # The semaphore caps how many licenses are in flight at once.
            async with semaphore:
                license_id = license_data.get("id")
                license_text = license_data.get("text", "")

                logger.info(f"Analyzing {license_id}")

                # Basic analysis
                analysis = await self.analyze_license(license_id, license_text)

                # Extract compatibility rules
                compatibility = await self.extract_compatibility_rules(license_id, analysis)
                analysis["compatibility_rules"] = compatibility

                return analysis

        # Process all licenses; gather preserves input order.
        tasks = [analyze_with_semaphore(lic) for lic in licenses]
        results = await asyncio.gather(*tasks)

        return results
|