ospac 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ospac might be problematic. Click here for more details.

@@ -0,0 +1,338 @@
1
+ """
2
+ LLM-based license analyzer with configurable providers.
3
+ Supports OpenAI, Anthropic Claude, and local Ollama.
4
+ Analyzes licenses to extract obligations, compatibility rules, and classifications.
5
+ """
6
+
7
+ import logging
8
+ from typing import Dict, List, Any, Optional
9
+ import asyncio
10
+ import os
11
+
12
+ from ospac.pipeline.llm_providers import (
13
+ LLMConfig,
14
+ LLMProvider,
15
+ create_llm_provider
16
+ )
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class LicenseAnalyzer:
    """
    Analyze licenses using configurable LLM providers.

    Supports OpenAI, Anthropic Claude, and local Ollama. When the provider
    cannot be created, the analyzer falls back to built-in heuristics keyed
    on the SPDX license identifier.
    """

    # Model used when the caller does not name one explicitly.
    _DEFAULT_MODELS = {
        "openai": "gpt-4o-mini",
        "claude": "claude-3-haiku-20240307",
        "ollama": "llama3:latest",
    }
    # Model used for provider names missing from _DEFAULT_MODELS.
    _FALLBACK_MODEL = "gpt-4o-mini"

    # Environment variable holding the API key for each provider.
    _API_KEY_ENV_VARS = {
        "openai": "OPENAI_API_KEY",
        "claude": "ANTHROPIC_API_KEY",
        "ollama": None,  # No API key needed for local Ollama
    }

    def __init__(
        self,
        provider: str = "ollama",
        model: Optional[str] = None,
        api_key: Optional[str] = None,
        **kwargs,
    ):
        """
        Initialize the license analyzer with specified provider.

        Args:
            provider: LLM provider ("openai", "claude", "ollama")
            model: Model name (auto-selected if not provided)
            api_key: API key for cloud providers (or from environment)
            **kwargs: Additional provider-specific configuration, forwarded
                unchanged to ``LLMConfig``
        """
        self.provider_name = provider.lower()

        # Auto-select models if not provided
        if not model:
            model = self._get_default_model(self.provider_name)

        # Get API key from environment if not provided
        if not api_key:
            api_key = self._get_api_key_from_env(self.provider_name)

        # Create configuration
        self.config = LLMConfig(
            provider=self.provider_name,
            model=model,
            api_key=api_key,
            **kwargs
        )

        # Initialize provider. A failure here is deliberately non-fatal:
        # llm_provider is set to None and the analysis methods switch to
        # the built-in fallback results.
        try:
            self.llm_provider = create_llm_provider(self.config)
            logger.info(
                "Initialized %s provider with model %s",
                self.provider_name,
                model,
            )
        except Exception as e:
            logger.error(
                "Failed to initialize %s provider: %s", self.provider_name, e
            )
            self.llm_provider = None

    def _get_default_model(self, provider: str) -> str:
        """Get default model for each provider (gpt-4o-mini if unknown)."""
        return self._DEFAULT_MODELS.get(provider, self._FALLBACK_MODEL)

    def _get_api_key_from_env(self, provider: str) -> Optional[str]:
        """
        Get API key from environment variables.

        Returns None for providers without a configured variable (for
        example local Ollama) and logs a warning when the expected
        variable is unset or empty.
        """
        env_var = self._API_KEY_ENV_VARS.get(provider)
        if not env_var:
            return None

        api_key = os.getenv(env_var)
        if not api_key:
            logger.warning(
                "No API key found in environment variable %s", env_var
            )
        return api_key

    async def analyze_license(self, license_id: str, license_text: str) -> Dict[str, Any]:
        """
        Analyze a license using the configured LLM provider.

        Args:
            license_id: SPDX license identifier
            license_text: Full license text

        Returns:
            Analysis results from the provider, or the heuristic fallback
            analysis when no provider is available
        """
        if not self.llm_provider:
            logger.warning(
                "LLM provider not available, returning fallback for %s",
                license_id,
            )
            return self._get_fallback_analysis(license_id)

        return await self.llm_provider.analyze_license(license_id, license_text)

    def _get_fallback_analysis(self, license_id: str) -> Dict[str, Any]:
        """
        Get fallback analysis based on known license patterns.

        Args:
            license_id: SPDX license identifier

        Returns:
            Basic analysis results; identifiers that match no known pattern
            keep the permissive defaults
        """
        # Default analysis structure
        analysis = {
            "license_id": license_id,
            "category": "permissive",
            "permissions": {
                "commercial_use": True,
                "distribution": True,
                "modification": True,
                "patent_grant": False,
                "private_use": True
            },
            "conditions": {
                "disclose_source": False,
                "include_license": True,
                "include_copyright": True,
                "include_notice": False,
                "state_changes": False,
                "same_license": False,
                "network_use_disclosure": False
            },
            "limitations": {
                "liability": True,
                "warranty": True,
                "trademark_use": False
            },
            "compatibility": {
                "can_combine_with_permissive": True,
                "can_combine_with_weak_copyleft": True,
                "can_combine_with_strong_copyleft": False,
                "static_linking_restrictions": "none",
                "dynamic_linking_restrictions": "none"
            },
            "obligations": ["Include license text", "Include copyright notice"],
            "key_requirements": ["Attribution required"]
        }

        # A missing identifier matches no pattern and keeps the defaults.
        identifier = license_id or ""
        conditions = analysis["conditions"]
        compatibility = analysis["compatibility"]

        # Customize based on known patterns. "LGPL" and "AGPL" both contain
        # "GPL", so the more specific names must be tested before the
        # generic GPL match; otherwise they are never reached.
        if "LGPL" in identifier:
            analysis["category"] = "copyleft_weak"
            conditions["disclose_source"] = True
            compatibility["static_linking_restrictions"] = "weak"
            analysis["obligations"] = [
                "Disclose source of LGPL components",
                "Allow relinking",
                "Include license text"
            ]

        elif "GPL" in identifier:
            # Covers both GPL and AGPL: AGPL is strong copyleft with an
            # additional network-use disclosure condition.
            analysis["category"] = "copyleft_strong"
            conditions["disclose_source"] = True
            conditions["same_license"] = True
            compatibility["can_combine_with_strong_copyleft"] = True
            compatibility["can_combine_with_permissive"] = False
            compatibility["static_linking_restrictions"] = "strong"
            analysis["obligations"] = [
                "Disclose source code",
                "Include license text",
                "State changes",
                "Use same license for derivatives"
            ]
            if "AGPL" in identifier:
                conditions["network_use_disclosure"] = True
                analysis["obligations"].append("Provide source to network users")

        elif "Apache" in identifier:
            analysis["category"] = "permissive"
            analysis["permissions"]["patent_grant"] = True
            conditions["include_notice"] = True
            conditions["state_changes"] = True

        elif "MIT" in identifier or "BSD" in identifier or "ISC" in identifier:
            analysis["category"] = "permissive"

        elif "CC0" in identifier or "Unlicense" in identifier:
            analysis["category"] = "public_domain"
            conditions["include_license"] = False
            conditions["include_copyright"] = False
            analysis["obligations"] = []

        return analysis

    async def extract_compatibility_rules(self, license_id: str, analysis: Dict[str, Any]) -> Dict[str, Any]:
        """
        Extract detailed compatibility rules for a license.

        Args:
            license_id: SPDX license identifier
            analysis: License analysis results

        Returns:
            Compatibility rules from the provider, or category-based
            defaults when no provider is available
        """
        if not self.llm_provider:
            return self._get_default_compatibility_rules(license_id, analysis)

        return await self.llm_provider.extract_compatibility_rules(license_id, analysis)

    def _get_default_compatibility_rules(self, license_id: str, analysis: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get default compatibility rules based on license category.

        Reads ``analysis["category"]`` (treated as "permissive" when
        absent) and returns a freshly built rules dictionary, so callers
        may mutate the result safely.
        """
        category = analysis.get("category", "permissive")

        if category == "permissive":
            return {
                "static_linking": {
                    "compatible_with": ["category:any"],
                    "incompatible_with": [],
                    "requires_review": []
                },
                "dynamic_linking": {
                    "compatible_with": ["category:any"],
                    "incompatible_with": [],
                    "requires_review": []
                },
                "distribution": {
                    "can_distribute_with": ["category:any"],
                    "cannot_distribute_with": [],
                    "special_requirements": ["Include license and copyright notice"]
                },
                "contamination_effect": "none",
                "notes": "Permissive license with minimal restrictions"
            }

        if category == "copyleft_strong":
            return {
                "static_linking": {
                    "compatible_with": [license_id, "category:copyleft_strong"],
                    "incompatible_with": ["category:permissive", "category:proprietary"],
                    "requires_review": ["category:copyleft_weak"]
                },
                "dynamic_linking": {
                    "compatible_with": ["category:any"],
                    "incompatible_with": [],
                    "requires_review": ["category:proprietary"]
                },
                "distribution": {
                    "can_distribute_with": [license_id],
                    "cannot_distribute_with": ["category:proprietary"],
                    "special_requirements": ["Source code must be provided", "Same license required"]
                },
                "contamination_effect": "full",
                "notes": "Strong copyleft with viral effect"
            }

        if category == "copyleft_weak":
            return {
                "static_linking": {
                    "compatible_with": ["category:permissive", license_id],
                    "incompatible_with": [],
                    "requires_review": ["category:copyleft_strong"]
                },
                "dynamic_linking": {
                    "compatible_with": ["category:any"],
                    "incompatible_with": [],
                    "requires_review": []
                },
                "distribution": {
                    "can_distribute_with": ["category:any"],
                    "cannot_distribute_with": [],
                    "special_requirements": ["Allow relinking", "Provide LGPL source"]
                },
                "contamination_effect": "module",
                "notes": "Weak copyleft affecting only the library itself"
            }

        # Any other category (for example "public_domain").
        return {
            "static_linking": {
                "compatible_with": ["category:any"],
                "incompatible_with": [],
                "requires_review": []
            },
            "dynamic_linking": {
                "compatible_with": ["category:any"],
                "incompatible_with": [],
                "requires_review": []
            },
            "distribution": {
                "can_distribute_with": ["category:any"],
                "cannot_distribute_with": [],
                "special_requirements": []
            },
            "contamination_effect": "none",
            "notes": "Default compatibility rules"
        }

    async def batch_analyze(self, licenses: List[Dict[str, Any]], max_concurrent: int = 5) -> List[Dict[str, Any]]:
        """
        Analyze multiple licenses concurrently.

        Args:
            licenses: List of license data with id and text
            max_concurrent: Maximum concurrent analyses (values below 1
                are treated as 1)

        Returns:
            List of analysis results in the same order as ``licenses``,
            each extended with a "compatibility_rules" entry
        """
        # A semaphore of 0 would block every task forever, so always
        # allow at least one analysis to run.
        semaphore = asyncio.Semaphore(max(1, max_concurrent))

        async def analyze_with_semaphore(license_data):
            async with semaphore:
                license_id = license_data.get("id")
                license_text = license_data.get("text", "")

                logger.info("Analyzing %s", license_id)

                # Basic analysis
                analysis = await self.analyze_license(license_id, license_text)

                # Extract compatibility rules
                compatibility = await self.extract_compatibility_rules(license_id, analysis)
                analysis["compatibility_rules"] = compatibility

                return analysis

        # Process all licenses; gather preserves input order.
        tasks = [analyze_with_semaphore(lic) for lic in licenses]
        return list(await asyncio.gather(*tasks))