koreshield 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,13 +7,7 @@ from langchain_core.messages import BaseMessage
7
7
 
8
8
  from ..client import KoreShieldClient
9
9
  from ..async_client import AsyncKoreShieldClient
10
- from ..types import (
11
- DetectionResult,
12
- ThreatLevel,
13
- RAGDocument,
14
- RAGScanResponse,
15
- RAGScanConfig,
16
- )
10
+ from ..types import DetectionResult, ThreatLevel
17
11
  from ..exceptions import KoreShieldError
18
12
 
19
13
 
@@ -278,193 +272,4 @@ def create_async_koreshield_callback(
278
272
  block_on_threat=block_on_threat,
279
273
  threat_threshold=threat_threshold,
280
274
  **kwargs
281
- )
282
-
283
-
284
- # RAG Document Scanning Support
285
-
286
- class SecureRetriever:
287
- """Wrapper for LangChain retrievers that adds automatic RAG security scanning.
288
-
289
- This class wraps any LangChain retriever and automatically scans retrieved
290
- documents for indirect prompt injection attacks before returning them.
291
-
292
- Example:
293
- ```python
294
- from langchain.vectorstores import Chroma
295
- from koreshield_sdk.integrations.langchain import SecureRetriever
296
-
297
- # Original retriever
298
- base_retriever = vectorstore.as_retriever()
299
-
300
- # Wrap with security
301
- secure_retriever = SecureRetriever(
302
- retriever=base_retriever,
303
- koreshield_api_key="your-key",
304
- block_threats=True,
305
- min_confidence=0.3
306
- )
307
-
308
- # Use as normal - automatic scanning
309
- docs = secure_retriever.get_relevant_documents("user query")
310
- # Threatening documents are automatically filtered
311
- ```
312
- """
313
-
314
- def __init__(
315
- self,
316
- retriever: Any,
317
- koreshield_api_key: str,
318
- koreshield_base_url: str = "http://localhost:8000",
319
- block_threats: bool = True,
320
- min_confidence: float = 0.3,
321
- enable_cross_document_analysis: bool = True,
322
- log_threats: bool = True,
323
- ):
324
- """Initialize secure retriever.
325
-
326
- Args:
327
- retriever: Base LangChain retriever to wrap
328
- koreshield_api_key: KoreShield API key
329
- koreshield_base_url: API base URL
330
- block_threats: Whether to filter threatening documents
331
- min_confidence: Threat confidence threshold (0.0-1.0)
332
- enable_cross_document_analysis: Enable multi-doc threat detection
333
- log_threats: Log detected threats
334
- """
335
- self.retriever = retriever
336
- self.koreshield = KoreShieldClient(
337
- api_key=koreshield_api_key,
338
- base_url=koreshield_base_url
339
- )
340
- self.block_threats = block_threats
341
- self.min_confidence = min_confidence
342
- self.enable_cross_document_analysis = enable_cross_document_analysis
343
- self.log_threats = log_threats
344
-
345
- # Statistics
346
- self.total_scans = 0
347
- self.total_threats_detected = 0
348
- self.total_documents_blocked = 0
349
-
350
- def get_relevant_documents(self, query: str) -> List[Any]:
351
- """Retrieve and scan documents.
352
-
353
- Args:
354
- query: User's query
355
-
356
- Returns:
357
- List of LangChain documents (threats filtered if enabled)
358
- """
359
- # Retrieve documents
360
- documents = self.retriever.get_relevant_documents(query)
361
-
362
- if not documents:
363
- return documents
364
-
365
- # Convert to RAG documents
366
- rag_documents = []
367
- for idx, doc in enumerate(documents):
368
- rag_doc = RAGDocument(
369
- id=doc.metadata.get("id", f"doc_{idx}"),
370
- content=doc.page_content,
371
- metadata=doc.metadata
372
- )
373
- rag_documents.append(rag_doc)
374
-
375
- # Scan with KoreShield
376
- config = RAGScanConfig(
377
- min_confidence=self.min_confidence,
378
- enable_cross_document_analysis=self.enable_cross_document_analysis
379
- )
380
-
381
- result = self.koreshield.scan_rag_context(
382
- user_query=query,
383
- documents=rag_documents,
384
- config=config
385
- )
386
-
387
- self.total_scans += 1
388
-
389
- # Handle threats
390
- if not result.is_safe:
391
- self.total_threats_detected += 1
392
-
393
- if self.log_threats:
394
- print(f"[KoreShield] RAG threat detected: {result.overall_severity}")
395
- print(f"[KoreShield] Confidence: {result.overall_confidence:.2f}")
396
- print(f"[KoreShield] Vectors: {result.taxonomy.injection_vectors}")
397
-
398
- if self.block_threats:
399
- # Filter out threatening documents
400
- safe_rag_docs = result.get_safe_documents(rag_documents)
401
- safe_ids = {doc.id for doc in safe_rag_docs}
402
-
403
- filtered_docs = [
404
- doc for idx, doc in enumerate(documents)
405
- if rag_documents[idx].id in safe_ids
406
- ]
407
-
408
- blocked_count = len(documents) - len(filtered_docs)
409
- self.total_documents_blocked += blocked_count
410
-
411
- if self.log_threats:
412
- print(f"[KoreShield] Filtered {blocked_count} threatening documents")
413
-
414
- return filtered_docs
415
-
416
- return documents
417
-
418
- def get_stats(self) -> Dict[str, Any]:
419
- """Get retriever statistics.
420
-
421
- Returns:
422
- Dictionary with scan statistics
423
- """
424
- return {
425
- "total_scans": self.total_scans,
426
- "total_threats_detected": self.total_threats_detected,
427
- "total_documents_blocked": self.total_documents_blocked,
428
- "threat_detection_rate": (
429
- self.total_threats_detected / self.total_scans
430
- if self.total_scans > 0 else 0.0
431
- )
432
- }
433
-
434
-
435
- def secure_retriever(
436
- retriever: Any,
437
- api_key: str,
438
- base_url: str = "http://localhost:8000",
439
- **kwargs
440
- ) -> SecureRetriever:
441
- """Create a secure retriever from any LangChain retriever.
442
-
443
- Args:
444
- retriever: Base LangChain retriever
445
- api_key: KoreShield API key
446
- base_url: KoreShield API base URL
447
- **kwargs: Additional SecureRetriever arguments
448
-
449
- Returns:
450
- SecureRetriever instance
451
-
452
- Example:
453
- ```python
454
- from koreshield_sdk.integrations.langchain import secure_retriever
455
-
456
- safe_retriever = secure_retriever(
457
- vectorstore.as_retriever(),
458
- api_key="your-key",
459
- block_threats=True
460
- )
461
-
462
- docs = safe_retriever.get_relevant_documents("user query")
463
- ```
464
- """
465
- return SecureRetriever(
466
- retriever,
467
- koreshield_api_key=api_key,
468
- koreshield_base_url=base_url,
469
- **kwargs
470
275
  )
koreshield_sdk/types.py CHANGED
@@ -88,161 +88,55 @@ class BatchScanResponse(BaseModel):
88
88
  processing_time_ms: float
89
89
  request_id: str
90
90
  timestamp: str
91
+ version: str
91
92
 
92
93
 
93
- # RAG Detection Types
94
-
95
- class InjectionVector(str, Enum):
96
- """RAG injection vector taxonomy."""
97
- EMAIL = "email"
98
- DOCUMENT = "document"
99
- WEB_SCRAPING = "web_scraping"
100
- DATABASE = "database"
101
- CHAT_MESSAGE = "chat_message"
102
- CUSTOMER_SUPPORT = "customer_support"
103
- KNOWLEDGE_BASE = "knowledge_base"
104
- API_INTEGRATION = "api_integration"
105
- UNKNOWN = "unknown"
106
-
107
-
108
- class OperationalTarget(str, Enum):
109
- """RAG operational target taxonomy."""
110
- DATA_EXFILTRATION = "data_exfiltration"
111
- PRIVILEGE_ESCALATION = "privilege_escalation"
112
- ACCESS_CONTROL_BYPASS = "access_control_bypass"
113
- CONTEXT_POISONING = "context_poisoning"
114
- SYSTEM_PROMPT_LEAKING = "system_prompt_leaking"
115
- MISINFORMATION = "misinformation"
116
- RECONNAISSANCE = "reconnaissance"
117
- UNKNOWN = "unknown"
118
-
119
-
120
- class PersistenceMechanism(str, Enum):
121
- """RAG persistence mechanism taxonomy."""
122
- SINGLE_TURN = "single_turn"
123
- MULTI_TURN = "multi_turn"
124
- CONTEXT_PERSISTENCE = "context_persistence"
125
- NON_PERSISTENT = "non_persistent"
126
-
127
-
128
- class EnterpriseContext(str, Enum):
129
- """Enterprise context taxonomy."""
130
- CRM = "crm"
131
- SALES = "sales"
132
- CUSTOMER_SUPPORT = "customer_support"
133
- MARKETING = "marketing"
134
- HEALTHCARE = "healthcare"
135
- FINANCIAL_SERVICES = "financial_services"
136
- GENERAL = "general"
137
-
138
-
139
- class DetectionComplexity(str, Enum):
140
- """Detection complexity taxonomy."""
141
- LOW = "low"
142
- MEDIUM = "medium"
143
- HIGH = "high"
144
-
145
-
146
- class RAGDocument(BaseModel):
147
- """Document to be scanned in RAG context."""
148
- id: str
94
+ class StreamingScanRequest(BaseModel):
95
+ """Request for streaming security scanning."""
149
96
  content: str
150
- metadata: Optional[Dict[str, Any]] = Field(default_factory=dict)
97
+ chunk_size: int = 1000
98
+ overlap: int = 100
99
+ context: Optional[Dict[str, Any]] = None
100
+ user_id: Optional[str] = None
101
+ session_id: Optional[str] = None
102
+ metadata: Optional[Dict[str, Any]] = None
151
103
 
152
104
  model_config = ConfigDict(extra="allow")
153
105
 
154
106
 
155
- class DocumentThreat(BaseModel):
156
- """Individual document-level threat."""
157
- document_id: str
158
- severity: ThreatLevel
159
- confidence: float
160
- patterns_matched: List[str]
161
- injection_vectors: List[InjectionVector]
162
- operational_targets: List[OperationalTarget]
163
- metadata: Optional[Dict[str, Any]] = None
107
+ class StreamingScanResponse(BaseModel):
108
+ """Response from streaming security scanning."""
109
+ chunk_results: List[DetectionResult]
110
+ overall_result: DetectionResult
111
+ total_chunks: int
112
+ processing_time_ms: float
113
+ request_id: str
114
+ timestamp: str
115
+ version: str
164
116
 
165
117
 
166
- class CrossDocumentThreat(BaseModel):
167
- """Cross-document threat detected across multiple documents."""
168
- threat_type: str # "staged_attack", "coordinated_instructions", "temporal_chain"
169
- severity: ThreatLevel
170
- confidence: float
171
- document_ids: List[str]
172
- description: str
173
- patterns: List[str]
118
+ class SecurityPolicy(BaseModel):
119
+ """Custom security policy configuration."""
120
+ name: str
121
+ description: Optional[str] = None
122
+ threat_threshold: ThreatLevel = ThreatLevel.MEDIUM
123
+ blocked_detection_types: List[DetectionType] = Field(default_factory=list)
124
+ custom_rules: List[Dict[str, Any]] = Field(default_factory=list)
125
+ allowlist_patterns: List[str] = Field(default_factory=list)
126
+ blocklist_patterns: List[str] = Field(default_factory=list)
174
127
  metadata: Optional[Dict[str, Any]] = None
175
128
 
176
129
 
177
- class TaxonomyClassification(BaseModel):
178
- """5-dimensional taxonomy classification."""
179
- injection_vectors: List[InjectionVector]
180
- operational_targets: List[OperationalTarget]
181
- persistence_mechanisms: List[PersistenceMechanism]
182
- enterprise_contexts: List[EnterpriseContext]
183
- detection_complexity: DetectionComplexity
184
-
185
-
186
- class ContextAnalysis(BaseModel):
187
- """RAG context analysis results."""
188
- document_threats: List[DocumentThreat]
189
- cross_document_threats: List[CrossDocumentThreat]
190
- statistics: Dict[str, Any]
191
-
192
-
193
- class RAGScanResponse(BaseModel):
194
- """Response from RAG context scanning."""
195
- is_safe: bool
196
- overall_severity: ThreatLevel
197
- overall_confidence: float
198
- taxonomy: TaxonomyClassification
199
- context_analysis: ContextAnalysis
200
- request_id: Optional[str] = None
201
- timestamp: Optional[str] = None
202
-
203
- def get_threat_document_ids(self) -> List[str]:
204
- """Get list of document IDs with detected threats.
205
-
206
- Returns:
207
- List of document IDs that contain threats
208
- """
209
- threat_ids = set()
210
-
211
- # From document-level threats
212
- for threat in self.context_analysis.document_threats:
213
- threat_ids.add(threat.document_id)
214
-
215
- # From cross-document threats
216
- for threat in self.context_analysis.cross_document_threats:
217
- threat_ids.update(threat.document_ids)
218
-
219
- return list(threat_ids)
220
-
221
- def get_safe_documents(self, original_documents: List[RAGDocument]) -> List[RAGDocument]:
222
- """Filter out threatening documents.
223
-
224
- Args:
225
- original_documents: Original list of documents scanned
226
-
227
- Returns:
228
- List of documents without detected threats
229
- """
230
- threat_ids = set(self.get_threat_document_ids())
231
- return [doc for doc in original_documents if doc.id not in threat_ids]
232
-
233
- def has_critical_threats(self) -> bool:
234
- """Check if critical threats were detected.
235
-
236
- Returns:
237
- True if any critical severity threats found
238
- """
239
- return self.overall_severity == ThreatLevel.CRITICAL
240
-
241
-
242
- class RAGScanRequest(BaseModel):
243
- """Request for RAG context scanning"""
244
- user_query: str
245
- documents: List[RAGDocument]
246
- config: Optional[Dict[str, Any]] = Field(default_factory=dict)
247
-
248
- model_config = ConfigDict(extra="allow")
130
+ class PerformanceMetrics(BaseModel):
131
+ """SDK performance and usage metrics."""
132
+ total_requests: int = 0
133
+ total_processing_time_ms: float = 0.0
134
+ average_response_time_ms: float = 0.0
135
+ requests_per_second: float = 0.0
136
+ error_count: int = 0
137
+ cache_hit_rate: float = 0.0
138
+ batch_efficiency: float = 0.0
139
+ streaming_chunks_processed: int = 0
140
+ uptime_seconds: float = 0.0
141
+ memory_usage_mb: Optional[float] = None
142
+ custom_metrics: Dict[str, Any] = Field(default_factory=dict)
@@ -1,13 +0,0 @@
1
- koreshield-0.1.5.dist-info/licenses/LICENSE,sha256=k3qeCwQxhbOO1GtxA10Do4-_veQzgflqjOp5uZD5mug,1071
2
- koreshield_sdk/__init__.py,sha256=JXErgUsoxTgM4EU--Os4ZTobARKWj1Mfurln-hNgCQw,785
3
- koreshield_sdk/async_client.py,sha256=WF4MQVefUJs-YpjVE4qkrP5P9vT6wb5qFJdsdebtOtc,14877
4
- koreshield_sdk/client.py,sha256=LHuCrHwugzDeoMY5bxmYRmIyRUwJUNgL_Vv3f5ncqpE,13217
5
- koreshield_sdk/exceptions.py,sha256=3j1FR4VFbe1Vv4i0bofBgQ_ZGwBfpOInBd9OyNQFUxo,945
6
- koreshield_sdk/py.typed,sha256=8ZJUsxZiuOy1oJeVhsTWQhTG_6pTVHVXk5hJL79ebTk,25
7
- koreshield_sdk/types.py,sha256=UabFBswT4ckPt2Umwl9FqOBSpPl6RN4FWJPl5qDn5cc,7034
8
- koreshield_sdk/integrations/__init__.py,sha256=po_sLSND55Wdu1vDmx4Nrjm072HLf04yxmtWj43yv7Y,382
9
- koreshield_sdk/integrations/langchain.py,sha256=w3BXs3tVk7R4ldFPhAm7qXbJPsHoamY3z2Ke0WPBVas,16542
10
- koreshield-0.1.5.dist-info/METADATA,sha256=XqNTIRL56qucFtHk2U0l7sfvtWSmCfGMiPwUTslFQ6A,15408
11
- koreshield-0.1.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
12
- koreshield-0.1.5.dist-info/top_level.txt,sha256=ePw2ZI3SrHZ5CaTRCyj3aya3j_qTcmRAQjoU7s3gAdM,15
13
- koreshield-0.1.5.dist-info/RECORD,,