api-mocker 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,482 @@
1
+ """
2
+ AI-Powered Mock Generation for API-Mocker.
3
+ """
4
+
5
+ import json
6
+ import time
7
+ import hashlib
8
+ from typing import Dict, List, Optional, Any, Union
9
+ from dataclasses import dataclass
10
+ from pathlib import Path
11
+ import logging
12
+ from contextlib import contextmanager
13
+
14
+ try:
15
+ import openai
16
+ from openai import OpenAI
17
+ OPENAI_AVAILABLE = True
18
+ except ImportError:
19
+ OPENAI_AVAILABLE = False
20
+
21
+ try:
22
+ from faker import Faker
23
+ FAKER_AVAILABLE = True
24
+ except ImportError:
25
+ FAKER_AVAILABLE = False
26
+
27
+ import jsonschema
28
+ from pydantic import BaseModel, Field
29
+ import jinja2
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
@dataclass
class GenerationRequest:
    """Request for AI-powered data generation."""
    # Natural-language description of the data to generate.
    prompt: str
    # API endpoint path the mock data is being generated for.
    endpoint: str
    # HTTP method of the mocked endpoint.
    method: str = "GET"
    # Optional JSON schema constraining the generated payload.
    schema: Optional[Dict] = None
    # Number of records to generate (1 yields a single object,
    # >1 yields a list — see AIGenerator's faker-based generators).
    count: int = 1
    # Locale code passed to Faker for fallback generation.
    language: str = "en"
    # Extra free-form context included verbatim in the AI prompt.
    context: Optional[Dict] = None
43
+
44
@dataclass
class GenerationResponse:
    """Response from AI-powered data generation."""
    # The generated payload (a single object or a list of objects).
    data: Any
    # Provenance info, e.g. {"source": "ai" | "faker" | "cache" | "faker_fallback", ...}.
    metadata: Dict[str, Any]
    # Cache key the data was stored/served under, when applicable.
    cache_key: Optional[str] = None
    # Wall-clock seconds spent generating (0.0 on a cache hit).
    generation_time: float = 0.0
51
+
52
class SchemaAnalyzer:
    """Analyzes API schemas to understand data structure."""

    def __init__(self):
        # Faker instance kept for callers; None when the optional
        # faker dependency is not installed.
        self.faker = Faker() if FAKER_AVAILABLE else None

    def analyze_schema(self, schema: Dict) -> Dict[str, Any]:
        """Walk a JSON schema and summarize its field types, regex
        patterns, length constraints, and declared examples.

        Returns a dict with "types", "patterns", "constraints" and
        "examples" sub-dicts keyed by dotted field path.
        """
        analysis: Dict[str, Any] = {
            section: {} for section in ("types", "patterns", "constraints", "examples")
        }
        if not schema:
            return analysis
        self._analyze_object(schema, analysis)
        return analysis

    def _analyze_object(self, obj: Dict, analysis: Dict, path: str = ""):
        """Recursively record findings for one schema node under *path*."""
        if "type" in obj:
            node_path = path if path else "root"
            analysis["types"][node_path] = obj["type"]

            # String-length bounds, if either is declared.
            if "minLength" in obj or "maxLength" in obj:
                analysis["constraints"][node_path] = {
                    "minLength": obj.get("minLength"),
                    "maxLength": obj.get("maxLength"),
                }
            # Regex pattern constraint.
            if "pattern" in obj:
                analysis["patterns"][node_path] = obj["pattern"]
            # Author-provided example value.
            if "example" in obj:
                analysis["examples"][node_path] = obj["example"]

        # Recurse into object properties, extending the dotted path.
        if "properties" in obj:
            for prop_name, prop_schema in obj["properties"].items():
                child_path = f"{path}.{prop_name}" if path else prop_name
                self._analyze_object(prop_schema, analysis, child_path)

        # Recurse into array item schema under a "[items]" suffix.
        if "items" in obj:
            child_path = f"{path}[items]" if path else "items"
            self._analyze_object(obj["items"], analysis, child_path)
104
+
105
class AIGenerator:
    """AI-powered data generator using an OpenAI-compatible chat API.

    Generation order of preference:
      1. AI (OpenAI, or Groq via its OpenAI-compatible endpoint) when a
         client is configured,
      2. Faker-based synthesis as a fallback,
    with successful results memoized in an in-process cache.

    NOTE: `Faker` appears only in quoted (string) annotations below so
    this class can still be defined when the optional `faker` package is
    not installed (a bare annotation would raise NameError at class
    definition time).
    """

    def __init__(self, api_key: Optional[str] = None, model: str = "gpt-3.5-turbo"):
        """
        Args:
            api_key: OpenAI or Groq API key. Keys prefixed with 'gsk_'
                are routed to Groq's OpenAI-compatible endpoint.
            model: Chat-completion model name used for AI generation.
        """
        self.api_key = api_key
        self.model = model
        self.client = None
        self.schema_analyzer = SchemaAnalyzer()
        # cache_key -> generated data; unbounded, cleared externally.
        self.cache: Dict[str, Any] = {}

        if OPENAI_AVAILABLE and api_key:
            if api_key.startswith('gsk_'):
                # Groq API key: same client class, different base URL.
                self.client = OpenAI(
                    api_key=api_key,
                    base_url="https://api.groq.com/openai/v1"
                )
            else:
                # Standard OpenAI API key.
                self.client = OpenAI(api_key=api_key)

    def generate_data(self, request: GenerationRequest) -> GenerationResponse:
        """Generate realistic data for *request*, preferring AI over Faker.

        Returns a GenerationResponse whose metadata records the source
        ("cache", "ai", "faker", or "faker_fallback").

        Raises:
            ValueError: if the AI path fails AND Faker is not installed.
        """
        start_time = time.time()

        # Serve identical requests from the in-process cache.
        cache_key = self._generate_cache_key(request)
        if cache_key in self.cache:
            logger.info(f"Using cached data for {cache_key}")
            return GenerationResponse(
                data=self.cache[cache_key],
                metadata={"source": "cache"},
                cache_key=cache_key,
                generation_time=0.0
            )

        try:
            if self.client and OPENAI_AVAILABLE:
                data = self._generate_with_ai(request)
                source = "ai"
            else:
                data = self._generate_with_faker(request)
                source = "faker"

            # Cache the result for subsequent identical requests.
            self.cache[cache_key] = data

            generation_time = time.time() - start_time

            return GenerationResponse(
                data=data,
                metadata={
                    "source": source,
                    "model": self.model if source == "ai" else "faker",
                    "cache_key": cache_key
                },
                cache_key=cache_key,
                generation_time=generation_time
            )

        except Exception as e:
            logger.error(f"Generation failed: {e}")
            # Best-effort Faker fallback. Deliberately NOT cached, so a
            # later identical request can retry the AI path.
            data = self._generate_with_faker(request)
            generation_time = time.time() - start_time

            return GenerationResponse(
                data=data,
                metadata={"source": "faker_fallback", "error": str(e)},
                generation_time=generation_time
            )

    def _generate_with_ai(self, request: GenerationRequest) -> Any:
        """Call the chat-completions API and parse the JSON it returns.

        Raises:
            ValueError: if no API client is configured.
        """
        if not self.client:
            raise ValueError("OpenAI client not available")

        prompt = self._build_ai_prompt(request)

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {
                    "role": "system",
                    "content": "You are an expert API developer. Generate realistic, diverse, and contextually appropriate mock data for API responses. Always return valid JSON."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            temperature=0.7,
            max_tokens=2000
        )

        content = response.choices[0].message.content
        try:
            # Models often wrap JSON in a ```json fence; strip it.
            if "```json" in content:
                json_start = content.find("```json") + 7
                json_end = content.find("```", json_start)
                if json_end == -1:
                    # Unterminated fence: take everything after the opener
                    # (the old slice [start:-1] silently dropped a char).
                    json_end = len(content)
                json_str = content[json_start:json_end].strip()
            else:
                json_str = content.strip()

            return json.loads(json_str)

        except json.JSONDecodeError:
            # If strict parsing fails, try to salvage the payload.
            return self._fix_json_response(content)

    def _generate_with_faker(self, request: GenerationRequest) -> Any:
        """Generate data using Faker as a fallback.

        Raises:
            ValueError: if the optional faker dependency is missing.
        """
        if not FAKER_AVAILABLE:
            raise ValueError("Faker not available")

        faker = Faker(request.language)

        if request.schema:
            # Schema-driven generation: honor declared field types.
            analysis = self.schema_analyzer.analyze_schema(request.schema)
            return self._generate_from_schema_analysis(analysis, faker, request.count)
        # No schema: fall back to keyword heuristics on the prompt.
        return self._generate_from_prompt(request.prompt, faker, request.count)

    def _build_ai_prompt(self, request: GenerationRequest) -> str:
        """Assemble the user prompt sent to the model from the request."""
        prompt_parts = [
            f"Generate realistic mock data for an API endpoint: {request.endpoint}",
            f"HTTP Method: {request.method}",
            f"Number of records: {request.count}",
            f"User request: {request.prompt}"
        ]

        if request.schema:
            prompt_parts.append(f"JSON Schema: {json.dumps(request.schema, indent=2)}")

        if request.context:
            prompt_parts.append(f"Context: {json.dumps(request.context, indent=2)}")

        prompt_parts.extend([
            "Requirements:",
            "- Generate realistic, diverse data",
            "- Follow the schema if provided",
            "- Include edge cases and variations",
            "- Make data contextually appropriate",
            "- Return valid JSON only",
            "- If generating multiple records, return an array"
        ])

        return "\n\n".join(prompt_parts)

    def _generate_cache_key(self, request: GenerationRequest) -> str:
        """Derive a deterministic cache key from the request contents.

        MD5 is used purely as a fast non-cryptographic fingerprint.
        """
        key_data = {
            "prompt": request.prompt,
            "endpoint": request.endpoint,
            "method": request.method,
            "count": request.count,
            "language": request.language,
            "schema_hash": hashlib.md5(json.dumps(request.schema or {}, sort_keys=True).encode()).hexdigest()[:8]
        }
        return hashlib.md5(json.dumps(key_data, sort_keys=True).encode()).hexdigest()

    def _fix_json_response(self, content: str) -> Any:
        """Attempt to fix common JSON formatting issues in model output.

        Returns parsed JSON when a top-level object or array can be
        located; otherwise an error dict carrying the raw text.
        """
        # Remove markdown code-fence formatting.
        content = content.replace("```json", "").replace("```", "").strip()

        # Look for the outermost JSON object OR array (the prompt asks
        # for an array when multiple records were requested).
        import re
        json_pattern = r'\{.*\}|\[.*\]'
        match = re.search(json_pattern, content, re.DOTALL)

        if match:
            try:
                return json.loads(match.group())
            except json.JSONDecodeError:
                pass

        # Last resort: wrap the unparseable content in an error object.
        return {"error": "Could not parse AI response", "raw_content": content}

    def _generate_from_schema_analysis(self, analysis: Dict, faker: "Faker", count: int) -> Any:
        """Generate *count* records matching the analyzed schema types."""
        if count == 1:
            return self._generate_single_record(analysis, faker)
        return [self._generate_single_record(analysis, faker) for _ in range(count)]

    def _generate_single_record(self, analysis: Dict, faker: "Faker") -> Dict:
        """Build one record, reconstructing nesting from dotted paths."""
        record = {}

        for field_path, field_type in analysis["types"].items():
            if "." in field_path:
                # Nested field: create intermediate dicts along the path.
                parts = field_path.split(".")
                current = record
                for part in parts[:-1]:
                    if part not in current:
                        current[part] = {}
                    current = current[part]
                current[parts[-1]] = self._generate_field_value(field_type, faker)
            else:
                # Root-level field.
                record[field_path] = self._generate_field_value(field_type, faker)

        return record

    def _generate_field_value(self, field_type: str, faker: "Faker") -> Any:
        """Produce a fake value for one JSON-schema primitive type."""
        if field_type == "string":
            return faker.text(max_nb_chars=50)
        elif field_type == "integer":
            return faker.random_int(min=1, max=1000)
        elif field_type == "number":
            return faker.pyfloat(min_value=0, max_value=1000)
        elif field_type == "boolean":
            return faker.boolean()
        elif field_type == "array":
            return [faker.word() for _ in range(faker.random_int(min=1, max=5))]
        elif field_type == "object":
            return {"key": faker.word(), "value": faker.text(max_nb_chars=20)}
        else:
            # Unknown type: default to a single word.
            return faker.word()

    def _generate_from_prompt(self, prompt: str, faker: "Faker", count: int) -> Any:
        """Generate data based on prompt keywords (user/product/generic)."""
        prompt_lower = prompt.lower()

        # Pick one record template from the prompt, then apply count once
        # (replaces three copy-pasted single/multi branches).
        if "user" in prompt_lower or "person" in prompt_lower:
            def make_record() -> Dict[str, Any]:
                return {
                    "id": faker.uuid4(),
                    "name": faker.name(),
                    "email": faker.email(),
                    "phone": faker.phone_number(),
                    "address": faker.address(),
                    "created_at": faker.iso8601()
                }
        elif "product" in prompt_lower or "item" in prompt_lower:
            def make_record() -> Dict[str, Any]:
                return {
                    "id": faker.uuid4(),
                    "name": faker.word() + " " + faker.word(),
                    "price": faker.pyfloat(min_value=1, max_value=1000),
                    "description": faker.text(max_nb_chars=100),
                    "category": faker.word(),
                    "in_stock": faker.boolean()
                }
        else:
            def make_record() -> Dict[str, Any]:
                return {
                    "id": faker.uuid4(),
                    "data": faker.text(max_nb_chars=50),
                    "timestamp": faker.iso8601(),
                    "status": "success"
                }

        if count == 1:
            return make_record()
        return [make_record() for _ in range(count)]
397
+
398
class TemplateEngine:
    """Template engine for dynamic response generation."""

    def __init__(self):
        # String-loader environment; templates arrive as plain strings,
        # never from disk. Autoescape stays on for safety.
        self.jinja_env = jinja2.Environment(
            loader=jinja2.BaseLoader(),
            autoescape=True
        )

    def render_template(self, template: str, context: Dict[str, Any]) -> str:
        """Render a Jinja2 template string against *context*.

        On any rendering failure the original template text is returned
        unchanged, so callers degrade gracefully.
        """
        try:
            return self.jinja_env.from_string(template).render(**context)
        except Exception as e:
            logger.error(f"Template rendering failed: {e}")
            return template
415
+
416
class AIGenerationManager:
    """Facade coordinating AI-powered generation for API-Mocker."""

    def __init__(self, api_key: Optional[str] = None):
        self.ai_generator = AIGenerator(api_key)
        self.template_engine = TemplateEngine()
        self.cache_enabled = True
        # NOTE(review): TTL is reported by get_cache_stats() but nothing
        # here evicts entries by age — confirm intended behavior.
        self.cache_ttl = 3600  # 1 hour

    def generate_mock_data(self,
                          prompt: str,
                          endpoint: str,
                          method: str = "GET",
                          schema: Optional[Dict] = None,
                          count: int = 1,
                          language: str = "en",
                          context: Optional[Dict] = None) -> Dict[str, Any]:
        """Generate mock data using AI and return a plain-dict summary."""
        generation = self.ai_generator.generate_data(
            GenerationRequest(
                prompt=prompt,
                endpoint=endpoint,
                method=method,
                schema=schema,
                count=count,
                language=language,
                context=context
            )
        )
        return {
            "data": generation.data,
            "metadata": generation.metadata,
            "generation_time": generation.generation_time,
            "cache_key": generation.cache_key
        }

    def generate_from_schema(self, schema: Dict, count: int = 1) -> Dict[str, Any]:
        """Generate data constrained by a JSON schema."""
        return self.generate_mock_data(
            prompt="Generate data based on the provided JSON schema",
            endpoint="/schema-based",
            schema=schema,
            count=count
        )

    def generate_from_example(self, example: Dict, count: int = 1) -> Dict[str, Any]:
        """Generate data shaped like the given example object."""
        return self.generate_mock_data(
            prompt=f"Generate data similar to this example: {json.dumps(example)}",
            endpoint="/example-based",
            count=count
        )

    def clear_cache(self):
        """Drop every cached generation result."""
        self.ai_generator.cache.clear()
        logger.info("AI generation cache cleared")

    def get_cache_stats(self) -> Dict[str, Any]:
        """Report cache size and configuration."""
        stats = {
            "cache_size": len(self.ai_generator.cache),
            "cache_enabled": self.cache_enabled,
            "cache_ttl": self.cache_ttl
        }
        return stats