stacksagent 1.4.0 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,522 @@
1
+ """
2
+ Example-Driven Knowledge Base Module
3
+
4
+ Manages complete, runnable code examples with test cases, pitfalls, and live references.
5
+ Provides example search, retrieval, and validation capabilities.
6
+ """
7
+
8
+ import csv
9
+ import json
10
+ import re
11
+ from pathlib import Path
12
+ from typing import List, Dict, Any, Optional
13
+ from dataclasses import dataclass
14
+
15
+
16
# Score multipliers keyed by example type. search_examples() applies the
# multiplier of at most one type per query, chosen from the query's wording.
EXAMPLE_TYPE_BOOST: Dict[str, float] = {
    'quickstart': 1.0,     # baseline, no boost
    'integration': 1.2,    # used for "integrate / combine / workflow" queries
    'debugging': 1.3,      # used for error / failure queries
    'best-practice': 1.1,  # moderate boost
    'security': 1.4,       # largest multiplier, used for security queries
}
24
+
25
+
26
@dataclass
class Example:
    """A complete code example as stored in one row of the examples CSV.

    All fields hold the raw strings read from the CSV; the ``get_*`` helpers
    decode the structured fields on demand.
    """

    id: str
    domain: str
    example_type: str
    scenario: str
    problem: str
    solution_code: str
    explanation: str
    test_inputs: str  # JSON string
    expected_outputs: str  # JSON string
    pitfalls: str
    live_example_url: str
    related_snippets: str  # "file.csv:id,file2.csv:id2"
    tags: str
    difficulty: str

    def to_dict(self) -> Dict[str, Any]:
        """Return the example as a plain dict with one key per field."""
        return {
            'id': self.id,
            'domain': self.domain,
            'example_type': self.example_type,
            'scenario': self.scenario,
            'problem': self.problem,
            'solution_code': self.solution_code,
            'explanation': self.explanation,
            'test_inputs': self.test_inputs,
            'expected_outputs': self.expected_outputs,
            'pitfalls': self.pitfalls,
            'live_example_url': self.live_example_url,
            'related_snippets': self.related_snippets,
            'tags': self.tags,
            'difficulty': self.difficulty
        }

    @staticmethod
    def _decode_json(raw: str) -> Dict:
        """Decode a JSON field; an empty or malformed value yields ``{}``."""
        if not raw:
            return {}
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            return {}

    def get_test_inputs(self) -> Dict:
        """Parse ``test_inputs`` as JSON.

        Returns:
            The decoded value, or ``{}`` when the field is empty or is not
            valid JSON.
        """
        return self._decode_json(self.test_inputs)

    def get_expected_outputs(self) -> Dict:
        """Parse ``expected_outputs`` as JSON.

        Returns:
            The decoded value, or ``{}`` when the field is empty or is not
            valid JSON.
        """
        return self._decode_json(self.expected_outputs)

    def get_related_snippet_refs(self) -> List[Dict[str, str]]:
        """
        Parse related_snippets into list of references

        Format: "domain.csv:id,domain2.csv:id2"
        Returns: [{"file": "domain.csv", "id": "id"}, ...]

        Whitespace around the file name and the id is ignored, and entries
        that lack a ``:`` separator, a file name, or an id are skipped.
        """
        if not self.related_snippets:
            return []

        refs = []
        for entry in self.related_snippets.split(','):
            # Split on the first ':' only, so an id may itself contain ':'.
            file_name, separator, snippet_id = entry.partition(':')
            file_name = file_name.strip()
            snippet_id = snippet_id.strip()
            if separator and file_name and snippet_id:
                refs.append({'file': file_name, 'id': snippet_id})

        return refs
95
+
96
+
97
def load_examples(
    csv_path: Optional[Path] = None,
    domain: Optional[str] = None,
    example_type: Optional[str] = None,
    difficulty: Optional[str] = None
) -> List[Example]:
    """
    Load examples from CSV file with optional filtering

    Args:
        csv_path: Path to examples.csv. If None, uses ``data/examples.csv``
            next to this module's parent directory. A plain string path is
            accepted as well.
        domain: Filter by domain (defi, nfts, etc.)
        example_type: Filter by example type (quickstart, integration, etc.)
        difficulty: Filter by difficulty (beginner, intermediate, advanced)

    Returns:
        List of Example objects; empty when the CSV file does not exist.

    Raises:
        KeyError: If a required column (id, domain, example_type, scenario,
            problem, solution_code, explanation) is absent from the header.
    """
    if csv_path is None:
        csv_path = Path(__file__).parent.parent / 'data' / 'examples.csv'
    else:
        csv_path = Path(csv_path)

    if not csv_path.exists():
        return []

    def optional(row: Dict[str, Any], key: str, default: str = '') -> str:
        # DictReader fills cells missing from a short row with None, which
        # dict.get() returns as-is; fall back to the default in that case too.
        value = row.get(key, default)
        return default if value is None else value

    examples = []

    # newline='' lets the csv module handle line endings itself, which is
    # required for quoted fields that span several lines.
    with open(csv_path, 'r', encoding='utf-8', newline='') as f:
        for row in csv.DictReader(f):
            example = Example(
                id=row['id'],
                domain=row['domain'],
                example_type=row['example_type'],
                scenario=row['scenario'],
                problem=row['problem'],
                solution_code=row['solution_code'],
                explanation=row['explanation'],
                test_inputs=optional(row, 'test_inputs'),
                expected_outputs=optional(row, 'expected_outputs'),
                pitfalls=optional(row, 'pitfalls'),
                live_example_url=optional(row, 'live_example_url'),
                related_snippets=optional(row, 'related_snippets'),
                tags=optional(row, 'tags'),
                difficulty=optional(row, 'difficulty', 'intermediate')
            )

            # Apply filters (a filter that is None or empty matches everything)
            if domain and example.domain != domain:
                continue
            if example_type and example.example_type != example_type:
                continue
            if difficulty and example.difficulty != difficulty:
                continue

            examples.append(example)

    return examples
155
+
156
+
157
# Patterns (matched case-insensitively) that capture the name of a source
# project mentioned in an example's explanation or code.
_PROJECT_NAME_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        # Match: "from PROJECT-frontend/backend"
        r'from\s+([a-z0-9-]+(?:frontend|backend|app|market|demo|project))',
        # Match: "PROJECT/src/"
        r'([a-z0-9-]+(?:frontend|backend|app|market|demo|project))/src',
        # Match: "PROJECT patterns"
        r'([a-z0-9-]+(?:frontend|backend|app|market|demo|project))\s+patterns',
        # Match: "Pattern from PROJECT"
        r'Pattern from\s+([a-z0-9-]+)',
        # Match: "Source: PROJECT"
        r'Source:\s+([a-z0-9-]+)',
        # Match: "STX City", "Stacks Punks", etc (two-word project names)
        r'\b(stx\s+city|stacks\s+punks|bitcoin\s+monkeys|gamma\s+marketplace)\b',
    )
)


def example_to_searchable_text(example: Example) -> str:
    """
    Convert example to searchable text for BM25 indexing

    The text is the scenario (hyphens replaced by spaces), problem,
    explanation, tags (commas replaced by spaces), domain and example type,
    followed by keywords extracted from project names found in the
    explanation and the first 500 characters of the solution code.

    Args:
        example: Example object

    Returns:
        Concatenated searchable text. Fields that are None are treated as
        empty, and the extracted keywords appear in first-seen order so the
        result is the same on every run.
    """
    scenario = example.scenario or ''
    problem = example.problem or ''
    explanation = example.explanation or ''
    tags = example.tags or ''
    solution_code = example.solution_code or ''

    # Normalize tags: split by comma to make individual keywords searchable
    tags_normalized = tags.replace(',', ' ')

    # Normalize scenario: replace hyphens with spaces so "wallet-connect-flow" matches "connect"
    scenario_normalized = scenario.replace('-', ' ')

    # Only the start of the code is scanned for project names.
    combined_text = f"{explanation} {solution_code[:500]}"

    source_projects = []
    for pattern in _PROJECT_NAME_PATTERNS:
        source_projects.extend(match.lower() for match in pattern.findall(combined_text))

    # Extract individual words from project names
    # e.g., "sbtc-market-frontend" -> ["sbtc", "market"]
    # e.g., "stx city" -> ["stx", "city"]
    project_keywords = []
    for project in source_projects:
        # Remove common suffixes
        project_clean = project.replace('-frontend', '').replace('-backend', '').replace('-app', '')
        # Split by hyphens and spaces
        project_keywords.extend(re.split(r'[-\s]+', project_clean))

    # dict.fromkeys de-duplicates while keeping first-seen order; a set would
    # make the keyword order vary between interpreter runs.
    project_keywords_str = ' '.join(dict.fromkeys(w for w in project_keywords if w))

    return ' '.join([
        scenario_normalized,
        problem,
        explanation,
        tags_normalized,
        example.domain or '',
        example.example_type or '',
        project_keywords_str  # Add extracted project keywords
    ])
223
+
224
+
225
def normalize_query(query: str) -> str:
    """
    Normalize search query to handle common variations and abbreviations

    The original query is always kept verbatim as the first term. Each
    whitespace-separated word of the lower-cased query is then looked up in
    the SIP and abbreviation tables, and ``sipN`` / ``sipNN`` words also gain
    their zero-padded (``sip0NN``) and hyphenated (``sip-0NN``) spellings.
    An expansion is appended at most once and is skipped when it is already a
    word of the query, so no term is repeated in the result.

    Args:
        query: Original search query

    Returns:
        Expanded query with common variations
    """
    # SIP standard variations
    sip_variations = {
        'sip10': ['sip010', 'sip-010', 'fungible-token'],
        'sip9': ['sip009', 'sip-009', 'nft'],
        'sip013': ['sip-013', 'transfer-memo'],
        'sip016': ['sip-016', 'token-metadata'],
    }

    # Common abbreviations and project names
    abbreviations = {
        'ft': ['fungible-token', 'sip010', 'token'],
        'nft': ['non-fungible-token', 'sip009', 'sip-009'],
        'pc': ['post-condition', 'postcondition'],
        'dao': ['decentralized-autonomous-organization'],
        'dex': ['decentralized-exchange', 'swap', 'liquidity'],
        'sbtc': ['sbtc-market', 'bitcoin', 'bridge'],
        'pyth': ['pyth-oracle', 'oracle', 'price-feed'],
        'stxcity': ['stx city', 'stx-city', 'bonding-curve'],  # NOTE: Added "stx city" with space
    }

    words = query.lower().split()
    expanded_terms = [query]  # Always include original query

    # Terms already represented in the output: the query itself, its words,
    # and every expansion appended so far.
    seen = set(words)
    seen.add(query)

    def add_term(term: str) -> None:
        if term not in seen:
            seen.add(term)
            expanded_terms.append(term)

    for word in words:
        for term in sip_variations.get(word, ()):
            add_term(term)

        for term in abbreviations.get(word, ()):
            add_term(term)

        # Handle zero-padded numbers (sip10 -> sip010, sip9 -> sip009)
        sip_match = re.fullmatch(r'sip(\d{1,2})', word)
        if sip_match:
            number = int(sip_match.group(1))
            add_term(f'sip{number:03d}')
            add_term(f'sip-{number:03d}')

    # Join all expanded terms
    return ' '.join(expanded_terms)
286
+
287
+
288
def search_examples(
    query: str,
    domain: Optional[str] = None,
    max_results: int = 5,
    example_type: Optional[str] = None,
    difficulty: Optional[str] = None
) -> List[Dict[str, Any]]:
    """
    Rank the stored examples against a query with BM25 plus a type boost.

    The query is expanded with normalize_query() before scoring. Depending on
    the wording of the original query, the scores of one example type
    (debugging, security or integration, checked in that order) are
    multiplied by that type's EXAMPLE_TYPE_BOOST factor.

    Args:
        query: Search query
        domain: Filter by domain
        max_results: Maximum results to return
        example_type: Filter by example type
        difficulty: Filter by difficulty level

    Returns:
        Example dicts in descending score order, each extended with
        '_score' (rounded to 3 decimals) and '_type'. Only examples with a
        positive score are included.
    """
    from core import BM25  # Import BM25 from existing core module

    expanded_query = normalize_query(query)

    candidates = load_examples(domain=domain, example_type=example_type, difficulty=difficulty)
    if not candidates:
        return []

    corpus = [example_to_searchable_text(candidate) for candidate in candidates]
    ranker = BM25(corpus)
    ranked = [(pos, ranker.score(expanded_query, pos)) for pos, _ in enumerate(corpus)]

    # Query intent -> example type to boost. Rules are tried in order and only
    # the first rule whose keyword occurs in the query is applied.
    intent_rules = (
        ('debugging', ('debug', 'error', 'fail', 'why', 'issue', 'problem')),
        ('security', ('secure', 'safe', 'vulnerability', 'attack', 'exploit')),
        ('integration', ('integrate', 'combine', 'together', 'workflow')),
    )
    lowered = query.lower()
    boosted_type = None
    for type_name, keywords in intent_rules:
        if any(keyword in lowered for keyword in keywords):
            boosted_type = type_name
            break

    if boosted_type is not None:
        ranked = [
            (pos, value * EXAMPLE_TYPE_BOOST[boosted_type])
            if candidates[pos].example_type == boosted_type
            else (pos, value)
            for pos, value in ranked
        ]

    # Highest score first; ties keep their original relative order.
    ordered = sorted(ranked, key=lambda pair: pair[1], reverse=True)

    hits = []
    for pos, value in ordered[:max_results]:
        if not value > 0:
            continue
        hit = candidates[pos].to_dict()
        hit['_score'] = round(value, 3)
        hit['_type'] = 'example'
        hits.append(hit)

    return hits
360
+
361
+
362
def detect_example_preference(query: str) -> bool:
    """
    Decide whether a query reads like a request for a full example.

    The lower-cased query is checked for a fixed list of phrases by plain
    substring matching.

    Args:
        query: Search query

    Returns:
        True if any of the phrases occurs in the query, otherwise False
    """
    lowered = query.lower()

    trigger_phrases = (
        'how do i', 'how to', 'example of', 'show me',
        'why does', 'integrate', 'debugging', 'fails',
        'best practice', 'secure way', 'production',
        'complete', 'full code', 'working',
    )

    for phrase in trigger_phrases:
        if phrase in lowered:
            return True
    return False
381
+
382
+
383
def get_related_snippets(example: Example) -> List[Dict[str, Any]]:
    """
    Fetch related code snippets referenced in example

    Each reference names a CSV file in the ``data`` directory and a row id.
    The first row whose ``id`` column equals the referenced id is returned
    with an extra ``_source_file`` key holding the CSV file name.

    Args:
        example: Example object with related_snippets field

    Returns:
        List of related snippet records from CSV files. References whose
        file is missing, unreadable or malformed, or whose id is not found,
        are skipped.
    """
    refs = example.get_related_snippet_refs()
    if not refs:
        return []

    data_dir = Path(__file__).parent.parent / 'data'

    snippets = []

    for ref in refs:
        # Tolerate stray whitespace around either part of a reference.
        csv_file = ref['file'].strip()
        snippet_id = ref['id'].strip()

        csv_path = data_dir / csv_file

        if not csv_path.is_file():
            continue

        try:
            # newline='' lets the csv module handle line endings itself, which
            # is required for quoted fields that span several lines.
            with open(csv_path, 'r', encoding='utf-8', newline='') as f:
                for row in csv.DictReader(f):
                    if row.get('id') == snippet_id:
                        snippet = dict(row)
                        snippet['_source_file'] = csv_file
                        snippets.append(snippet)
                        break
        except (OSError, UnicodeError, csv.Error):
            # Best effort: an unreadable or malformed file only drops this
            # reference; unrelated programming errors still surface.
            continue

    return snippets
424
+
425
+
426
def format_example_for_display(example: Dict[str, Any]) -> str:
    """
    Render an example dictionary as a text block for the CLI.

    The header, problem and solution are always printed; the remaining
    sections are printed only when their field is present and non-empty.

    Args:
        example: Example dictionary; must contain 'scenario', 'domain',
            'example_type', 'difficulty', 'problem' and 'solution_code'.

    Returns:
        Formatted string for display
    """
    rule = '=' * 80

    # Mandatory part: header, problem statement and solution code.
    parts = [
        f"\n{rule}",
        f"Example: {example['scenario']}",
        f"Domain: {example['domain']} | Type: {example['example_type']} | Difficulty: {example['difficulty']}",
        f"Score: {example.get('_score', 0):.3f}",
        f"{rule}\n",
        f"Problem:\n{example['problem']}\n",
        "Solution:",
        f"{example['solution_code']}\n",
    ]

    # Optional sections in display order: (field, template).
    optional_sections = (
        ('explanation', "Explanation:\n{}\n"),
        ('test_inputs', "Test Inputs: {}"),
        ('expected_outputs', "Expected Outputs: {}\n"),
        ('pitfalls', "⚠️ Common Pitfalls:\n{}\n"),
        ('live_example_url', "🔗 Live Example: {}\n"),
        ('related_snippets', "Related: {}\n"),
        ('tags', "Tags: {}"),
    )
    for field_name, template in optional_sections:
        value = example.get(field_name)
        if value:
            parts.append(template.format(value))

    parts.append(f"\n{rule}\n")

    return '\n'.join(parts)
481
+
482
+
483
if __name__ == '__main__':
    # Manual smoke test: with arguments, run a search and print the hits;
    # without arguments, print counts of the stored examples.
    import sys

    cli_words = sys.argv[1:]

    if cli_words:
        query = ' '.join(cli_words)
        results = search_examples(query, max_results=3)

        if not results:
            print(f"No examples found for '{query}'")
        else:
            print(f"Found {len(results)} examples for '{query}':\n")
            for result in results:
                print(format_example_for_display(result))
    else:
        # Load and show statistics
        examples = load_examples()
        print(f"Total examples loaded: {len(examples)}")

        if examples:
            # (heading, attribute counted) for each statistic, in print order.
            breakdowns = (
                ("\nBy domain:", 'domain'),
                ("\nBy type:", 'example_type'),
                ("\nBy difficulty:", 'difficulty'),
            )
            for heading, attribute in breakdowns:
                tally = {}
                for ex in examples:
                    bucket = getattr(ex, attribute)
                    tally[bucket] = tally.get(bucket, 0) + 1

                print(heading)
                for bucket, count in sorted(tally.items()):
                    print(f" {bucket}: {count}")