npm - @techwavedev/agi-agent-kit - Versions diffs - 1.1.3 - Mend

@techwavedev/agi-agent-kit 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (196) hide show

package/templates/skills/core/webcrawler/scripts/extract_page.py ADDED Viewed

@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+"""
+Script: extract_page.py
+Purpose: Extract content from a single documentation page.
+Usage:
+    python extract_page.py --url <page-url> [options]
+Arguments:
+    --url, -u        Page URL to extract (required)
+    --output, -o     Output file (default: stdout)
+    --format, -f     Output format: md or json (default: md)
+    --include-links  Include internal links (default: true)
+Exit Codes:
+    0 - Success
+    1 - Invalid arguments
+    2 - Network error
+    3 - No content found
+"""
+import argparse
+import json
+import re
+import sys
+from datetime import datetime
+from urllib.parse import urljoin, urlparse
+try:
+    import requests
+    from bs4 import BeautifulSoup
+    import html2text
+except ImportError as e:
+    print(json.dumps({
+        "status": "error",
+        "message": f"Missing dependency: {e}. Install with: pip install requests beautifulsoup4 html2text lxml"
+    }), file=sys.stderr)
+    sys.exit(1)
+def extract_main_content(soup: BeautifulSoup) -> BeautifulSoup:
+    """Extract the main content area, removing navigation/sidebars."""
+    content_selectors = [
+        'main', 'article', '[role="main"]', '.main-content', '.content',
+        '.documentation', '.docs-content', '.markdown-body', '#content',
+        '#main-content', '.post-content',
+    ]
+    for selector in content_selectors:
+        content = soup.select_one(selector)
+        if content:
+            return content
+    body = soup.find('body')
+    if body:
+        for selector in ['nav', 'header', 'footer', 'aside', '.sidebar',
+                       '.navigation', '.nav', '.toc', '.menu']:
+            for element in body.select(selector):
+                element.decompose()
+        return body
+    return soup
+def extract_title(soup: BeautifulSoup) -> str:
+    """Extract page title."""
+    h1 = soup.find('h1')
+    if h1:
+        return h1.get_text(strip=True)
+    title = soup.find('title')
+    if title:
+        text = title.get_text(strip=True)
+        for sep in [' |', ' -', ' ::']:
+            if sep in text:
+                text = text.split(sep)[0].strip()
+        return text
+    return 'Untitled'
+def html_to_markdown(soup: BeautifulSoup, url: str) -> str:
+    """Convert HTML content to clean markdown."""
+    converter = html2text.HTML2Text()
+    converter.ignore_links = False
+    converter.ignore_images = False
+    converter.body_width = 0
+    converter.unicode_snob = True
+    html_content = str(soup)
+    markdown = converter.handle(html_content)
+    # Clean up excessive whitespace
+    markdown = re.sub(r'\n{3,}', '\n\n', markdown)
+    # Add source metadata
+    header = f"---\nsource: {url}\ncrawled: {datetime.now().isoformat()}\n---\n\n"
+    return header + markdown.strip()
+def fetch_and_extract(url: str, include_links: bool = True) -> dict:
+    """Fetch a page and extract its content."""
+    headers = {
+        'User-Agent': 'DocumentationHarvester/1.0',
+        'Accept': 'text/html,application/xhtml+xml',
+    }
+    response = requests.get(url, headers=headers, timeout=30)
+    response.raise_for_status()
+    soup = BeautifulSoup(response.content, 'lxml')
+    title = extract_title(soup)
+    main_content = extract_main_content(soup)
+    markdown = html_to_markdown(main_content, url)
+    # Extract links if requested
+    links = []
+    if include_links:
+        domain = urlparse(url).netloc
+        for anchor in main_content.find_all('a', href=True):
+            href = anchor['href']
+            if href.startswith('#'):
+                continue
+            absolute_url = urljoin(url, href)
+            if urlparse(absolute_url).netloc == domain:
+                links.append({
+                    'url': absolute_url,
+                    'text': anchor.get_text(strip=True),
+                })
+    return {
+        'url': url,
+        'title': title,
+        'content': markdown,
+        'word_count': len(markdown.split()),
+        'links': links,
+        'timestamp': datetime.now().isoformat(),
+    }
+def main():
+    parser = argparse.ArgumentParser(description='Extract content from a documentation page.')
+    parser.add_argument('--url', '-u', required=True, help='Page URL to extract')
+    parser.add_argument('--output', '-o', help='Output file (default: stdout)')
+    parser.add_argument('--format', '-f', choices=['md', 'json'], default='md', help='Output format')
+    parser.add_argument('--include-links', action='store_true', default=True, help='Include internal links')
+    args = parser.parse_args()
+    try:
+        result = fetch_and_extract(args.url, args.include_links)
+        if args.format == 'md':
+            output = result['content']
+        else:
+            output = json.dumps(result, indent=2, ensure_ascii=False)
+        if args.output:
+            with open(args.output, 'w', encoding='utf-8') as f:
+                f.write(output)
+            print(json.dumps({
+                "status": "success",
+                "title": result['title'],
+                "word_count": result['word_count'],
+                "output": args.output
+            }))
+        else:
+            print(output)
+        sys.exit(0)
+    except requests.exceptions.RequestException as e:
+        print(json.dumps({
+            "status": "error",
+            "type": "network_error",
+            "message": str(e)
+        }), file=sys.stderr)
+        sys.exit(2)
+    except Exception as e:
+        print(json.dumps({
+            "status": "error",
+            "type": type(e).__name__,
+            "message": str(e)
+        }), file=sys.stderr)
+        sys.exit(3)
+if __name__ == '__main__':
+    main()

package/templates/skills/core/webcrawler/scripts/filter_docs.py ADDED Viewed

@@ -0,0 +1,200 @@
+#!/usr/bin/env python3
+"""
+Script: filter_docs.py
+Purpose: Filter already-crawled documentation by subject or pattern.
+Usage:
+    python filter_docs.py --input <crawl-dir> --subject <topic> --output <output-dir>
+Arguments:
+    --input, -i      Crawled docs directory (required)
+    --subject, -s    Subject to filter for (required)
+    --output, -o     Filtered output directory (required)
+    --threshold, -t  Relevance threshold 0.0-1.0 (default: 0.3)
+Exit Codes:
+    0 - Success
+    1 - Invalid arguments
+    2 - Input not found
+    3 - No matching content
+"""
+import argparse
+import json
+import re
+import sys
+from pathlib import Path
+from datetime import datetime
+def calculate_relevance(content: str, title: str, subject: str) -> float:
+    """Calculate relevance score based on subject keywords."""
+    subject_lower = subject.lower()
+    subject_words = [w for w in subject_lower.split() if len(w) > 2]
+    if not subject_words:
+        return 0.0
+    text = (title + ' ' + content).lower()
+    total_words = len(text.split())
+    if total_words == 0:
+        return 0.0
+    # Count keyword occurrences
+    keyword_count = 0
+    for word in subject_words:
+        keyword_count += len(re.findall(r'\b' + re.escape(word) + r'\b', text))
+    # Calculate density-based score
+    density = keyword_count / total_words
+    # Bonus for title matches
+    title_lower = title.lower()
+    title_bonus = 0.3 if any(w in title_lower for w in subject_words) else 0.0
+    # Normalize to 0-1 range
+    score = min(1.0, (density * 100) + title_bonus)
+    return score
+def filter_crawled_docs(input_dir: Path, subject: str, threshold: float) -> list:
+    """Filter documents based on relevance to subject."""
+    pages_dir = input_dir / 'pages'
+    if not pages_dir.exists():
+        raise FileNotFoundError(f"Pages directory not found: {pages_dir}")
+    filtered = []
+    for md_file in pages_dir.glob('*.md'):
+        content = md_file.read_text(encoding='utf-8')
+        # Extract title from frontmatter or first heading
+        title = 'Untitled'
+        if content.startswith('---'):
+            # Has frontmatter
+            end = content.find('---', 3)
+            if end > 0:
+                body = content[end + 3:].strip()
+                # Try to get first heading
+                match = re.search(r'^#\s+(.+)$', body, re.MULTILINE)
+                if match:
+                    title = match.group(1)
+        else:
+            match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
+            if match:
+                title = match.group(1)
+        score = calculate_relevance(content, title, subject)
+        if score >= threshold:
+            filtered.append({
+                'file': md_file.name,
+                'title': title,
+                'score': score,
+                'content': content,
+                'word_count': len(content.split()),
+            })
+    # Sort by relevance
+    filtered.sort(key=lambda x: x['score'], reverse=True)
+    return filtered
+def save_filtered(filtered: list, output_dir: Path, subject: str, input_dir: Path):
+    """Save filtered documents to output directory."""
+    output_dir.mkdir(parents=True, exist_ok=True)
+    pages_dir = output_dir / 'pages'
+    pages_dir.mkdir(exist_ok=True)
+    # Copy filtered pages
+    for page in filtered:
+        (pages_dir / page['file']).write_text(page['content'], encoding='utf-8')
+    # Generate new index
+    index_lines = [
+        f"# {subject} (Filtered Documentation)",
+        "",
+        f"> Filtered from: {input_dir}",
+        f"> Pages: {len(filtered)}",
+        f"> Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
+        "",
+        "## Table of Contents",
+        "",
+    ]
+    for page in filtered:
+        index_lines.append(f"- [{page['title']}](pages/{page['file']}) (relevance: {page['score']:.2f})")
+    (output_dir / 'index.md').write_text('\n'.join(index_lines), encoding='utf-8')
+    # Save metadata
+    metadata = {
+        'subject': subject,
+        'source': str(input_dir),
+        'pages_filtered': len(filtered),
+        'timestamp': datetime.now().isoformat(),
+        'pages': [{k: v for k, v in p.items() if k != 'content'} for p in filtered]
+    }
+    (output_dir / 'metadata.json').write_text(json.dumps(metadata, indent=2), encoding='utf-8')
+def main():
+    parser = argparse.ArgumentParser(description='Filter crawled documentation by subject.')
+    parser.add_argument('--input', '-i', required=True, help='Crawled docs directory')
+    parser.add_argument('--subject', '-s', required=True, help='Subject to filter for')
+    parser.add_argument('--output', '-o', required=True, help='Filtered output directory')
+    parser.add_argument('--threshold', '-t', type=float, default=0.3, help='Relevance threshold')
+    args = parser.parse_args()
+    input_dir = Path(args.input)
+    output_dir = Path(args.output)
+    if not input_dir.exists():
+        print(json.dumps({
+            "status": "error",
+            "message": f"Input directory not found: {input_dir}"
+        }), file=sys.stderr)
+        sys.exit(2)
+    try:
+        print(f"🔍 Filtering docs for: {args.subject}")
+        print(f"   Source: {input_dir}")
+        print(f"   Threshold: {args.threshold}")
+        print()
+        filtered = filter_crawled_docs(input_dir, args.subject, args.threshold)
+        if not filtered:
+            print(json.dumps({
+                "status": "error",
+                "message": "No pages matched the filter criteria"
+            }), file=sys.stderr)
+            sys.exit(3)
+        save_filtered(filtered, output_dir, args.subject, input_dir)
+        print(f"✅ Filtered {len(filtered)} pages")
+        print()
+        print(json.dumps({
+            "status": "success",
+            "pages_filtered": len(filtered),
+            "output": str(output_dir)
+        }, indent=2))
+        sys.exit(0)
+    except Exception as e:
+        print(json.dumps({
+            "status": "error",
+            "type": type(e).__name__,
+            "message": str(e)
+        }), file=sys.stderr)
+        sys.exit(1)
+if __name__ == '__main__':
+    main()

package/templates/skills/knowledge/api-patterns/SKILL.md ADDED Viewed

@@ -0,0 +1,81 @@
+---
+name: api-patterns
+description: API design principles and decision-making. REST vs GraphQL vs tRPC selection, response formats, versioning, pagination.
+allowed-tools: Read, Write, Edit, Glob, Grep
+---
+# API Patterns
+> API design principles and decision-making for 2025.
+> **Learn to THINK, not copy fixed patterns.**
+## 🎯 Selective Reading Rule
+**Read ONLY files relevant to the request!** Check the content map, find what you need.
+---
+## 📑 Content Map
+| File | Description | When to Read |
+|------|-------------|--------------|
+| `api-style.md` | REST vs GraphQL vs tRPC decision tree | Choosing API type |
+| `rest.md` | Resource naming, HTTP methods, status codes | Designing REST API |
+| `response.md` | Envelope pattern, error format, pagination | Response structure |
+| `graphql.md` | Schema design, when to use, security | Considering GraphQL |
+| `trpc.md` | TypeScript monorepo, type safety | TS fullstack projects |
+| `versioning.md` | URI/Header/Query versioning | API evolution planning |
+| `auth.md` | JWT, OAuth, Passkey, API Keys | Auth pattern selection |
+| `rate-limiting.md` | Token bucket, sliding window | API protection |
+| `documentation.md` | OpenAPI/Swagger best practices | Documentation |
+| `security-testing.md` | OWASP API Top 10, auth/authz testing | Security audits |
+---
+## 🔗 Related Skills
+| Need | Skill |
+|------|-------|
+| API implementation | `@[skills/backend-development]` |
+| Data structure | `@[skills/database-design]` |
+| Security details | `@[skills/security-hardening]` |
+---
+## ✅ Decision Checklist
+Before designing an API:
+- [ ] **Asked user about API consumers?**
+- [ ] **Chosen API style for THIS context?** (REST/GraphQL/tRPC)
+- [ ] **Defined consistent response format?**
+- [ ] **Planned versioning strategy?**
+- [ ] **Considered authentication needs?**
+- [ ] **Planned rate limiting?**
+- [ ] **Documentation approach defined?**
+---
+## ❌ Anti-Patterns
+**DON'T:**
+- Default to REST for everything
+- Use verbs in REST endpoints (/getUsers)
+- Return inconsistent response formats
+- Expose internal errors to clients
+- Skip rate limiting
+**DO:**
+- Choose API style based on context
+- Ask about client requirements
+- Document thoroughly
+- Use appropriate status codes
+---
+## Script
+| Script | Purpose | Command |
+|--------|---------|---------|
+| `scripts/api_validator.py` | API endpoint validation | `python scripts/api_validator.py <project_path>` |

package/templates/skills/knowledge/api-patterns/api-style.md ADDED Viewed

@@ -0,0 +1,42 @@
+# API Style Selection (2025)
+> REST vs GraphQL vs tRPC - which one fits which situation?
+## Decision Tree
+```
+Who are the API consumers?
+│
+├── Public API / Multiple platforms
+│   └── REST + OpenAPI (widest compatibility)
+│
+├── Complex data needs / Multiple frontends
+│   └── GraphQL (flexible queries)
+│
+├── TypeScript frontend + backend (monorepo)
+│   └── tRPC (end-to-end type safety)
+│
+├── Real-time / Event-driven
+│   └── WebSocket + AsyncAPI
+│
+└── Internal microservices
+    └── gRPC (performance) or REST (simplicity)
+```
+## Comparison
+| Factor | REST | GraphQL | tRPC |
+|--------|------|---------|------|
+| **Best for** | Public APIs | Complex apps | TS monorepos |
+| **Learning curve** | Low | Medium | Low (if TS) |
+| **Over/under fetching** | Common | Solved | Solved |
+| **Type safety** | Manual (OpenAPI) | Schema-based | Automatic |
+| **Caching** | HTTP native | Complex | Client-based |
+## Selection Questions
+1. Who are the API consumers?
+2. Is the frontend TypeScript?
+3. How complex are the data relationships?
+4. Is caching critical?
+5. Public or internal API?

package/templates/skills/knowledge/api-patterns/auth.md ADDED Viewed

@@ -0,0 +1,24 @@
+# Authentication Patterns
+> Choose auth pattern based on use case.
+## Selection Guide
+| Pattern | Best For |
+|---------|----------|
+| **JWT** | Stateless, microservices |
+| **Session** | Traditional web, simple |
+| **OAuth 2.0** | Third-party integration |
+| **API Keys** | Server-to-server, public APIs |
+| **Passkey** | Modern passwordless (2025+) |
+## JWT Principles
+```
+Important:
+├── Always verify signature
+├── Check expiration
+├── Include minimal claims
+├── Use short expiry + refresh tokens
+└── Never store sensitive data in JWT
+```

package/templates/skills/knowledge/api-patterns/documentation.md ADDED Viewed

@@ -0,0 +1,26 @@
+# API Documentation Principles
+> Good docs = happy developers = API adoption.
+## OpenAPI/Swagger Essentials
+```
+Include:
+├── All endpoints with examples
+├── Request/response schemas
+├── Authentication requirements
+├── Error response formats
+└── Rate limiting info
+```
+## Good Documentation Has
+```
+Essentials:
+├── Quick start / Getting started
+├── Authentication guide
+├── Complete API reference
+├── Error handling guide
+├── Code examples (multiple languages)
+└── Changelog
+```

package/templates/skills/knowledge/api-patterns/graphql.md ADDED Viewed

@@ -0,0 +1,41 @@
+# GraphQL Principles
+> Flexible queries for complex, interconnected data.
+## When to Use
+```
+✅ Good fit:
+├── Complex, interconnected data
+├── Multiple frontend platforms
+├── Clients need flexible queries
+├── Evolving data requirements
+└── Reducing over-fetching matters
+❌ Poor fit:
+├── Simple CRUD operations
+├── File upload heavy
+├── HTTP caching important
+└── Team unfamiliar with GraphQL
+```
+## Schema Design Principles
+```
+Principles:
+├── Think in graphs, not endpoints
+├── Design for evolvability (no versions)
+├── Use connections for pagination
+├── Be specific with types (not generic "data")
+└── Handle nullability thoughtfully
+```
+## Security Considerations
+```
+Protect against:
+├── Query depth attacks → Set max depth
+├── Query complexity → Calculate cost
+├── Batching abuse → Limit batch size
+└── Introspection → Disable in production
+```

package/templates/skills/knowledge/api-patterns/rate-limiting.md ADDED Viewed

@@ -0,0 +1,31 @@
+# Rate Limiting Principles
+> Protect your API from abuse and overload.
+## Why Rate Limit
+```
+Protect against:
+├── Brute force attacks
+├── Resource exhaustion
+├── Cost overruns (if pay-per-use)
+└── Unfair usage
+```
+## Strategy Selection
+| Type | How | When |
+|------|-----|------|
+| **Token bucket** | Burst allowed, refills over time | Most APIs |
+| **Sliding window** | Smooth distribution | Strict limits |
+| **Fixed window** | Simple counters per window | Basic needs |
+## Response Headers
+```
+Include in headers:
+├── X-RateLimit-Limit (max requests)
+├── X-RateLimit-Remaining (requests left)
+├── X-RateLimit-Reset (when limit resets)
+└── Return 429 when exceeded
+```

package/templates/skills/knowledge/api-patterns/response.md ADDED Viewed

@@ -0,0 +1,37 @@
+# Response Format Principles
+> Consistency is key - choose a format and stick to it.
+## Common Patterns
+```
+Choose one:
+├── Envelope pattern ({ success, data, error })
+├── Direct data (just return the resource)
+└── HAL/JSON:API (hypermedia)
+```
+## Error Response
+```
+Include:
+├── Error code (for programmatic handling)
+├── User message (for display)
+├── Details (for debugging, field-level errors)
+├── Request ID (for support)
+└── NOT internal details (security!)
+```
+## Pagination Types
+| Type | Best For | Trade-offs |
+|------|----------|------------|
+| **Offset** | Simple, jumpable | Performance on large datasets |
+| **Cursor** | Large datasets | Can't jump to page |
+| **Keyset** | Performance critical | Requires sortable key |
+### Selection Questions
+1. How large is the dataset?
+2. Do users need to jump to specific pages?
+3. Is data frequently changing?