npm - dasa-sradha-kit - Versions diffs - 5.0.0 - Mend

dasa-sradha-kit 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

package/.agent/.shared/infinite-memory.md +19 -0
package/.agent/.shared/max-power-core.md +27 -0
package/.agent/ARCHITECTURE.md +104 -0
package/.agent/agents/dasa-dharma.md +21 -0
package/.agent/agents/dasa-dwipa.md +21 -0
package/.agent/agents/dasa-indra.md +21 -0
package/.agent/agents/dasa-kala.md +21 -0
package/.agent/agents/dasa-mpu.md +21 -0
package/.agent/agents/dasa-nala.md +21 -0
package/.agent/agents/dasa-patih.md +21 -0
package/.agent/agents/dasa-rsi.md +25 -0
package/.agent/agents/dasa-sastra.md +21 -0
package/.agent/agents/dasa-widya.md +21 -0
package/.agent/rules/GEMINI.md +183 -0
package/.agent/scripts/api_validator.py +70 -0
package/.agent/scripts/arch_mapper.py +101 -0
package/.agent/scripts/compact_memory.py +68 -0
package/.agent/scripts/complexity_scorer.py +82 -0
package/.agent/scripts/context_mapper.py +91 -0
package/.agent/scripts/design_engine.py +108 -0
package/.agent/scripts/design_memory_sync.py +87 -0
package/.agent/scripts/lint_fixer.py +79 -0
package/.agent/scripts/qa_gate.py +84 -0
package/.agent/scripts/security_scan.py +82 -0
package/.agent/scripts/semantic-scan.py +56 -0
package/.agent/scripts/skill_search.py +91 -0
package/.agent/scripts/status_parser.py +78 -0
package/.agent/scripts/test_runner.py +98 -0
package/.agent/scripts/validate_env.py +71 -0
package/.agent/scripts/web_scraper.py +86 -0
package/.agent/scripts/workspace-mapper.py +58 -0
package/.agent/skills/.gitkeep +0 -0
package/.agent/workflows/dasa-api.md +42 -0
package/.agent/workflows/dasa-assimilate.md +44 -0
package/.agent/workflows/dasa-commit.md +46 -0
package/.agent/workflows/dasa-docs.md +46 -0
package/.agent/workflows/dasa-e2e.md +41 -0
package/.agent/workflows/dasa-feature.md +46 -0
package/.agent/workflows/dasa-fix.md +37 -0
package/.agent/workflows/dasa-init.md +29 -0
package/.agent/workflows/dasa-plan.md +56 -0
package/.agent/workflows/dasa-pr.md +47 -0
package/.agent/workflows/dasa-refactor.md +44 -0
package/.agent/workflows/dasa-seed.md +44 -0
package/.agent/workflows/dasa-start-work.md +51 -0
package/.agent/workflows/dasa-status.md +58 -0
package/.agent/workflows/dasa-sync.md +39 -0
package/.agent/workflows/dasa-uninstall.md +30 -0
package/CHANGELOG.md +94 -0
package/LICENSE +21 -0
package/README.md +135 -0
package/bin/cli.js +218 -0
package/bin/dasa-cli.js +100 -0
package/package.json +37 -0

package/.agent/scripts/qa_gate.py ADDED Viewed

@@ -0,0 +1,84 @@
+#!/usr/bin/env python3
+"""
+Dasa Indra: The native QA Gate (qa_gate.py)
+Assimilates ~800 patterns from `engineering-failures-bible`.
+Provides native python text scanning against common language-specific pitfalls
+before a task can be marked complete.
+"""
+import sys
+import os
+import re
+import argparse
+from pathlib import Path
+# Embedded Failure Patterns (TOON Heuristics)
+# Maps a failure-domain label to a list of (regex, description) pairs.
+# scan_file() runs every regex with re.search against one source line at a
+# time, so a pattern can only match text that sits on a single line; e.g. the
+# `(?!.*\bdelete\b)` lookahead below only sees the remainder of the same line.
+FAILURES = {
+    "01_Memory": [
+        (r"new\s+[a-zA-Z]+\(.*\)\s*;(?!.*\bdelete\b)", "Unmatched 'new' allocation without 'delete' (C++/Leaks)"),
+        (r"fmt\.Sprintf\(\s*\"%s\"", "String concatenation in loops instead of strings.Builder (Go/Memory)"),
+        (r"\.collect::\<Vec<\w+>\>\(\)", "Unbounded heavy .collect() instead of iterators (Rust/Memory)")
+    ],
+    "02_Concurrency": [
+        (r"sync\.Mutex.*defer\s+.*Unlock", "Missing careful Lock bounds via defer (Go/Deadlock risk)"),
+        (r"std::sync::Mutex.*\.unwrap\(\)", "Poisoning panic risk on Mutex.unwrap() (Rust/Concurrency)"),
+        (r"Promise\.all\(.*\.(map|forEach)\(async", "Unbounded concurrent Promise execution. Use Promise.allSettled or batching (Node/EventLoop)")
+    ],
+    "03_Security": [
+        (r"SELECT\s+.*\s+FROM\s+.*\s+WHERE\s+.*=\s*\S+\s*\+", "Raw string concatenation in SQL queries (SQL Injection)"),
+        (r"eval\(", "Use of eval() detected (RCE vulnerability)"),
+        (r"dangerouslySetInnerHTML", "Potential XSS detected in React/Next.js component")
+    ]
+}
+def scan_file(filepath: Path) -> list:
+    """Scan one file, line by line, against every pattern in FAILURES.
+    Args:
+        filepath: File to read as UTF-8 text.
+    Returns:
+        One "[FAIL] <domain> | <file name>:<line> -> <description>" string per
+        (line, pattern) match, in file order. A file that cannot be read or
+        decoded is reported on stdout and yields an empty list.
+    """
+    try:
+        content = filepath.read_text(encoding="utf-8")
+    except (OSError, UnicodeDecodeError) as e:
+        # Best-effort scan: unreadable or binary files are skipped, not fatal.
+        # Only I/O and decoding errors are treated this way so that a genuine
+        # programming error (e.g. a malformed regex) is not hidden as a "skip".
+        print(f"Skipping {filepath} ({e})")
+        return []
+    # Flatten and compile the table once per file instead of per line.
+    compiled = [
+        (domain, re.compile(regex), desc)
+        for domain, patterns in FAILURES.items()
+        for regex, desc in patterns
+    ]
+    issues = []
+    for idx, line in enumerate(content.splitlines(), 1):
+        for domain, pattern, desc in compiled:
+            if pattern.search(line):
+                issues.append(f"[FAIL] {domain} | {filepath.name}:{idx} -> {desc}")
+    return issues
+def main():
+    """Command-line entry point for the QA gate.
+    Takes one positional argument (a file or a directory). A directory is walked
+    recursively, skipping directories named `.git`, `node_modules` or `target`
+    and scanning only files with a recognised source extension. Prints every
+    finding and exits with status 1 if anything was found or the target does
+    not exist; otherwise exits with status 0.
+    """
+    parser = argparse.ArgumentParser(description="Dasa Indra QA Gate Scanner")
+    parser.add_argument("target", help="Directory or file to scan")
+    args = parser.parse_args()
+    target_path = Path(args.target)
+    if not target_path.exists():
+        print(f"Error: Target {target_path} does not exist.")
+        sys.exit(1)
+    print(f"🕵️‍♂️ Dasa Indra: Initiating Engineering Failure Scan on {target_path}...")
+    skipped_dirs = {".git", "node_modules", "target"}
+    scanned_exts = {".js", ".ts", ".go", ".rs", ".java", ".php", ".py", ".cpp"}
+    total_issues = []
+    if target_path.is_file():
+        total_issues.extend(scan_file(target_path))
+    else:
+        for root, dirs, files in os.walk(target_path):
+            # Prune by exact directory name, in place, so os.walk never descends
+            # into them. A substring test on the whole path would also drop
+            # look-alikes such as ".github" or "targets", or the entire scan
+            # when the project itself lives under a path containing "target".
+            dirs[:] = [d for d in dirs if d not in skipped_dirs]
+            for file in files:
+                if Path(file).suffix in scanned_exts:
+                    total_issues.extend(scan_file(Path(root) / file))
+    if total_issues:
+        print("\n❌ ENGINEERING FAILURES DETECTED:")
+        for issue in total_issues:
+            print(f"   {issue}")
+        print("\n>> Dasa Nala is BLOCKED from completing this task. Fix the issues first.")
+        sys.exit(1)
+    else:
+        print("\n✅ QA Gate Passed. No critical engineering failures detected.")
+        sys.exit(0)
+if __name__ == "__main__":
+    main()

package/.agent/scripts/security_scan.py ADDED Viewed

@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+"""
+Dasa Dharma: Secret Guardian (security_scan.py)
+Scans git diffs or specific files for leaked API keys, tokens, and secrets.
+"""
+import os
+import sys
+import subprocess
+import re
+# Secret label -> regex. scan_diff() applies each one with re.search to every
+# added line of the diff.
+SECRET_PATTERNS = {
+    "AWS Access Key": r"AKIA[0-9A-Z]{16}",
+    "Stripe API Key": r"sk_(test|live)_[0-9a-zA-Z]{24}",
+    "Google API Key": r"AIza[0-9A-Za-z-_]{35}",
+    "Generic Bearer/Token": r"(?i)(bearer|token|api_key|secret)['\"\s:=]+[A-Za-z0-9\-_]{20,}",
+    # The key-type word is optional so PKCS#8 headers ("BEGIN PRIVATE KEY",
+    # "BEGIN ENCRYPTED PRIVATE KEY") match too, and the optional " BLOCK"
+    # covers PGP armor, whose header is "BEGIN PGP PRIVATE KEY BLOCK".
+    "Private Key": r"-----BEGIN (?:(?:RSA|OPENSSH|DSA|EC|PGP|ENCRYPTED) )?PRIVATE KEY(?: BLOCK)?-----"
+}
+def get_git_diff():
+    """Return the current staged and unstaged git changes as one text blob.
+    Returns:
+        The output of `git diff --cached`, a newline, then the output of
+        `git diff`. An empty string if git exits non-zero or cannot be run;
+        a message is printed in that case.
+    """
+    try:
+        # Decode leniently: a diff may contain bytes that are not valid UTF-8
+        # (legacy-encoded sources, binary fragments). A strict decode would
+        # raise, the handler below would return "", and the caller would then
+        # report a pass without having scanned anything.
+        # Get staged changes
+        staged = subprocess.check_output(["git", "diff", "--cached"]).decode('utf-8', errors='replace')
+        # Get unstaged changes
+        unstaged = subprocess.check_output(["git", "diff"]).decode('utf-8', errors='replace')
+        return staged + "\n" + unstaged
+    except subprocess.CalledProcessError:
+        print("🟡 [Dharma Guardian] Not a git repository or no commits yet. Skipping diff scan.")
+        return ""
+    except Exception as e:
+        print(f"🔴 [Dharma Guardian] Error executing git diff: {e}")
+        return ""
+def scan_diff(diff_text):
+    """Look for secret patterns on the added lines of a unified diff.
+    Args:
+        diff_text: Diff output as a single string.
+    Returns:
+        A list of (pattern name, stripped line) tuples, one per matching
+        pattern per added line, in diff order.
+    """
+    findings = []
+    for raw_line in diff_text.split('\n'):
+        # Added lines start with "+"; a "+++" prefix is skipped as a file header.
+        if not raw_line.startswith('+') or raw_line.startswith('+++'):
+            continue
+        shown = raw_line.strip()
+        findings.extend(
+            (label, shown)
+            for label, regex in SECRET_PATTERNS.items()
+            if re.search(regex, raw_line)
+        )
+    return findings
+def main():
+    """Run the secret scan and exit with a status suitable for a commit gate.
+    Exits 1 when `git status -s` lists a path containing ".env" (and not
+    ".example"), or when an added diff line matches a secret pattern. Exits 0
+    when there is nothing to scan or nothing was found.
+    """
+    print("🛡️  [Dasa Dharma] Initializing Secret Guardian Scan...")
+    # 1. Scan .env files (preventing accidental .env commits)
+    try:
+        git_status = subprocess.check_output(["git", "status", "-s"]).decode('utf-8', errors='replace')
+        for line in git_status.split('\n'):
+            if '.env' in line and '.example' not in line:
+                print(f"🔴 [Dharma Guardian] FATAL: You are attempting to commit a raw .env file!\nLine: {line.strip()}")
+                sys.exit(1)
+    except (subprocess.CalledProcessError, OSError):
+        # Not a git repo, or git itself could not be launched. get_git_diff()
+        # below reports the problem, so stay quiet here instead of crashing.
+        pass
+    # 2. Scan Git Diff for hardcoded secrets
+    diff_text = get_git_diff()
+    if not diff_text:
+        print("🟢 [Dharma Guardian] No changes to scan. Pass.")
+        sys.exit(0)
+    leaks = scan_diff(diff_text)
+    if leaks:
+        print("\n🔴 [Dharma Guardian] FATAL: Potential Secret Leaks Detected in `git diff`:")
+        for name, line in leaks:
+            print(f"  - [{name}] Found in line: {line[:80]}...")
+        print("\nHALTING COMMIT. Remove hardcoded secrets and use environment variables.")
+        sys.exit(1)
+    print("🟢 [Dharma Guardian] Security Audit Passed. No obvious secrets leaked.")
+    sys.exit(0)
+if __name__ == "__main__":
+    main()

package/.agent/scripts/semantic-scan.py ADDED Viewed

@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+"""
+semantic-scan.py — Cross-platform semantic search wrapper (uses osgrep)
+Persona: Dasa Dwipa (The Scout)
+Usage:
+  python .agent/scripts/semantic-scan.py "JWT authentication middleware"
+  python .agent/scripts/semantic-scan.py "where is the login form handled"
+"""
+import sys
+import subprocess
+import shutil
+import os
+def check_osgrep():
+    """Report whether an `osgrep` executable can be found on PATH."""
+    osgrep_path = shutil.which("osgrep")
+    return osgrep_path is not None
+def run_semantic_search(query: str) -> int:
+    """Search the current directory for `query` and print the results.
+    Uses `osgrep search` when osgrep is on PATH; otherwise falls back to a
+    recursive `grep` over .ts/.py/.js/.php files, printing at most the first
+    3000 characters of output.
+    Args:
+        query: Free-text search string.
+    Returns:
+        The exit status of the tool that ran, or 1 if the tool is missing or
+        osgrep timed out.
+    """
+    if not check_osgrep():
+        print("[!] osgrep is not installed. Install it with: npm install -g osgrep")
+        print(f"[!] Falling back to grep for: {query}")
+        try:
+            result = subprocess.run(
+                # "-e" marks the next argument as the pattern, so a query that
+                # begins with "-" is not mistaken for a grep option.
+                ["grep", "-r", "--include=*.ts", "--include=*.py", "--include=*.js",
+                 "--include=*.php", "-n", "-e", query, "."],
+                capture_output=True, text=True
+            )
+        except FileNotFoundError:
+            # grep is not guaranteed to exist (e.g. on a stock Windows install).
+            print("[x] grep not found in PATH")
+            return 1
+        if result.stdout:
+            print(result.stdout[:3000])
+        return result.returncode
+    # osgrep available
+    try:
+        result = subprocess.run(
+            ["osgrep", "search", query],
+            capture_output=True, text=True, timeout=30
+        )
+        print(result.stdout)
+        if result.stderr:
+            print(result.stderr, file=sys.stderr)
+        return result.returncode
+    except subprocess.TimeoutExpired:
+        print("[x] osgrep search timed out after 30s")
+        return 1
+    except FileNotFoundError:
+        print("[x] osgrep not found in PATH")
+        return 1
+if __name__ == "__main__":
+    # Every argument after the script name is joined into one query string.
+    cli_words = sys.argv[1:]
+    if not cli_words:
+        print("Usage: python semantic-scan.py <query>")
+        sys.exit(1)
+    query = " ".join(cli_words)
+    print(f"[+] Semantic search: {query}")
+    exit_status = run_semantic_search(query)
+    sys.exit(exit_status)

package/.agent/scripts/skill_search.py ADDED Viewed

@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+"""
+Dasa Dwipa: The Local Skill Indexer (skill_search.py)
+A zero-dependency semantic search to find skills locally without any cloud services.
+Scans both `.agent/skills/` and `~/.gemini/antigravity/skills/`.
+"""
+import sys
+import os
+import re
+from pathlib import Path
+def extract_yaml_frontmatter(content):
+    """Parse the leading `---` delimited front matter into a flat dict.
+    This is not a YAML parser: each line containing ":" is split at the first
+    colon into a key and a value, and surrounding whitespace and quotes are
+    removed from the value.
+    Args:
+        content: Full text of a file.
+    Returns:
+        Dict of key -> value strings; empty if the text does not start with a
+        front-matter block.
+    """
+    # Drop a UTF-8 BOM and normalise CRLF/CR line endings first; otherwise the
+    # "^---\n" anchor never matches files saved on Windows.
+    content = content.lstrip("\ufeff").replace("\r\n", "\n").replace("\r", "\n")
+    match = re.search(r"^---\n(.*?)\n---", content, re.DOTALL)
+    if not match:
+        return {}
+    metadata = {}
+    for line in match.group(1).split("\n"):
+        if ":" in line:
+            key, val = line.split(":", 1)
+            metadata[key.strip()] = val.strip().strip("'\"")
+    return metadata
+def parse_skills_in_directory(dir_path: Path):
+    """Collect every skill described by a SKILL.md file under `dir_path`.
+    Args:
+        dir_path: Root directory to walk recursively.
+    Returns:
+        A list of dicts with "name", "description" and "path" (the directory
+        holding the SKILL.md). Files whose front matter lacks a name or a
+        description are ignored. Empty if `dir_path` is not a directory.
+    """
+    skills = []
+    if not dir_path.is_dir():
+        return skills
+    for root, _, files in os.walk(dir_path):
+        if "SKILL.md" not in files:
+            continue
+        skill_path = Path(root) / "SKILL.md"
+        try:
+            content = skill_path.read_text(encoding="utf-8")
+        except (OSError, UnicodeDecodeError):
+            # Best-effort index: an unreadable skill file is skipped, but only
+            # read/decoding errors are swallowed so real bugs still surface.
+            continue
+        meta = extract_yaml_frontmatter(content)
+        if "name" in meta and "description" in meta:
+            skills.append({
+                "name": meta["name"],
+                "description": meta["description"],
+                "path": str(skill_path.parent)
+            })
+    return skills
+def score_skill(skill, query_words):
+    """Count how many query words occur in the skill's name or description.
+    Matching is a plain substring test against the lower-cased
+    "<name> <description>" text; each word counts at most once.
+    """
+    haystack = f'{skill["name"]} {skill["description"]}'.lower()
+    return sum(1 for word in query_words if word in haystack)
+def main():
+    """Rank locally installed skills against the command-line query.
+    Joins all arguments into one lower-cased query, scores every skill found
+    under `~/.gemini/antigravity/skills` and `./.agent/skills`, and prints up
+    to three skills with a non-zero score.
+    """
+    if len(sys.argv) < 2:
+        print("Usage: skill_search.py <query>")
+        sys.exit(1)
+    query = " ".join(sys.argv[1:]).lower()
+    query_words = set(re.findall(r'\w+', query))
+    # Global skills come first, then project-local ones.
+    search_roots = (
+        Path.home() / ".gemini" / "antigravity" / "skills",
+        Path(os.getcwd()) / ".agent" / "skills",
+    )
+    all_skills = []
+    for skills_root in search_roots:
+        all_skills.extend(parse_skills_in_directory(skills_root))
+    if not all_skills:
+        print("No skills found on the local machine.")
+        sys.exit(0)
+    for entry in all_skills:
+        entry["score"] = score_skill(entry, query_words)
+    ranked_skills = sorted(all_skills, key=lambda item: item["score"], reverse=True)
+    print(f"🔍 Dasa Dwipa: Ranked Skills for '{query}'\n")
+    top_hits = [entry for entry in ranked_skills[:3] if entry["score"] > 0]
+    for entry in top_hits:
+        print(f"✨ {entry['name']} (Score: {entry['score']})")
+        print(f"   Desc: {entry['description']}")
+        print(f"   Path: {entry['path']}\n")
+    if not top_hits:
+        print("No relevant skills matched your query. Try broadening your terms.")
+if __name__ == "__main__":
+    main()

package/.agent/scripts/status_parser.py ADDED Viewed

@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+"""
+Dasa Kala: The Reporter (status_parser.py)
+Merges data from task.md and git diff --stat to output a 3-line JSON summary.
+Prevents Kala from wasting context reading entire task checklists.
+"""
+import os
+import sys
+import subprocess
+import json
+def get_task_stats():
+    """Count checklist items in the task file to measure progress.
+    Reads `.artifacts/task.md`, falling back to `.agent/task.toon`. A line
+    counts as a task when, after stripping whitespace, it starts with
+    "- [ ]" (open), "- [x]" / "- [X]" (completed) or "- [/]" (in progress).
+    Returns:
+        Dict with integer "total", "completed" and "in_progress" counts; all
+        zero when neither file exists.
+    """
+    task_path = ".artifacts/task.md"
+    if not os.path.exists(task_path):
+        # Fallback to older location
+        task_path = ".agent/task.toon"
+        if not os.path.exists(task_path):
+            return {"total": 0, "completed": 0, "in_progress": 0}
+    total = 0
+    completed = 0
+    in_progress = 0
+    # Read as UTF-8 explicitly: with the platform default encoding a task list
+    # containing emoji or other non-ASCII text can fail to decode on Windows.
+    with open(task_path, "r", encoding="utf-8") as f:
+        for line in f:
+            line = line.strip()
+            if line.startswith("- [ ]"):
+                total += 1
+            elif line.startswith(("- [x]", "- [X]")):
+                total += 1
+                completed += 1
+            elif line.startswith("- [/]"):
+                total += 1
+                in_progress += 1
+    return {"total": total, "completed": completed, "in_progress": in_progress}
+def get_git_stats():
+    """Summarise uncommitted changes as a single `git diff --shortstat` line.
+    Tries the unstaged diff first and only consults the staged diff when the
+    unstaged one is empty.
+    Returns:
+        The shortstat line, "Working tree clean" when both diffs are empty,
+        or "Unknown git status" if running git fails for any reason.
+    """
+    try:
+        for extra_args in ([], ["--cached"]):
+            stat = subprocess.check_output(["git", "diff", *extra_args, "--shortstat"]).decode('utf-8').strip()
+            if stat:
+                return stat
+        return "Working tree clean"
+    except Exception:
+        return "Unknown git status"
+def main():
+    """Write a compact project status summary to `.artifacts/status_summary.json`.
+    The summary holds the completion percentage, a "done/total (n active)"
+    task string and the git shortstat line. Always exits with status 0.
+    """
+    print("🛡️  [Dasa Kala] Initializing Project Status Reporter...")
+    tasks = get_task_stats()
+    git_stat = get_git_stats()
+    done, total = tasks["completed"], tasks["total"]
+    # Guard the division: an empty or missing task list reports 0 percent.
+    pct = round((done / total) * 100) if total > 0 else 0
+    summary = {
+        "progress_percent": pct,
+        "tasks": f"{done}/{total} ({tasks['in_progress']} active)",
+        "uncommitted_code": git_stat
+    }
+    # Write to a tiny file Kala can instantly parse
+    out_path = ".artifacts/status_summary.json"
+    os.makedirs(".artifacts", exist_ok=True)
+    with open(out_path, "w") as out_file:
+        json.dump(summary, out_file, indent=2)
+    print(f"🟢 [Kala Reporter] Status parsed. JSON Summary generated at {out_path}.")
+    sys.exit(0)
+if __name__ == "__main__":
+    main()

package/.agent/scripts/test_runner.py ADDED Viewed

@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+"""
+Dasa Indra: Universal Test Watcher (test_runner.py)
+A lightweight wrapper that detects the framework, runs tests, and compresses
+massive test console outputs into a concise TOON summary to save tokens.
+"""
+import os
+import sys
+import subprocess
+import json
+from datetime import datetime
+def detect_framework():
+    """Detect the testing framework from marker files in the current directory.
+    Checks, in order: `package.json` mentioning "jest" or "vitest";
+    `pytest.ini`, `setup.py` or `requirements.txt`; then `go.mod`.
+    Returns:
+        A (command string, framework name) tuple, or (None, None) when nothing
+        is recognised.
+    """
+    if os.path.exists("package.json"):
+        # Read as UTF-8 explicitly; the platform default encoding can fail on
+        # non-ASCII package metadata (e.g. author names) under Windows.
+        with open("package.json", "r", encoding="utf-8") as f:
+            content = f.read()
+        if '"jest"' in content:
+            return "npm test", "Jest"
+        if '"vitest"' in content:
+            return "npm run test", "Vitest"
+    if any(os.path.exists(marker) for marker in ("pytest.ini", "setup.py", "requirements.txt")):
+        return "pytest", "PyTest"
+    if os.path.exists("go.mod"):
+        return "go test ./...", "Go Test"
+    return None, None
+def generate_toon_report(framework, output, code):
+    """Condense raw test output into a short text report.
+    Args:
+        framework: Display name of the test framework.
+        output: Combined console output of the test run.
+        code: Exit status of the test command; 0 means success.
+    Returns:
+        A report with a header, the last 50 output lines and, for a failed run
+        only, up to 20 lines containing "FAIL", "Error" or "ERR!".
+    """
+    all_lines = output.split('\n')
+    failed = code != 0
+    sections = [
+        "# Test Execution Report",
+        f"Framework: {framework}",
+        f"Status: {'FAILED' if failed else 'SUCCESS'}",
+        f"Timestamp: {datetime.now().isoformat()}",
+        "## Summary Tail",
+        "```text",
+        # We only take the last 50 lines to prevent token bloat
+        "\n".join(all_lines[-50:]),
+        "```",
+        "",
+    ]
+    report = "\n".join(sections)
+    if failed:
+        markers = ('FAIL', 'Error', 'ERR!')
+        flagged = [entry for entry in all_lines if any(marker in entry for marker in markers)]
+        if flagged:
+            report += "\n## Detected Failures\n```text\n" + "\n".join(flagged[:20]) + "\n```\n"
+    return report
+def main():
+    """Detect the test framework, run it and write a compact report.
+    The report goes to `.artifacts/test_report.toon`. Exits 0 when no framework
+    is detected or the tests pass; exits 1 when the tests fail or the test
+    command cannot be started.
+    """
+    print("🛡️  [Dasa Indra] Initializing Universal Test Watcher...")
+    cmd, framework = detect_framework()
+    if not cmd:
+        print("🟡 [Indra Watcher] No recognized testing framework found in root. Skipping tests.")
+        sys.exit(0)
+    print(f"⚡ [Indra Watcher] Detected {framework}. Running: `{cmd}`")
+    try:
+        # Run tests and capture both stdout and stderr. Decode as UTF-8 with
+        # replacement: test runners emit glyphs such as check marks, which the
+        # platform default encoding may be unable to decode.
+        result = subprocess.run(
+            cmd.split(),
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+            encoding="utf-8",
+            errors="replace",
+        )
+        output = result.stdout
+        code = result.returncode
+    except FileNotFoundError:
+        print(f"🔴 [Indra Watcher] Testing executable for '{cmd}' not found.")
+        sys.exit(1)
+    except Exception as e:
+        print(f"🔴 [Indra Watcher] Unexpected error executing tests: {e}")
+        sys.exit(1)
+    # Generate the highly compressed TOON output
+    report = generate_toon_report(framework, output, code)
+    # We write this compressed report to a file so the AI can read it efficiently
+    # instead of bloating the chat context with 10,000 lines of Jest output.
+    os.makedirs(".artifacts", exist_ok=True)
+    report_path = ".artifacts/test_report.toon"
+    # Write as UTF-8 for the same reason the output is decoded as UTF-8 above.
+    with open(report_path, "w", encoding="utf-8") as f:
+        f.write(report)
+    if code != 0:
+        print(f"🔴 [Indra Watcher] Tests FAILED. Details written to {report_path}")
+        sys.exit(1)
+    else:
+        print(f"🟢 [Indra Watcher] All tests passed! Summary written to {report_path}")
+        sys.exit(0)
+if __name__ == "__main__":
+    main()

package/.agent/scripts/validate_env.py ADDED Viewed

@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+"""
+Dasa Patih: Environment Gatekeeper (validate_env.py)
+Validates the local environment against dasa.config.toon requirements before execution.
+Checks that Node, npm, and Python 3 are available; a missing runtime only produces a warning.
+"""
+import os
+import sys
+import subprocess
+import json
+def check_command(cmd):
+    """Return True if `<cmd> --version` can be launched and exits with status 0.
+    Returns False when the executable is not found or exits non-zero.
+    """
+    probe = [cmd, "--version"]
+    try:
+        subprocess.run(probe, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        return False
+    else:
+        return True
+def check_env_file():
+    """Warn when `.env.example` exists in the current directory but `.env` does not.
+    Returns:
+        False (after printing a warning) in that case, True otherwise.
+    """
+    template_present = os.path.exists(".env.example")
+    if not template_present or os.path.exists(".env"):
+        return True
+    print("🔴 [Patih Gatekeeper] WARNING: .env.example exists, but .env is missing. The agent might fail without environment variables.")
+    return False
+def parse_config():
+    """Load `.agent/dasa.config.toon` and return its contents as a dict.
+    The file is parsed with `json.load`, so only JSON content is understood.
+    Returns:
+        The parsed top-level object, or an empty dict when the file is missing,
+        unreadable, not valid JSON, or its top-level value is not an object.
+        An error is printed for every case except a missing file.
+    """
+    config_path = ".agent/dasa.config.toon"
+    if not os.path.exists(config_path):
+        return {}
+    try:
+        with open(config_path, "r", encoding="utf-8") as f:
+            config = json.load(f)
+    except (OSError, ValueError) as e:
+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
+        print(f"🔴 [Patih Gatekeeper] ERROR: Could not parse {config_path}: {e}")
+        return {}
+    if not isinstance(config, dict):
+        # Callers use .get() on the result; a list or scalar root would crash them.
+        print(f"🔴 [Patih Gatekeeper] ERROR: Could not parse {config_path}: top-level value is not an object")
+        return {}
+    return config
+def main():
+    """Validate the local environment and report the result.
+    Exits 1 if a configured workspace path does not exist. Missing runtimes
+    and a missing `.env` file only produce warnings; in every other case the
+    process exits 0.
+    """
+    print("🛡️  [Dasa Patih] Initializing Environment Gatekeeper...")
+    workspaces = parse_config().get("workspaces", {"root": "./"})
+    # 1. Check Workspaces: the first missing path aborts the run.
+    for name, path in workspaces.items():
+        if os.path.exists(path):
+            continue
+        print(f"🔴 [Patih Gatekeeper] ERROR: Configured workspace '{name}' path '{path}' does not exist.")
+        sys.exit(1)
+    # 2. Check Dependencies: (label shown to the user, executable probed).
+    runtime_probes = (("node", "node"), ("npm", "npm"), ("python", "python3"))
+    missing = [label for label, executable in runtime_probes if not check_command(executable)]
+    if missing:
+        print(f"🔴 [Patih Gatekeeper] WARNING: Missing standard runtime environments: {', '.join(missing)}")
+    # 3. Check ENV (warning only; the return value is not used)
+    check_env_file()
+    print("🟢 [Patih Gatekeeper] Environment Validation Passed. Ready for execution.")
+    sys.exit(0)
+if __name__ == "__main__":
+    main()

package/.agent/scripts/web_scraper.py ADDED Viewed

@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+"""
+Dasa Widya: The Extractor (web_scraper.py)
+Natively fetches URL content and strips all HTML, inline CSS, and JavaScript.
+Outputs pure markdown text to prevent massive token waste when Widya researches.
+Zero extra dependencies required.
+"""
+import urllib.request
+import re
+import sys
+def fetch_html(url):
+    """Fetch a URL and return its body decoded to text.
+    The body is decoded with the charset declared in the response's
+    Content-Type header, falling back to UTF-8 when none is declared or the
+    declared name is unknown. Undecodable bytes are dropped.
+    Args:
+        url: Address to request; a browser-like User-Agent header is sent.
+    Returns:
+        The decoded body, or None if the request fails (the error is printed).
+    """
+    try:
+        req = urllib.request.Request(
+            url,
+            data=None,
+            headers={
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+            }
+        )
+        with urllib.request.urlopen(req, timeout=10) as response:
+            raw = response.read()
+            # Honour the server-declared charset; assuming UTF-8 for every page
+            # silently loses characters on e.g. ISO-8859-1 or Shift_JIS sites.
+            charset = response.headers.get_content_charset() or 'utf-8'
+        try:
+            return raw.decode(charset, errors='ignore')
+        except LookupError:
+            # The server named a codec Python does not know.
+            return raw.decode('utf-8', errors='ignore')
+    except Exception as e:
+        print(f"🔴 [Widya Extractor] Error fetching {url}: {e}")
+        return None
+def extract_text(html):
+    """Strip all HTML tags and noise, returning clean text.
+    Script and style blocks are removed, common block-level tags become line
+    breaks, every remaining tag is dropped and character references such as
+    `&amp;` are decoded.
+    Args:
+        html: Raw HTML text; may be empty or None.
+    Returns:
+        The non-empty text lines, stripped and joined by blank lines; "" for
+        empty input.
+    """
+    if not html:
+        return ""
+    # Imported by name because the parameter shadows the `html` module here.
+    from html import unescape
+    # 1. Remove script and style blocks completely
+    text = re.sub(r'<script.*?</script>', '', html, flags=re.IGNORECASE | re.DOTALL)
+    text = re.sub(r'<style.*?</style>', '', text, flags=re.IGNORECASE | re.DOTALL)
+    # 2. Replace common block elements with newlines for formatting
+    text = re.sub(r'</?(p|div|br|h[1-6]|li|tr|table|ul|ol|header|footer|nav)[^>]*>', '\n', text, flags=re.IGNORECASE)
+    # 3. Strip all remaining HTML tags
+    text = re.sub(r'<[^>]+>', '', text)
+    # 4. Decode entities only after tag stripping, so escaped markup such as
+    #    "&lt;b&gt;" survives as literal text instead of being removed as a tag.
+    text = unescape(text)
+    # 5. Clean up whitespace and empty lines
+    lines = [line.strip() for line in text.split('\n')]
+    cleaned_lines = [line for line in lines if line]
+    return '\n\n'.join(cleaned_lines)
+def main():
+    """Fetch the URL given on the command line and save its text content.
+    Prepends "https://" when the argument does not start with "http", extracts
+    the text and writes at most 15,000 characters of it to
+    `.artifacts/research_extraction.toon`. Exits 1 on a usage error or a failed
+    fetch, 0 otherwise.
+    """
+    if len(sys.argv) < 2:
+        print("Usage: python3 web_scraper.py <URL>")
+        sys.exit(1)
+    url = sys.argv[1]
+    # Basic validation
+    if not url.startswith('http'):
+        url = 'https://' + url
+    print(f"🛡️  [Dasa Widya] Extracting clean text from: {url}")
+    html = fetch_html(url)
+    if not html:
+        sys.exit(1)
+    clean_text = extract_text(html)
+    # Ensure artifacts directory exists
+    import os
+    os.makedirs(".artifacts", exist_ok=True)
+    out_path = ".artifacts/research_extraction.toon"
+    # Truncate to roughly 15,000 chars to ensure we don't blow out context
+    content = clean_text[:15000]
+    if len(clean_text) > 15000:
+        content += "\n\n... [CONTENT TRUNCATED FOR TOKEN SAFETY] ..."
+    # Write as UTF-8 explicitly: scraped pages routinely contain characters the
+    # platform default encoding cannot represent.
+    with open(out_path, "w", encoding="utf-8") as f:
+        f.write(content)
+    print(f"🟢 [Widya Extractor] Successfully stripped HTML noise.")
+    print(f"Clean markdown saved to {out_path} ({len(clean_text)} chars).")
+    sys.exit(0)
+if __name__ == "__main__":
+    main()