@musashishao/agent-kit 1.3.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -179,17 +179,25 @@ def cmd_init(args: argparse.Namespace) -> int:
  print(f"\n[{steps_completed + 1}/{total_steps}] Generating dependency graph...")
  graph_script = kit_path / "skills" / "graph-mapper" / "scripts" / "generate_graph.py"
 
- src_dir = project_root / "src"
- if not src_dir.exists():
- src_dir = project_root / "app"
+ # Smart source detection
+ src_dir = None
+ for folder in ["src", "app", "lib", "scripts", "components", "docs"]:
+ if (project_root / folder).exists():
+ src_dir = project_root / folder
+ break
+
+ if src_dir is None:
+ # Fallback to project root
+ src_dir = project_root
 
- if src_dir.exists() and graph_script.exists():
+ if graph_script.exists():
  result = subprocess.run(
  [
  "python3", str(graph_script),
  "--src", str(src_dir),
  "--output", str(agent_dir / "graph.json"),
  "--format", "both",
+ "--lang", "universal",
  ],
  capture_output=True,
  text=True,
@@ -199,7 +207,7 @@ def cmd_init(args: argparse.Namespace) -> int:
  else:
  print(" ⚠️ No source code found or script failed")
  else:
- print(" ⚠️ No src/ or app/ directory found, skipping")
+ print(" ⚠️ Graph script not found, skipping")
  steps_completed += 1
 
  # Step 5: Configure AI hosts
@@ -258,6 +266,29 @@ def cmd_init(args: argparse.Namespace) -> int:
  # Command: sync
  # ============================================================================
 
+ def find_source_dir(project_root: Path) -> Path:
+ """Intelligently find the source directory."""
+ # Priority folders
+ for folder in ["src", "app", "lib", "scripts", "components"]:
+ if (project_root / folder).exists():
+ return project_root / folder
+
+ # Check if there are source files in root
+ source_extensions = {".ts", ".tsx", ".js", ".jsx", ".py", ".go", ".c", ".cpp", ".cs", ".md"}
+ has_source_files = False
+ for item in project_root.iterdir():
+ if item.is_file() and item.suffix in source_extensions:
+ has_source_files = True
+ break
+
+ # Check for docs folder
+ if (project_root / "docs").exists():
+ return project_root / "docs"
+
+ # Fallback: use project root if it has any content, otherwise return root anyway
+ return project_root
+
+
  def cmd_sync(args: argparse.Namespace) -> int:
  """Sync AI infrastructure data."""
  project_root = Path(args.project_root).resolve()
@@ -271,6 +302,10 @@ def cmd_sync(args: argparse.Namespace) -> int:
  print("❌ .agent directory not found. Run 'ak init' first.")
  return 1
 
+ # Determine source directory
+ src_dir = find_source_dir(project_root)
+ print(f"🔍 Detected source directory: {src_dir.relative_to(project_root) if src_dir != project_root else '.'}")
+
  # Determine what to sync
  targets = []
  if args.target == "all":
@@ -285,17 +320,19 @@ def cmd_sync(args: argparse.Namespace) -> int:
  print("\n📊 Updating dependency graph...")
  graph_script = kit_path / "skills" / "graph-mapper" / "scripts" / "generate_graph.py"
 
- src_dir = project_root / "src"
- if not src_dir.exists():
- src_dir = project_root / "app"
-
- if src_dir.exists() and graph_script.exists():
+ if not graph_script.exists():
+ print(f" ❌ Graph script not found at: {graph_script}")
+ success = False
+ elif not src_dir.exists():
+ print(f" ⚠️ Source directory {src_dir} not found")
+ else:
  result = subprocess.run(
  [
  "python3", str(graph_script),
  "--src", str(src_dir),
  "--output", str(agent_dir / "graph.json"),
  "--format", "both",
+ "--lang", "universal",
  ],
  capture_output=True,
  text=True,
@@ -303,26 +340,26 @@ def cmd_sync(args: argparse.Namespace) -> int:
  if result.returncode == 0:
  print(" ✅ Graph updated")
  else:
- print(f" ❌ Graph sync failed: {result.stderr[:200]}")
+ print(f" ❌ Graph sync failed: {result.stderr}")
  success = False
- else:
- print(" ⚠️ Source directory or script not found")
 
  # Sync RAG
  if "rag" in targets:
  print("\n📚 Updating RAG chunks...")
  rag_script = kit_path / "skills" / "rag-engineering" / "scripts" / "chunk_code.py"
 
- src_dir = project_root / "src"
- if not src_dir.exists():
- src_dir = project_root / "app"
-
- if src_dir.exists() and rag_script.exists():
+ if not rag_script.exists():
+ print(f" ❌ RAG script not found at: {rag_script}")
+ success = False
+ elif not src_dir.exists():
+ print(f" ⚠️ Source directory {src_dir} not found")
+ else:
  result = subprocess.run(
  [
  "python3", str(rag_script),
  "--src", str(src_dir),
  "--output", str(agent_dir / "rag" / "chunks.json"),
+ "--lang", "universal",
  ],
  capture_output=True,
  text=True,
@@ -330,10 +367,8 @@ def cmd_sync(args: argparse.Namespace) -> int:
  if result.returncode == 0:
  print(" ✅ RAG chunks updated")
  else:
- print(f" ❌ RAG sync failed: {result.stderr[:200]}")
+ print(f" ❌ RAG sync failed: {result.stderr}")
  success = False
- else:
- print(" ⚠️ Source directory or script not found")
 
  # Update timestamp cache
  cache_file = agent_dir / ".cache" / "timestamps.json"
@@ -311,6 +311,165 @@ class PythonAnalyzer:
  return nodes
 
 
+ class MarkdownAnalyzer:
+ """Analyzes Markdown files for internal links and references."""
+
+ EXTENSIONS = {'.md', '.mdx', '.markdown'}
+
+ # Regex patterns for link detection
+ LINK_PATTERNS = [
+ # Standard markdown link: [text](path)
+ r'\[([^\]]+)\]\(([^)]+)\)',
+ # Wikilink: [[path]] or [[path|text]]
+ r'\[\[([^\]|]+)(?:\|[^\]]+)?\]\]',
+ ]
+
+ # Pattern for image refs (also a form of dependency)
+ IMAGE_PATTERN = r'!\[([^\]]*)\]\(([^)]+)\)'
+
+ def __init__(self, base_path: Path, exclude_patterns: List[str]):
+ self.base_path = base_path
+ self.exclude_patterns = exclude_patterns
+
+ def should_exclude(self, path: Path) -> bool:
+ """Check if path should be excluded."""
+ path_str = str(path)
+ for pattern in self.exclude_patterns:
+ if pattern in path_str:
+ return True
+ return False
+
+ def analyze_file(self, file_path: Path) -> Optional[Node]:
+ """Analyze a single Markdown file for links."""
+ if self.should_exclude(file_path):
+ return None
+
+ try:
+ content = file_path.read_text(encoding='utf-8')
+ except (UnicodeDecodeError, PermissionError):
+ return None
+
+ relative_path = str(file_path.relative_to(self.base_path))
+ node_type = self._detect_type(relative_path, content)
+
+ # Extract internal links
+ links = []
+
+ # Standard markdown links
+ for match in re.findall(self.LINK_PATTERNS[0], content):
+ link_path = match[1]
+ resolved = self._resolve_link(link_path, file_path)
+ if resolved:
+ links.append(resolved)
+
+ # Wikilinks
+ for match in re.findall(self.LINK_PATTERNS[1], content):
+ resolved = self._resolve_wikilink(match)
+ if resolved:
+ links.append(resolved)
+
+ # Extract "exports" (main topics/headings)
+ exports = []
+ # Get main title (first h1)
+ title_match = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
+ if title_match:
+ exports.append(title_match.group(1).strip())
+
+ # Get h2 sections as additional exports
+ for match in re.findall(r'^##\s+(.+)$', content, re.MULTILINE):
+ exports.append(match.strip())
+
+ return Node(
+ id=relative_path,
+ type=node_type,
+ path=relative_path,
+ imports=list(set(links)), # Links = imports in docs context
+ exports=exports[:10] # Limit to first 10 headings
+ )
+
+ def _detect_type(self, path: str, content: str) -> str:
+ """Detect the type of documentation file."""
+ path_lower = path.lower()
+
+ if 'readme' in path_lower:
+ return 'readme'
+ elif 'changelog' in path_lower or 'history' in path_lower:
+ return 'changelog'
+ elif 'contributing' in path_lower:
+ return 'contributing'
+ elif 'license' in path_lower:
+ return 'license'
+ elif '/docs/' in path_lower or path_lower.startswith('docs/'):
+ return 'documentation'
+ elif '/guides/' in path_lower or '/tutorials/' in path_lower:
+ return 'guide'
+ elif '/api/' in path_lower or 'api' in path_lower:
+ return 'api_doc'
+ elif 'plan' in path_lower or 'roadmap' in path_lower:
+ return 'plan'
+ else:
+ return 'document'
+
+ def _resolve_link(self, link_path: str, from_file: Path) -> Optional[str]:
+ """Resolve a markdown link to a relative path."""
+ # Skip external links
+ if link_path.startswith(('http://', 'https://', 'mailto:', '#')):
+ return None
+
+ # Skip anchor-only links
+ if link_path.startswith('#'):
+ return None
+
+ # Remove anchor from path
+ if '#' in link_path:
+ link_path = link_path.split('#')[0]
+
+ if not link_path:
+ return None
+
+ # Resolve relative path
+ from_dir = from_file.parent
+ resolved = (from_dir / link_path).resolve()
+
+ try:
+ relative = str(resolved.relative_to(self.base_path))
+ # Check if file exists
+ if resolved.exists():
+ return relative
+ return None
+ except ValueError:
+ return None
+
+ def _resolve_wikilink(self, link_name: str) -> Optional[str]:
+ """Resolve a wikilink to a file path."""
+ # Search for file matching the wikilink name
+ search_name = link_name.strip()
+
+ # Try exact match with .md extension
+ for ext in self.EXTENSIONS:
+ for file_path in self.base_path.rglob(f"*{ext}"):
+ if file_path.stem.lower() == search_name.lower():
+ try:
+ return str(file_path.relative_to(self.base_path))
+ except ValueError:
+ continue
+
+ return None
+
+ def analyze_directory(self, directory: Path) -> List[Node]:
+ """Analyze all Markdown files in a directory."""
+ nodes = []
+
+ for ext in self.EXTENSIONS:
+ for file_path in directory.rglob(f'*{ext}'):
+ if file_path.is_file():
+ node = self.analyze_file(file_path)
+ if node:
+ nodes.append(node)
+
+ return nodes
+
+
  def build_edges(nodes: List[Node]) -> List[Edge]:
  """Build edges from node imports."""
  edges = []
@@ -424,13 +583,16 @@ def generate_markdown(graph: Graph, output_path: Path):
 
 
  def main():
- parser = argparse.ArgumentParser(description='Generate dependency graph')
+ parser = argparse.ArgumentParser(
+ description='Universal Dependency Graph Generator - Code and Documentation'
+ )
  parser.add_argument('--src', default='./src', help='Source directory')
  parser.add_argument('--output', default='.agent/graph.json', help='Output file')
  parser.add_argument('--format', choices=['json', 'markdown', 'both'], default='both')
- parser.add_argument('--lang', choices=['typescript', 'python', 'auto'], default='auto')
+ parser.add_argument('--lang', choices=['typescript', 'python', 'auto', 'universal'],
+ default='universal', help='Language mode (universal = Code + Markdown)')
  parser.add_argument('--depth', type=int, default=3, help='Max depth for impact analysis')
- parser.add_argument('--exclude', default='node_modules,__pycache__,.git,dist,build',
+ parser.add_argument('--exclude', default='node_modules,__pycache__,.git,dist,build,.agent',
  help='Comma-separated patterns to exclude')
 
  args = parser.parse_args()
@@ -443,37 +605,77 @@ def main():
  print(f"Error: Source directory '{src_path}' does not exist")
  return 1
 
- # Detect language if auto
+ # Collect all nodes
+ all_nodes = []
  lang = args.lang
- if lang == 'auto':
+
+ if lang == 'universal':
+ print("Universal mode: Analyzing Code + Markdown files")
+
+ # Analyze TypeScript/JavaScript
+ ts_analyzer = TypeScriptAnalyzer(src_path, exclude_patterns)
+ ts_nodes = ts_analyzer.analyze_directory(src_path)
+ all_nodes.extend(ts_nodes)
+ print(f" TypeScript/JS: {len(ts_nodes)} files")
+
+ # Analyze Python
+ py_analyzer = PythonAnalyzer(src_path, exclude_patterns)
+ py_nodes = py_analyzer.analyze_directory(src_path)
+ all_nodes.extend(py_nodes)
+ print(f" Python: {len(py_nodes)} files")
+
+ # Analyze Markdown
+ md_analyzer = MarkdownAnalyzer(src_path, exclude_patterns)
+ md_nodes = md_analyzer.analyze_directory(src_path)
+ all_nodes.extend(md_nodes)
+ print(f" Markdown: {len(md_nodes)} files")
+
+ elif lang == 'auto':
  ts_files = list(src_path.rglob('*.ts')) + list(src_path.rglob('*.tsx'))
  py_files = list(src_path.rglob('*.py'))
  lang = 'typescript' if len(ts_files) >= len(py_files) else 'python'
  print(f"Auto-detected language: {lang}")
-
- # Analyze based on language
- if lang == 'typescript':
- analyzer = TypeScriptAnalyzer(src_path, exclude_patterns)
+
+ if lang == 'typescript':
+ analyzer = TypeScriptAnalyzer(src_path, exclude_patterns)
+ else:
+ analyzer = PythonAnalyzer(src_path, exclude_patterns)
+
+ all_nodes = analyzer.analyze_directory(src_path)
  else:
- analyzer = PythonAnalyzer(src_path, exclude_patterns)
+ # Specific language
+ if lang == 'typescript':
+ analyzer = TypeScriptAnalyzer(src_path, exclude_patterns)
+ else:
+ analyzer = PythonAnalyzer(src_path, exclude_patterns)
+
+ all_nodes = analyzer.analyze_directory(src_path)
 
- print(f"Analyzing {src_path}...")
- nodes = analyzer.analyze_directory(src_path)
- print(f"Found {len(nodes)} files")
+ print(f"Total: {len(all_nodes)} files")
+
+ if len(all_nodes) == 0:
+ print("Warning: No files found. Check source directory and exclude patterns.")
+ print("Supported: .ts, .tsx, .js, .jsx, .py, .md, .mdx")
 
  # Build graph
- edges = build_edges(nodes)
- print(f"Found {len(edges)} dependencies")
+ edges = build_edges(all_nodes)
+ print(f"Found {len(edges)} dependencies/links")
+
+ # Categorize by type
+ type_counts = {}
+ for node in all_nodes:
+ type_counts[node.type] = type_counts.get(node.type, 0) + 1
 
  graph = Graph(
- nodes=nodes,
+ nodes=all_nodes,
  edges=edges,
  metadata={
  "generated_at": datetime.now().isoformat(),
  "source_path": str(src_path),
- "language": lang,
- "total_files": len(nodes),
- "total_edges": len(edges)
+ "mode": lang,
+ "total_files": len(all_nodes),
+ "total_edges": len(edges),
+ "file_types": type_counts
  }
  )
 
@@ -505,29 +505,320 @@ class PythonChunker:
  )
 
 
+ class MarkdownChunker:
+ """Chunk Markdown files by heading sections."""
+
+ EXTENSIONS = {'.md', '.mdx', '.markdown'}
+
+ def __init__(self, max_chunk_size: int = 2000, overlap: int = 100):
+ self.max_chunk_size = max_chunk_size
+ self.overlap = overlap
+
+ def chunk_file(self, file_path: Path, base_path: Path) -> List[Chunk]:
+ """Chunk a Markdown file by headings."""
+ try:
+ content = file_path.read_text(encoding='utf-8')
+ except (UnicodeDecodeError, PermissionError):
+ return []
+
+ relative_path = str(file_path.relative_to(base_path))
+ chunks = []
+
+ # Extract sections by heading
+ sections = self._extract_sections(content)
+
+ if sections:
+ for section in sections:
+ chunk_content = section['content']
+
+ # If section is too large, split it
+ if len(chunk_content) > self.max_chunk_size:
+ sub_chunks = self._split_by_paragraphs(chunk_content)
+ for i, sub in enumerate(sub_chunks):
+ chunks.append(self._create_chunk(
+ content=sub,
+ file_path=relative_path,
+ section_title=f"{section['title']}_part{i+1}",
+ heading_level=section['level'],
+ parent_headings=section['parents'],
+ start_line=section['start_line'],
+ end_line=section['end_line']
+ ))
+ else:
+ chunks.append(self._create_chunk(
+ content=chunk_content,
+ file_path=relative_path,
+ section_title=section['title'],
+ heading_level=section['level'],
+ parent_headings=section['parents'],
+ start_line=section['start_line'],
+ end_line=section['end_line']
+ ))
+ else:
+ # No headings found, treat entire file as one chunk or split by paragraphs
+ if len(content) <= self.max_chunk_size:
+ chunks.append(self._create_chunk(
+ content=content,
+ file_path=relative_path,
+ section_title=file_path.stem,
+ heading_level=0,
+ parent_headings=[],
+ start_line=1,
+ end_line=content.count('\n') + 1
+ ))
+ else:
+ sub_chunks = self._split_by_paragraphs(content)
+ for i, sub in enumerate(sub_chunks):
+ chunks.append(self._create_chunk(
+ content=sub,
+ file_path=relative_path,
+ section_title=f"{file_path.stem}_part{i+1}",
+ heading_level=0,
+ parent_headings=[],
+ start_line=1,
+ end_line=content.count('\n') + 1
+ ))
+
+ return chunks
+
+ def _extract_sections(self, content: str) -> List[Dict]:
+ """Extract sections based on Markdown headings."""
+ sections = []
+ lines = content.split('\n')
+
+ heading_pattern = re.compile(r'^(#{1,6})\s+(.+)$')
+
+ current_section = None
+ current_lines = []
+ parent_stack = [] # Track parent headings for context
+
+ for i, line in enumerate(lines):
+ match = heading_pattern.match(line)
+
+ if match:
+ # Save previous section
+ if current_section:
+ current_section['content'] = '\n'.join(current_lines).strip()
+ current_section['end_line'] = i
+ if current_section['content']:
+ sections.append(current_section)
+
+ # Update parent stack
+ level = len(match.group(1))
+ title = match.group(2).strip()
+
+ # Pop parents that are same or deeper level
+ while parent_stack and parent_stack[-1]['level'] >= level:
+ parent_stack.pop()
+
+ parents = [p['title'] for p in parent_stack]
+
+ # Start new section
+ current_section = {
+ 'title': title,
+ 'level': level,
+ 'parents': parents.copy(),
+ 'start_line': i + 1,
+ 'end_line': i + 1,
+ 'content': ''
+ }
+ current_lines = [line]
+
+ # Add this heading to parent stack
+ parent_stack.append({'level': level, 'title': title})
+ elif current_section:
+ current_lines.append(line)
+
+ # Don't forget last section
+ if current_section:
+ current_section['content'] = '\n'.join(current_lines).strip()
+ current_section['end_line'] = len(lines)
+ if current_section['content']:
+ sections.append(current_section)
+
+ return sections
+
+ def _split_by_paragraphs(self, content: str) -> List[str]:
+ """Split content by paragraphs when too large."""
+ chunks = []
+ paragraphs = re.split(r'\n\s*\n', content)
+
+ current_chunk = []
+ current_size = 0
+
+ for para in paragraphs:
+ para_size = len(para) + 2 # +2 for paragraph break
+
+ if current_size + para_size > self.max_chunk_size and current_chunk:
+ chunks.append('\n\n'.join(current_chunk))
+ current_chunk = []
+ current_size = 0
+
+ current_chunk.append(para)
+ current_size += para_size
+
+ if current_chunk:
+ chunks.append('\n\n'.join(current_chunk))
+
+ return chunks
+
+ def _create_chunk(
+ self,
+ content: str,
+ file_path: str,
+ section_title: str,
+ heading_level: int,
+ parent_headings: List[str],
+ start_line: int,
+ end_line: int
+ ) -> Chunk:
+ """Create a Chunk object with rich context."""
+ # Build context string for better retrieval
+ context_path = ' > '.join(parent_headings + [section_title]) if parent_headings else section_title
+
+ return Chunk(
+ id=generate_chunk_id(file_path, content),
+ content=content,
+ metadata={
+ 'file_path': file_path,
+ 'file_type': 'markdown',
+ 'chunk_type': f'heading_{heading_level}' if heading_level > 0 else 'paragraph',
+ 'name': section_title,
+ 'context_path': context_path,
+ 'heading_level': heading_level,
+ 'parent_headings': parent_headings,
+ 'start_line': start_line,
+ 'end_line': end_line,
+ 'char_count': len(content),
+ 'line_count': content.count('\n') + 1
+ }
+ )
+
+
+ class TextChunker:
+ """Chunk plain text files by paragraphs."""
+
+ EXTENSIONS = {'.txt', '.text', '.log'}
+
+ def __init__(self, max_chunk_size: int = 1500, overlap: int = 100):
+ self.max_chunk_size = max_chunk_size
+ self.overlap = overlap
+
+ def chunk_file(self, file_path: Path, base_path: Path) -> List[Chunk]:
+ """Chunk a text file by paragraphs."""
+ try:
+ content = file_path.read_text(encoding='utf-8')
+ except (UnicodeDecodeError, PermissionError):
+ return []
+
+ relative_path = str(file_path.relative_to(base_path))
+ chunks = []
+
+ if len(content) <= self.max_chunk_size:
+ chunks.append(self._create_chunk(
+ content=content,
+ file_path=relative_path,
+ name=file_path.stem,
+ start_line=1,
+ end_line=content.count('\n') + 1
+ ))
+ else:
+ # Split by paragraphs
+ paragraphs = re.split(r'\n\s*\n', content)
+ current_chunk = []
+ current_size = 0
+ chunk_index = 0
+
+ for para in paragraphs:
+ para_size = len(para) + 2
+
+ if current_size + para_size > self.max_chunk_size and current_chunk:
+ chunk_index += 1
+ chunks.append(self._create_chunk(
+ content='\n\n'.join(current_chunk),
+ file_path=relative_path,
+ name=f"{file_path.stem}_part{chunk_index}",
+ start_line=1,
+ end_line=content.count('\n') + 1
+ ))
+ current_chunk = []
+ current_size = 0
+
+ current_chunk.append(para)
+ current_size += para_size
+
+ if current_chunk:
+ chunk_index += 1
+ chunks.append(self._create_chunk(
+ content='\n\n'.join(current_chunk),
+ file_path=relative_path,
+ name=f"{file_path.stem}_part{chunk_index}",
+ start_line=1,
+ end_line=content.count('\n') + 1
+ ))
+
+ return chunks
+
+ def _create_chunk(
+ self,
+ content: str,
+ file_path: str,
+ name: str,
+ start_line: int,
+ end_line: int
+ ) -> Chunk:
+ """Create a Chunk object."""
+ return Chunk(
+ id=generate_chunk_id(file_path, content),
+ content=content,
+ metadata={
+ 'file_path': file_path,
+ 'file_type': 'text',
+ 'chunk_type': 'paragraph',
+ 'name': name,
+ 'start_line': start_line,
+ 'end_line': end_line,
+ 'char_count': len(content),
+ 'line_count': content.count('\n') + 1
+ }
+ )
+
+
  def chunk_directory(
  src_path: Path,
  lang: str,
  exclude_patterns: List[str],
  max_chunk_size: int = 1500
  ) -> List[Chunk]:
- """Chunk all files in a directory."""
+ """Chunk all files in a directory - Universal support for Code, Docs, and Text."""
  all_chunks = []
 
- # Select chunker
- if lang == 'typescript':
- chunker = TypeScriptChunker(max_chunk_size=max_chunk_size)
- extensions = TypeScriptChunker.EXTENSIONS
- else:
- chunker = PythonChunker(max_chunk_size=max_chunk_size)
- extensions = {'.py'}
+ # Initialize all chunkers
+ ts_chunker = TypeScriptChunker(max_chunk_size=max_chunk_size)
+ py_chunker = PythonChunker(max_chunk_size=max_chunk_size)
+ md_chunker = MarkdownChunker(max_chunk_size=max_chunk_size)
+ txt_chunker = TextChunker(max_chunk_size=max_chunk_size)
+
+ # Map extensions to chunkers
+ extension_map = {}
+ for ext in TypeScriptChunker.EXTENSIONS:
+ extension_map[ext] = ts_chunker
+ extension_map['.py'] = py_chunker
+ for ext in MarkdownChunker.EXTENSIONS:
+ extension_map[ext] = md_chunker
+ for ext in TextChunker.EXTENSIONS:
+ extension_map[ext] = txt_chunker
+
+ # All supported extensions
+ all_extensions = set(extension_map.keys())
 
  # Process files
  for file_path in src_path.rglob('*'):
  if not file_path.is_file():
  continue
 
- if file_path.suffix not in extensions:
+ # Check if extension is supported
+ if file_path.suffix not in all_extensions:
  continue
 
  # Check exclusions
@@ -535,19 +826,25 @@ def chunk_directory(
  if any(pattern in path_str for pattern in exclude_patterns):
  continue
 
- chunks = chunker.chunk_file(file_path, src_path)
- all_chunks.extend(chunks)
+ # Select appropriate chunker
+ chunker = extension_map.get(file_path.suffix)
+ if chunker:
+ chunks = chunker.chunk_file(file_path, src_path)
+ all_chunks.extend(chunks)
 
  return all_chunks
 
 
  def main():
- parser = argparse.ArgumentParser(description='Chunk code files')
+ parser = argparse.ArgumentParser(
+ description='Universal Chunker - Code, Markdown, and Text files'
+ )
  parser.add_argument('--src', default='./src', help='Source directory')
  parser.add_argument('--output', default='.agent/rag/chunks.json', help='Output file')
- parser.add_argument('--lang', choices=['typescript', 'python', 'auto'], default='auto')
+ parser.add_argument('--lang', choices=['typescript', 'python', 'auto', 'universal'],
+ default='universal', help='Language mode (universal = all file types)')
  parser.add_argument('--max-size', type=int, default=1500, help='Max chunk size in chars')
- parser.add_argument('--exclude', default='node_modules,__pycache__,.git,dist,build',
+ parser.add_argument('--exclude', default='node_modules,__pycache__,.git,dist,build,.agent',
  help='Patterns to exclude')
 
  args = parser.parse_args()
@@ -560,28 +857,41 @@ def main():
  print(f"Error: Source directory '{src_path}' does not exist")
  return 1
 
- # Auto-detect language
+ # Mode selection
  lang = args.lang
  if lang == 'auto':
  ts_files = list(src_path.rglob('*.ts')) + list(src_path.rglob('*.tsx'))
  py_files = list(src_path.rglob('*.py'))
  lang = 'typescript' if len(ts_files) >= len(py_files) else 'python'
  print(f"Auto-detected language: {lang}")
+ elif lang == 'universal':
+ print("Universal mode: Processing Code, Markdown, and Text files")
 
  print(f"Chunking {src_path}...")
  chunks = chunk_directory(src_path, lang, exclude_patterns, args.max_size)
  print(f"Created {len(chunks)} chunks")
 
+ if len(chunks) == 0:
+ print("Warning: No chunks created. Check if source directory has supported files.")
+ print("Supported: .ts, .tsx, .js, .jsx, .py, .md, .mdx, .txt")
+
  # Save output
  output_path.parent.mkdir(parents=True, exist_ok=True)
 
+ # Categorize chunks by file type
+ file_types = {}
+ for c in chunks:
+ ft = c.metadata.get('file_type', 'unknown')
+ file_types[ft] = file_types.get(ft, 0) + 1
+
  output_data = {
  'metadata': {
  'generated_at': datetime.now().isoformat(),
  'source_path': str(src_path),
- 'language': lang,
+ 'mode': lang,
  'total_chunks': len(chunks),
- 'max_chunk_size': args.max_size
+ 'max_chunk_size': args.max_size,
+ 'file_types': file_types
  },
  'chunks': [asdict(c) for c in chunks]
  }
@@ -0,0 +1,48 @@
+ # PLAN: Universal Intelligence Engine (UIE)
+
+ > Goal: Upgrade Agent Kit to support every kind of working folder (Docs, Research, Code) with comprehensive knowledge understanding.
+
+ ## Phase 1: Universal RAG (Smart Chunking for every format) ✅ COMPLETED
+ - [x] **Upgrade `chunk_code.py` into a Universal Chunker**:
+ - [x] Add `MarkdownChunker`: split segments by heading (`#`, `##`, `###`).
+ - [x] Add `TextChunker`: split by paragraph.
+ - [x] Keep AST-based chunking for code (TS, Python).
+ - [x] **Contextual metadata for docs**:
+ - [x] Automatically extract the file's main title as context for each chunk inside it.
+ - [x] Record `context_path` (Parent > Child heading) so the AI knows the surrounding context.
+
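The heading-aware chunks described in Phase 1 land in `.agent/rag/chunks.json`, the default `--output` of `chunk_code.py` in this diff. A minimal sketch of inspecting that file and printing each Markdown chunk's `context_path`, assuming the JSON layout built in the script's `main()` above (a `metadata` object plus a `chunks` list):

```python
# Minimal sketch: inspect heading-aware Markdown chunks from chunk_code.py --lang universal.
# Assumes the default output path and the JSON layout assembled in main() above.
import json
from pathlib import Path

data = json.loads(Path(".agent/rag/chunks.json").read_text(encoding="utf-8"))
print(f"mode={data['metadata']['mode']}, chunks={data['metadata']['total_chunks']}")

for chunk in data["chunks"]:
    meta = chunk["metadata"]
    if meta["file_type"] == "markdown":
        # context_path is the "Parent > Child" heading trail recorded by MarkdownChunker
        print(f"{meta['file_path']}: {meta['context_path']} ({meta['char_count']} chars)")
```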
+ ## Phase 2: Knowledge Graph for Documents (Relationship Mapping) ✅ COMPLETED
+ - [x] **Upgrade `generate_graph.py`**:
+ - [x] Add `MarkdownAnalyzer`: scan internal links `[text](file.md)` and wikilinks `[[file]]`.
+ - [x] Treat headings (H1, H2) as the document's exported "API".
+ - [x] `universal` mode: combine Code + Markdown in a single graph.
+
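Phase 2's `universal` mode is wired into `cmd_sync` above through the new `--lang universal` flag. A minimal sketch of invoking `generate_graph.py` the same way and summarizing the result; the kit location and the assumption that `graph.json` serializes the `Graph` fields shown above (`nodes`, `edges`, `metadata`) are illustrative, not confirmed by this diff:

```python
# Minimal sketch: run the graph mapper in universal mode and summarize graph.json.
# The kit path and output layout are assumptions based on the calls in cmd_sync above.
import json
import subprocess
from pathlib import Path

kit_path = Path(".agent/kit")  # hypothetical location; use your actual kit path
graph_script = kit_path / "skills" / "graph-mapper" / "scripts" / "generate_graph.py"

subprocess.run(
    [
        "python3", str(graph_script),
        "--src", ".",
        "--output", ".agent/graph.json",
        "--format", "both",
        "--lang", "universal",
    ],
    check=True,
)

meta = json.loads(Path(".agent/graph.json").read_text(encoding="utf-8"))["metadata"]
# file_types mixes code and document node types (e.g. 'readme', 'documentation', 'plan')
print(meta["total_files"], meta["total_edges"], meta["file_types"])
```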
+ ## Phase 3: MCP Gateway Enhancements
+ - [ ] **Content-Type Awareness**:
+ - [ ] The `analyze_dependencies` tool returns "References" instead of "Imports" for docs.
+ - [ ] Alias the `search_code_logic` tool as `search_knowledge`.
+ - [ ] **Auto-Detection**: the gateway reports the project type (Creative, Tech, Mixed).
+
+ ## Phase 4: CLI Evolution (`ak` command)
+ - [ ] **Smarter `ak init`**:
+ - [ ] Do not require a `src` or `app` directory.
+ - [ ] Automatically create `AGENTS.md` from the "General Knowledge" template when no code is detected.
+ - [ ] **Comprehensive `ak sync`**:
+ - [ ] Scan every file (except the ignore list) so no knowledge is missed.
+
+ ## Phase 5: Verification & Beta Test
+ - [ ] **Test Case 1**: a project of only 100 Markdown files (user guides).
+ - [ ] **Test Case 2**: a mixed project (Next.js + Docs + API Specs).
+ - [ ] **Test Case 3**: a research project (many .txt files and scattered notes).
+
+ ---
+
+ ## Agent Assignments
+ - **Python Specialist**: handle the Chunker and Graph Mapper logic (Phases 1 & 2).
+ - **TypeScript Expert**: update the MCP Gateway (Phase 3).
+ - **Orchestrator**: update the CLI and finalize the docs (Phase 4).
+
+ ## Verification Checklist
+ - [ ] `ai status` shows ✅ RAG and ✅ Graph even in a folder with no code.
+ - [ ] The AI can find information buried deep in a sub-section of a long Markdown file.
+ - [ ] The dependency graph shows the links between documentation files.
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@musashishao/agent-kit",
- "version": "1.3.0",
+ "version": "1.4.1",
  "description": "AI Agent templates - Skills, Agents, Workflows, and AI-Ready Data Infrastructure Gateway",
  "main": "index.js",
  "bin": {