code-assembler-pro 4.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ """
2
+ Code Assembler Pro - A tool for consolidating source code for LLM analysis.
3
+ """
4
+
5
+ from .core import assemble_codebase, assemble_from_config
6
+ from .config import AssemblerConfig
7
+ from .constants import __version__
8
+ from .interactive import run_interactive_mode
9
+
10
# Public API of the package: the names bound by a star-import of it.
__all__ = [
    # Assembly entry points
    "assemble_codebase",
    "assemble_from_config",
    # Configuration dataclass
    "AssemblerConfig",
    # Interactive mode runner
    "run_interactive_mode",
    # Package version string
    "__version__",
]
@@ -0,0 +1,23 @@
1
+ """
2
+ Entry point for the code_assembler package execution.
3
+ """
4
+ import sys
5
+ from pathlib import Path
6
+
7
# Directory that contains the package directory: two levels up from this file.
src_path = str(Path(__file__).resolve().parent.parent)

# Put it at the front of the module search path unless it is already listed.
if sys.path.count(src_path) == 0:
    sys.path.insert(0, src_path)

# Prefer the absolute import, which resolves through the path entry ensured
# above. If that raises ImportError, fall back to the package-relative import.
try:
    from code_assembler.cli import main
except ImportError:
    from .cli import main

if __name__ == "__main__":
    main()
@@ -0,0 +1,152 @@
1
+ """
2
+ Architecture and Quality analyzers for Code Assembler Pro.
3
+
4
+ This module extracts structural data and patterns from the codebase
5
+ without handling formatting (delegated to templates).
6
+ """
7
+ import os
8
+ from collections import defaultdict
9
+ from pathlib import Path
10
+ from typing import List, Dict, Set, Any
11
+
12
+ from .config import FileEntry, CodebaseStats
13
+ from .constants import LANGUAGE_MAP
14
+
15
+
16
class ArchitectureAnalyzer:
    """Analyzes codebase structure and detects patterns, returning raw data.

    Only ``path``, ``is_file`` and ``depth`` are read from each entry, and
    ``files_by_ext``, ``total_files`` and ``max_depth`` from the stats object.
    ``max_depth`` is also written back by the depth analysis.
    """

    # (description, indicators) pairs used by ``_get_patterns``. An indicator
    # ending in "/" names a directory and is compared with directory names;
    # every other indicator is searched as a substring of lower-cased filenames.
    _PATTERNS = (
        ('Model-View-Controller pattern detected',
         ('model.py', 'view.py', 'controller.py')),
        ('Organized test structure',
         ('test_', '__test__', 'tests.py', 'test.py')),
        ('Centralized configuration files',
         ('.env', 'config.py', 'settings.py', 'config.yml', 'pyproject.toml')),
        ('Structured documentation',
         ('readme.md', 'docs/', 'documentation/')),
        ('API/Routes architecture',
         ('routes.py', 'api.py', 'endpoints.py', 'views.py')),
        ('Persistence/Database layer',
         ('models.py', 'schema.py', 'migrations/', 'db.py')),
    )

    def __init__(self, entries: "List[FileEntry]", stats: "CodebaseStats"):
        """
        Initialize analyzer.

        Args:
            entries: List of file entries
            stats: Codebase statistics
        """
        self.entries = entries
        self.stats = stats

    def analyze_data(self) -> Dict[str, Any]:
        """
        Perform complete architecture analysis and return raw data.

        Returns:
            A dict with the keys ``components``, ``distribution``,
            ``patterns``, ``max_depth`` and ``depth_distribution``.
        """
        # Must run first: it refreshes self.stats.max_depth, which is read below.
        depth_dist = self._get_depth_distribution()

        return {
            "components": self._get_components(),
            "distribution": self._get_distribution(),
            "patterns": self._get_patterns(),
            "max_depth": self.stats.max_depth,
            "depth_distribution": depth_dist,
        }

    def _relative_parts(self) -> List[Any]:
        """Pair every entry with its path parts relative to the common root.

        Returns:
            A list of ``(entry, parts)`` tuples. The list is empty when there
            are no entries or when no common root can be computed
            (``os.path.commonpath`` raises ``ValueError`` for a mix of absolute
            and relative paths, or for paths on different drives).
        """
        if not self.entries:
            return []

        try:
            root = Path(os.path.commonpath([str(Path(e.path)) for e in self.entries]))
        except ValueError:
            return []

        pairs = []
        for entry in self.entries:
            try:
                pairs.append((entry, Path(entry.path).relative_to(root).parts))
            except ValueError:
                # Entry does not sit under the computed root; leave it out.
                continue
        return pairs

    def _get_components(self) -> List[Dict[str, Any]]:
        """Identify top-level components relative to the entries.

        A component is the first directory below the common root of all entry
        paths. Its count is the number of file entries located under that
        directory; a same-named directory deeper in the tree is not counted.

        Returns:
            ``{"name": ..., "count": ...}`` dicts sorted by component name.
        """
        counts: Dict[str, int] = {}
        for entry, parts in self._relative_parts():
            if len(parts) < 2:
                # The root itself, or an item sitting directly in the root.
                continue
            top_dir = parts[0]
            counts.setdefault(top_dir, 0)
            if entry.is_file:
                counts[top_dir] += 1

        return [{"name": name, "count": counts[name]} for name in sorted(counts)]

    def _get_depth_distribution(self) -> Dict[int, int]:
        """Count files at each directory depth level and sync max_depth.

        Returns:
            Mapping of depth to number of file entries, ordered by depth.
            ``self.stats.max_depth`` is updated only when at least one file
            entry exists.
        """
        depth_counts = defaultdict(int)
        for entry in self.entries:
            if entry.is_file:
                depth_counts[entry.depth] += 1

        if depth_counts:
            self.stats.max_depth = max(depth_counts)

        return dict(sorted(depth_counts.items()))

    def _get_distribution(self) -> List[Dict[str, Any]]:
        """Get file distribution by extension and language.

        Returns:
            One dict per extension (``ext``, ``lang``, ``count``,
            ``percentage``), most frequent first. ``lang`` is ``"unknown"``
            for extensions missing from ``LANGUAGE_MAP``; ``percentage`` is
            relative to ``stats.total_files`` and rounded to one decimal.
        """
        if not self.stats.files_by_ext:
            return []

        total = self.stats.total_files
        by_count = sorted(
            self.stats.files_by_ext.items(),
            key=lambda item: item[1],
            reverse=True,
        )

        results = []
        for ext, count in by_count:
            percentage = (count / total * 100) if total > 0 else 0
            results.append({
                "ext": ext,
                "lang": LANGUAGE_MAP.get(ext, "unknown"),
                "count": count,
                "percentage": round(percentage, 1),
            })
        return results

    def _get_patterns(self) -> List[str]:
        """Detect common design patterns based on file and directory names.

        A pattern is reported when any of its filename indicators occurs as a
        substring of a lower-cased filename, or when any of its directory
        indicators equals a lower-cased directory name below the common root.

        Returns:
            Sorted list of pattern descriptions.
        """
        filenames: Set[str] = {
            Path(entry.path).name.lower()
            for entry in self.entries
            if entry.is_file
        }

        # Directory names come from the folders that contain files and from
        # non-file entries, taken relative to the common root so that folders
        # above the analyzed tree cannot trigger a match.
        dir_names: Set[str] = set()
        for entry, parts in self._relative_parts():
            dir_parts = parts[:-1] if entry.is_file else parts
            dir_names.update(part.lower() for part in dir_parts)

        detected = []
        for description, indicators in self._PATTERNS:
            dir_hit = any(
                ind.rstrip('/') in dir_names
                for ind in indicators
                if ind.endswith('/')
            )
            file_hit = any(
                ind in name
                for ind in indicators
                if not ind.endswith('/')
                for name in filenames
            )
            if dir_hit or file_hit:
                detected.append(description)

        return sorted(detected)
code_assembler/cli.py ADDED
@@ -0,0 +1,148 @@
1
+ """
2
+ Command Line Interface for Code Assembler Pro.
3
+ """
4
+
5
+ import argparse
6
+ import sys
7
+ from pathlib import Path
8
+ from typing import List
9
+
10
+ from .core import assemble_codebase, assemble_from_config
11
+ from .constants import __version__, DEFAULT_MAX_FILE_SIZE_MB
12
+
13
+
14
def parse_args(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the previous behavior;
            passing a list allows programmatic use.

    Returns:
        The ``argparse.Namespace`` with the attributes ``interactive``,
        ``config``, ``paths``, ``extensions``, ``output``,
        ``exclude_patterns``, ``recursive``, ``include_readmes``,
        ``use_default_excludes`` and ``max_size``.
    """
    parser = argparse.ArgumentParser(
        description="Consolidate a codebase into a single Markdown file for LLM analysis."
    )

    parser.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
    )

    # Interactive mode
    parser.add_argument(
        "--interactive", "-i",
        action="store_true",
        help="Launch interactive wizard mode",
    )

    # Config file mode
    parser.add_argument(
        "--config", "-c",
        type=str,
        help="Path to a JSON configuration file",
    )

    # Main arguments (used if --config is not present)
    parser.add_argument(
        "paths",
        nargs="*",
        help="Files or directories to analyze",
    )

    parser.add_argument(
        "--ext", "-e",
        dest="extensions",
        nargs="+",
        help="Extensions to include (e.g., py md json)",
    )

    parser.add_argument(
        "--output", "-o",
        default="codebase.md",
        help="Output file name (default: codebase.md)",
    )

    parser.add_argument(
        "--exclude", "-x",
        dest="exclude_patterns",
        nargs="+",
        help="Patterns to exclude (added to defaults)",
    )

    # Boolean flags: each "--no-*" switch turns a default-on option off.
    parser.add_argument(
        "--no-recursive",
        action="store_false",
        dest="recursive",
        help="Do not traverse subdirectories recursively",
    )

    parser.add_argument(
        "--no-readmes",
        action="store_false",
        dest="include_readmes",
        help="Do not automatically include README files",
    )

    parser.add_argument(
        "--no-default-excludes",
        action="store_false",
        dest="use_default_excludes",
        help="Do not use the default exclusion list",
    )

    parser.add_argument(
        "--max-size",
        type=float,
        default=DEFAULT_MAX_FILE_SIZE_MB,
        help=f"Maximum file size in MB (default: {DEFAULT_MAX_FILE_SIZE_MB})",
    )

    # Set defaults for flags
    parser.set_defaults(
        recursive=True,
        include_readmes=True,
        use_default_excludes=True,
    )

    return parser.parse_args(argv)
103
+
104
+
105
def main():
    """Run the command-line interface.

    Dispatches, in order of precedence, to the interactive mode
    (``--interactive``), the JSON configuration mode (``--config``) or the
    plain argument mode, which requires at least one path and one extension.

    Exits with status 1 when required arguments are missing or when any
    exception is raised while assembling; the error message is printed.
    """
    args = parse_args()

    try:
        if args.interactive:
            # The flag was previously parsed but never acted upon.
            # Imported here so the module is only loaded when requested.
            from .interactive import run_interactive_mode

            # NOTE(review): assumes run_interactive_mode() needs no
            # arguments - confirm against code_assembler/interactive.py.
            run_interactive_mode()
            return

        if args.config:
            # JSON Configuration Mode
            print(f"Loading configuration from: {args.config}")
            assemble_from_config(args.config)
            return

        # CLI Arguments Mode
        if not args.paths:
            print("Error: No path specified.")
            print("Usage: code-assembler path/to/code --ext py js")
            sys.exit(1)

        if not args.extensions:
            print("Error: No extensions specified.")
            print("Use --ext or -e (e.g., --ext py md)")
            sys.exit(1)

        # Normalize extensions (add leading dot if missing)
        extensions = [
            e if e.startswith('.') else f'.{e}'
            for e in args.extensions
        ]

        assemble_codebase(
            paths=args.paths,
            extensions=extensions,
            exclude_patterns=args.exclude_patterns,
            output=args.output,
            recursive=args.recursive,
            include_readmes=args.include_readmes,
            max_file_size_mb=args.max_size,
            use_default_excludes=args.use_default_excludes,
        )

    except Exception as e:
        # sys.exit() above raises SystemExit, which is not an Exception
        # subclass, so the early exits pass through this handler untouched.
        print(f"\n❌ An error occurred: {str(e)}")
        sys.exit(1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,136 @@
1
+ """
2
+ Configuration classes for Code Assembler Pro.
3
+
4
+ This module defines all configuration dataclasses and validation logic.
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from pathlib import Path
9
+ from typing import List, Optional
10
+
11
+ from .constants import DEFAULT_EXCLUDE_PATTERNS, DEFAULT_MAX_FILE_SIZE_MB
12
+
13
+
14
@dataclass
class AssemblerConfig:
    """
    Main configuration for codebase assembly.

    Attributes:
        paths: List of file/directory paths to process
        extensions: File extensions to include (with or without leading dot)
        exclude_patterns: Patterns to exclude from processing
        output_file: Output markdown filename
        recursive: Whether to recursively traverse directories
        include_readmes: Whether to automatically include README files
        max_file_size_mb: Maximum file size in MB to process
        truncate_large_files: If True, truncates files exceeding max_file_size_mb
        truncation_limit_lines: Number of lines to keep if truncated
        show_progress: Whether to show progress information
        use_default_excludes: Whether to use default exclude patterns

    Raises:
        ValueError: If ``paths`` or ``extensions`` is empty, or if
            ``max_file_size_mb`` is not positive.
    """

    paths: List[str]
    extensions: List[str]
    exclude_patterns: List[str] = field(default_factory=list)
    output_file: str = "codebase.md"
    recursive: bool = True
    include_readmes: bool = True
    max_file_size_mb: float = DEFAULT_MAX_FILE_SIZE_MB
    truncate_large_files: bool = True
    truncation_limit_lines: int = 500
    show_progress: bool = True
    use_default_excludes: bool = True

    def __post_init__(self):
        """Validate and normalize configuration after initialization."""
        if not self.paths:
            raise ValueError("At least one path must be specified")

        if not self.extensions:
            raise ValueError("At least one extension must be specified")

        # Normalize extensions to include leading dot
        self.extensions = [
            ext if ext.startswith('.') else f'.{ext}'
            for ext in self.extensions
        ]

        # Tolerate None so that a caller forwarding an unset option does not
        # crash on the list concatenation below.
        patterns = list(self.exclude_patterns or [])

        if self.use_default_excludes:
            # dict.fromkeys removes duplicates while keeping first-seen order
            # (user patterns first, then defaults), so the result is
            # deterministic, unlike a round-trip through set().
            patterns = list(dict.fromkeys(patterns + list(DEFAULT_EXCLUDE_PATTERNS)))

        self.exclude_patterns = patterns

        if self.max_file_size_mb <= 0:
            raise ValueError("max_file_size_mb must be positive")

    @classmethod
    def from_dict(cls, config_dict: dict) -> "AssemblerConfig":
        """Create a configuration from a dict whose keys are field names.

        Unknown keys raise ``TypeError`` because they are passed straight to
        the constructor; validation in ``__post_init__`` applies as usual.
        """
        return cls(**config_dict)

    def to_dict(self) -> dict:
        """Return all configuration fields as a plain dict keyed by field name."""
        return {
            "paths": self.paths,
            "extensions": self.extensions,
            "exclude_patterns": self.exclude_patterns,
            "output_file": self.output_file,
            "recursive": self.recursive,
            "include_readmes": self.include_readmes,
            "max_file_size_mb": self.max_file_size_mb,
            "truncate_large_files": self.truncate_large_files,
            "truncation_limit_lines": self.truncation_limit_lines,
            "show_progress": self.show_progress,
            "use_default_excludes": self.use_default_excludes,
        }
86
+
87
+
88
@dataclass
class FileEntry:
    """A single table-of-contents item: either a file or a directory."""

    path: str
    type: str  # 'file' or 'dir'
    depth: int
    size_bytes: int = 0
    line_count: int = 0

    @property
    def name(self) -> str:
        """Final component of ``path``."""
        entry_path = Path(self.path)
        return entry_path.name

    @property
    def is_file(self) -> bool:
        """True when ``type`` is ``'file'``."""
        kind = self.type
        return kind == 'file'

    @property
    def is_directory(self) -> bool:
        """True when ``type`` is ``'dir'``."""
        kind = self.type
        return kind == 'dir'
108
+
109
+
110
@dataclass
class CodebaseStats:
    """Running totals collected while a codebase is assembled."""

    total_files: int = 0
    total_lines: int = 0
    total_chars: int = 0
    estimated_tokens: int = 0
    files_by_ext: dict = field(default_factory=dict)  # extension -> file count
    largest_file: Optional[tuple] = None  # (path, size) once set
    max_depth: int = 0
    skipped_files: List[str] = field(default_factory=list)

    def update_largest_file(self, path: str, size: int):
        """Record ``(path, size)`` if nothing is stored yet or ``size`` is strictly larger."""
        current = self.largest_file
        if current and not size > current[1]:
            return
        self.largest_file = (path, size)

    def add_file(self, extension: str, lines: int, size: int):
        """Count one more file: bump the file total, the line total and the per-extension tally.

        ``size`` is accepted but not used by this method.
        """
        self.total_files += 1
        self.total_lines += lines
        self.files_by_ext[extension] = self.files_by_ext.get(extension, 0) + 1

    def skip_file(self, path: str, reason: str = ""):
        """Append ``path`` to the skipped list, followed by `` (reason)`` when a reason is given."""
        suffix = f" ({reason})" if reason else ""
        self.skipped_files.append(f"{path}{suffix}")
@@ -0,0 +1,203 @@
1
+ """
2
+ Constants for Code Assembler Pro.
3
+
4
+ This module contains all constant values used throughout the package,
5
+ including language mappings, file extensions, and default configurations.
6
+ """
7
+
8
+ from typing import Dict
9
+
10
# Package version string.
__version__ = "4.2.1"
14
+
15
+ # Language mapping for syntax highlighting
16
+ LANGUAGE_MAP: Dict[str, str] = {
17
+ # Programming languages
18
+ ".py": "python",
19
+ ".pyw": "python",
20
+ ".pyi": "python",
21
+ ".js": "javascript",
22
+ ".jsx": "jsx",
23
+ ".mjs": "javascript",
24
+ ".cjs": "javascript",
25
+ ".ts": "typescript",
26
+ ".tsx": "tsx",
27
+ ".java": "java",
28
+ ".kt": "kotlin",
29
+ ".kts": "kotlin",
30
+ ".scala": "scala",
31
+ ".c": "c",
32
+ ".h": "c",
33
+ ".cpp": "cpp",
34
+ ".cc": "cpp",
35
+ ".cxx": "cpp",
36
+ ".hpp": "cpp",
37
+ ".hh": "cpp",
38
+ ".hxx": "cpp",
39
+ ".cs": "csharp",
40
+ ".go": "go",
41
+ ".rs": "rust",
42
+ ".rb": "ruby",
43
+ ".php": "php",
44
+ ".swift": "swift",
45
+ ".m": "objective-c",
46
+ ".r": "r",
47
+ ".jl": "julia",
48
+ ".lua": "lua",
49
+ ".pl": "perl",
50
+ ".pm": "perl",
51
+ ".dart": "dart",
52
+ ".elm": "elm",
53
+ ".ex": "elixir",
54
+ ".exs": "elixir",
55
+ ".erl": "erlang",
56
+ ".hrl": "erlang",
57
+ ".clj": "clojure",
58
+ ".cljs": "clojure",
59
+ ".fs": "fsharp",
60
+ ".fsx": "fsharp",
61
+ ".hs": "haskell",
62
+ ".ml": "ocaml",
63
+ ".v": "verilog",
64
+ ".vhd": "vhdl",
65
+
66
+ # Web & markup
67
+ ".html": "html",
68
+ ".htm": "html",
69
+ ".xml": "xml",
70
+ ".svg": "xml",
71
+ ".css": "css",
72
+ ".scss": "scss",
73
+ ".sass": "sass",
74
+ ".less": "less",
75
+ ".vue": "vue",
76
+ ".svelte": "svelte",
77
+
78
+ # Data & config
79
+ ".json": "json",
80
+ ".yaml": "yaml",
81
+ ".yml": "yaml",
82
+ ".toml": "toml",
83
+ ".ini": "ini",
84
+ ".cfg": "ini",
85
+ ".conf": "ini",
86
+ ".csv": "csv",
87
+ ".tsv": "csv",
88
+
89
+ # Documentation
90
+ ".md": "markdown",
91
+ ".markdown": "markdown",
92
+ ".rst": "rst",
93
+ ".txt": "text",
94
+ ".adoc": "asciidoc",
95
+
96
+ # Shell & scripts
97
+ ".sh": "bash",
98
+ ".bash": "bash",
99
+ ".zsh": "zsh",
100
+ ".fish": "fish",
101
+ ".ps1": "powershell",
102
+ ".psm1": "powershell",
103
+ ".bat": "batch",
104
+ ".cmd": "batch",
105
+
106
+ # Database
107
+ ".sql": "sql",
108
+ ".psql": "sql",
109
+ ".mysql": "sql",
110
+ ".pgsql": "sql",
111
+
112
+ # Build & CI/CD
113
+ ".dockerfile": "dockerfile",
114
+ ".dockerignore": "text",
115
+ ".gitignore": "text",
116
+ ".gitattributes": "text",
117
+ ".editorconfig": "ini",
118
+
119
+ # Other
120
+ ".env": "bash",
121
+ ".properties": "properties",
122
+ ".gradle": "gradle",
123
+ ".makefile": "makefile",
124
+ ".cmake": "cmake",
125
+ ".proto": "protobuf",
126
+ ".graphql": "graphql",
127
+ ".gql": "graphql",
128
+ }
129
+
130
+ # Default exclude patterns
131
DEFAULT_EXCLUDE_PATTERNS = [
    # Python byte-code, native binaries and packaging output
    "__pycache__", ".pyc", ".pyo", ".pyd",
    ".so", ".dll", ".dylib",
    ".egg-info", ".eggs", "dist", "build",
    # Version-control metadata
    ".git", ".svn", ".hg",
    # Environments and dependency folders
    ".venv", "venv", "env", "node_modules",
    # Editor settings and OS artifacts
    ".idea", ".vscode", ".DS_Store", "Thumbs.db",
]
155
+
156
+ # Common README filenames
157
README_FILENAMES = [
    # Upper-case spellings
    "README.md", "README.MD", "README.rst", "README.txt", "README",
    # Other capitalizations of the Markdown variant
    "readme.md", "Readme.md",
]
166
+
167
+ # Token estimation constants
168
# Rough average number of characters per token, used for token estimates.
CHARS_PER_TOKEN = 4

# File size limits, in megabytes.
DEFAULT_MAX_FILE_SIZE_MB = 10.0
MAX_SAFE_FILE_SIZE_MB = 100.0
173
+
174
+ # Emojis for output formatting
175
# Emoji used when formatting output, keyed by a short symbolic name.
#
# The values are written as Unicode escape sequences on purpose: the literal
# characters had been corrupted into mojibake (UTF-8 bytes re-decoded through
# a single-byte codepage), which even made "folder" and "memo" identical.
# Escapes keep the source ASCII-only so this cannot happen again.
EMOJI = {
    "folder": "\U0001F4C1",          # file folder
    "file": "\U0001F4C4",            # page facing up
    "readme": "\u2139\uFE0F",        # information
    "success": "\u2705",             # check mark button
    "warning": "\u26A0\uFE0F",       # warning sign
    "error": "\u274C",               # cross mark
    "rocket": "\U0001F680",          # rocket
    "chart": "\U0001F4CA",           # bar chart
    "target": "\U0001F3AF",          # bullseye
    "building": "\U0001F3DB\uFE0F",  # classical building
    "map": "\U0001F5FA\uFE0F",       # world map
    "book": "\U0001F4D6",            # open book
    "bug": "\U0001F41B",             # bug
    "memo": "\U0001F4DD",            # memo
    "mag": "\U0001F50D",             # magnifying glass
    "test": "\U0001F9EA",            # test tube
    "recycle": "\U0001F504",         # counterclockwise arrows
    "bulb": "\U0001F4A1",            # light bulb
    "floppy": "\U0001F4BE",          # floppy disk
}
196
+
197
+ # Header templates
198
# Numeric header level assigned to each kind of heading.
HEADER_LEVELS = dict(
    document=1,
    section=2,
    subsection=3,
    file=2,
)