project-scriber 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- project_scriber-1.0.0.dist-info/METADATA +197 -0
- project_scriber-1.0.0.dist-info/RECORD +8 -0
- project_scriber-1.0.0.dist-info/WHEEL +4 -0
- project_scriber-1.0.0.dist-info/entry_points.txt +2 -0
- project_scriber-1.0.0.dist-info/licenses/LICENSE +21 -0
- scriber/__init__.py +0 -0
- scriber/cli.py +252 -0
- scriber/core.py +306 -0
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: project-scriber
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: An intelligent tool to map, analyze, and compile project source code for LLM context.
|
|
5
|
+
Project-URL: Homepage, https://github.com/SunneV/ProjectScriber
|
|
6
|
+
Project-URL: Issues, https://github.com/SunneV/ProjectScriber/issues
|
|
7
|
+
Author-email: "SunneV (Wojciech Mariusz Cichoń)" <wojciech.m.cichon@gmail.com>
|
|
8
|
+
License: MIT License
|
|
9
|
+
|
|
10
|
+
Copyright (c) 2025 SunneV (Wojciech Mariusz Cichoń)
|
|
11
|
+
|
|
12
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
13
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
14
|
+
in the Software without restriction, including without limitation the rights
|
|
15
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
16
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
17
|
+
furnished to do so, subject to the following conditions:
|
|
18
|
+
|
|
19
|
+
The above copyright notice and this permission notice shall be included in all
|
|
20
|
+
copies or substantial portions of the Software.
|
|
21
|
+
|
|
22
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
23
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
24
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
25
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
26
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
27
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
28
|
+
SOFTWARE.
|
|
29
|
+
License-File: LICENSE
|
|
30
|
+
Keywords: code-analysis,context-builder,developer-tools,llm,source-code
|
|
31
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
32
|
+
Classifier: Operating System :: OS Independent
|
|
33
|
+
Classifier: Programming Language :: Python :: 3
|
|
34
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
35
|
+
Classifier: Topic :: Utilities
|
|
36
|
+
Requires-Python: >=3.10
|
|
37
|
+
Requires-Dist: pathspec
|
|
38
|
+
Requires-Dist: pyperclip
|
|
39
|
+
Requires-Dist: python-dotenv
|
|
40
|
+
Requires-Dist: rich
|
|
41
|
+
Requires-Dist: tiktoken
|
|
42
|
+
Requires-Dist: tomli; python_version < '3.11'
|
|
43
|
+
Requires-Dist: tomlkit
|
|
44
|
+
Provides-Extra: dev
|
|
45
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
46
|
+
Requires-Dist: pytest-mock; extra == 'dev'
|
|
47
|
+
Description-Content-Type: text/markdown
|
|
48
|
+
|
|
49
|
+
# Introduction
|
|
50
|
+
|
|
51
|
+
<p align="center">
|
|
52
|
+
<img src="https://raw.githubusercontent.com/SunneV/ProjectScriber/main/assets/scriber_logo.svg" alt="ProjectScriber Logo" width="420">
|
|
53
|
+
</p>
|
|
54
|
+
<p align="center">
|
|
55
|
+
<img src="https://raw.githubusercontent.com/SunneV/ProjectScriber/main/assets/scriber_name.svg" alt="ProjectScriber Name" width="300">
|
|
56
|
+
</p>
|
|
57
|
+
|
|
58
|
+
A command-line tool to intelligently map and compile your entire project's source code into a single, context-optimized
|
|
59
|
+
text file for Large Language Models (LLMs).
|
|
60
|
+
|
|
61
|
+
ProjectScriber scans your project directory, respects `.gitignore` rules, applies custom filters, and bundles all
|
|
62
|
+
relevant code into a clean, readable format. It's the perfect way to provide a complete codebase to an AI for analysis,
|
|
63
|
+
documentation, or refactoring.
|
|
64
|
+
|
|
65
|
+
-----
|
|
66
|
+
|
|
67
|
+
## Key Features
|
|
68
|
+
|
|
69
|
+
- **🌳 Smart Project Mapping:** Generates a clear and intuitive tree view of your project's structure.
|
|
70
|
+
- **⚙️ Intelligent Filtering:** Automatically respects `.gitignore` rules and supports custom `include` and `exclude`
|
|
71
|
+
patterns via a `.scriber.json` file for fine-grained control.
|
|
72
|
+
- **📊 In-depth Code Analysis:** Provides a summary with total file size, estimated token count (using `cl100k_base`),
|
|
73
|
+
and a language breakdown for a quick overview of your codebase.
|
|
74
|
+
- **✨ Interactive Setup:** A simple `scriber init` command walks you through creating a configuration file tailored to
|
|
75
|
+
your project.
|
|
76
|
+
- **📋 Clipboard Integration:** Use the `--copy` flag to automatically copy the entire consolidated output to your
|
|
77
|
+
clipboard, ready to be pasted into any application.
|
|
78
|
+
- **🔧 Flexible Configuration:** Manage your settings globally in a `pyproject.toml` file or per-project with a
|
|
79
|
+
`.scriber.json` file.
|
|
80
|
+
|
|
81
|
+
-----
|
|
82
|
+
|
|
83
|
+
## Getting Started
|
|
84
|
+
|
|
85
|
+
### Prerequisites
|
|
86
|
+
|
|
87
|
+
- Python 3.10 or higher.
|
|
88
|
+
|
|
89
|
+
### Installation
|
|
90
|
+
|
|
91
|
+
Install the package from the source using pip. For development, include the optional dependencies.
|
|
92
|
+
|
|
93
|
+
```shell
|
|
94
|
+
# Navigate to the project root directory
|
|
95
|
+
pip install .[dev]
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
This will install `ProjectScriber` and make the `scriber` command available in your terminal.
|
|
99
|
+
|
|
100
|
+
-----
|
|
101
|
+
|
|
102
|
+
## Usage
|
|
103
|
+
|
|
104
|
+
### 1\. Basic Scan
|
|
105
|
+
|
|
106
|
+
To run ProjectScriber on the current directory, simply execute the `scriber` command. This will generate a
|
|
107
|
+
`scriber_output.txt` file in the same directory.
|
|
108
|
+
|
|
109
|
+
```shell
|
|
110
|
+
scriber
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
To target a different project directory:
|
|
114
|
+
|
|
115
|
+
```shell
|
|
116
|
+
scriber /path/to/your/project
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### 2\. First-Time Configuration
|
|
120
|
+
|
|
121
|
+
For a new project, run the interactive `init` command to create a `.scriber.json` configuration file. This will guide
|
|
122
|
+
you through setting up rules for ignoring files and respecting `.gitignore`.
|
|
123
|
+
|
|
124
|
+
```shell
|
|
125
|
+
scriber init
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### 3\. Advanced Example
|
|
129
|
+
|
|
130
|
+
Scan a different project, specify a custom output file, and copy the result to the clipboard all in one command.
|
|
131
|
+
|
|
132
|
+
```shell
|
|
133
|
+
scriber ../my-other-project --output custom_map.txt --copy
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
-----
|
|
137
|
+
|
|
138
|
+
## Commands and Options
|
|
139
|
+
|
|
140
|
+
You can customize ProjectScriber's behavior with the following commands and options.
|
|
141
|
+
|
|
142
|
+
| Command/Option | Alias | Description |
|
|
143
|
+
|:----------------------|:-----:|:-------------------------------------------------------------------------------|
|
|
144
|
+
| `scriber [path]` | | Targets a specific directory. Defaults to the current working directory. |
|
|
145
|
+
| `init` | | Starts the interactive process to create a `.scriber.json` configuration file. |
|
|
146
|
+
| `--output [filename]` | `-o` | Specifies a custom name for the output file. |
|
|
147
|
+
| `--copy` | `-c` | Copies the final output directly to the clipboard. |
|
|
148
|
+
| `--tree-only` | | Generates only the folder structure map, excluding all file contents. |
|
|
149
|
+
| `--config [path]` | | Specifies the path to a custom configuration file. |
|
|
150
|
+
|
|
151
|
+
-----
|
|
152
|
+
|
|
153
|
+
## Configuration
|
|
154
|
+
|
|
155
|
+
You can control ProjectScriber's behavior by placing a `.scriber.json` file in your project's root, which can be easily
|
|
156
|
+
created with the `scriber init` command.
|
|
157
|
+
|
|
158
|
+
**Example `.scriber.json`:**
|
|
159
|
+
|
|
160
|
+
```json
|
|
161
|
+
{
|
|
162
|
+
"use_gitignore": true,
|
|
163
|
+
"exclude": [
|
|
164
|
+
"__pycache__",
|
|
165
|
+
"node_modules",
|
|
166
|
+
"*.log"
|
|
167
|
+
],
|
|
168
|
+
"include": [
|
|
169
|
+
"*.py",
|
|
170
|
+
"*.js"
|
|
171
|
+
]
|
|
172
|
+
}
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
**Example `pyproject.toml`:**
|
|
176
|
+
|
|
177
|
+
```toml
|
|
178
|
+
[tool.scriber]
|
|
179
|
+
use_gitignore = true
|
|
180
|
+
exclude = [
|
|
181
|
+
"__pycache__",
|
|
182
|
+
"node_modules",
|
|
183
|
+
"*.log",
|
|
184
|
+
]
|
|
185
|
+
include = [
|
|
186
|
+
"*.py",
|
|
187
|
+
"*.js",
|
|
188
|
+
]
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
- **`use_gitignore`**: If `true`, all patterns in your `.gitignore` file will be used for exclusion.
|
|
192
|
+
- **`exclude`**: A list of file or folder name patterns to explicitly ignore.
|
|
193
|
+
- **`include`**: If provided, *only* files matching these patterns will be included in the output, subject to the exclusion
|
|
194
|
+
rules.
|
|
195
|
+
|
|
196
|
+
Settings can also be placed in your `pyproject.toml` file under the `[tool.scriber]` section. If a `.scriber.json` file
|
|
197
|
+
is present, it will take precedence over the `pyproject.toml` configuration.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
scriber/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
scriber/cli.py,sha256=CdKZMfaeYTicTqNz0GIMDlrTGWtnSbOAYCVfaAdyrCY,9933
|
|
3
|
+
scriber/core.py,sha256=LZBL0_XFLxNxfNufIxXSPKTBPj1HMXSK9v5DJDbDNVs,12736
|
|
4
|
+
project_scriber-1.0.0.dist-info/METADATA,sha256=ZAbukF9vm7RRBT0SMYQqSZYo6hV5ll9hqLHWcpOfK4U,7273
|
|
5
|
+
project_scriber-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
6
|
+
project_scriber-1.0.0.dist-info/entry_points.txt,sha256=gJdpGxBU9cD3iHBDOF56S3kNLpLLXivqj0t0MOiK6kU,45
|
|
7
|
+
project_scriber-1.0.0.dist-info/licenses/LICENSE,sha256=ZdaM_XhB0sejG9DBcmBQADwkyvgVTixJV8wF9I3dn3g,1111
|
|
8
|
+
project_scriber-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 SunneV (Wojciech Mariusz Cichoń)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
scriber/__init__.py
ADDED
|
File without changes
|
scriber/cli.py
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
from importlib import metadata
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import pyperclip
|
|
10
|
+
import rich.box
|
|
11
|
+
import tomlkit
|
|
12
|
+
from dotenv import load_dotenv
|
|
13
|
+
from rich.console import Console
|
|
14
|
+
from rich.panel import Panel
|
|
15
|
+
from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn
|
|
16
|
+
from rich.prompt import Confirm, Prompt
|
|
17
|
+
from rich.table import Table
|
|
18
|
+
from rich.text import Text
|
|
19
|
+
|
|
20
|
+
from .core import DEFAULT_CONFIG, Scriber
|
|
21
|
+
|
|
22
|
+
# Runs at import time, before main() reads its settings from os.environ
# (PROJECT_SCRIBER_ROOT, PROJECT_SCRIBER_CONFIG, SCRIBER_EXEC_MODE).
load_dotenv()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def format_bytes(byte_count: int) -> str:
    """Format a byte count as a human-readable string.

    Args:
        byte_count: Size in bytes.

    Returns:
        The size in MB or KB (1024-based, two decimals) once it reaches one
        full unit, otherwise the raw count followed by "Bytes".
    """
    kib = 1024
    mib = kib * kib
    # Inclusive bounds: exactly one unit reads "1.00 KB" / "1.00 MB" instead
    # of "1024 Bytes" / "1024.00 KB".
    if byte_count >= mib:
        return f"{byte_count / mib:.2f} MB"
    if byte_count >= kib:
        return f"{byte_count / kib:.2f} KB"
    return f"{byte_count} Bytes"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def save_to_json(console: Console, config: dict[str, Any]):
    """Save the configuration to ``.scriber.json`` in the current directory.

    Args:
        console: Console used to report the outcome.
        config: Settings to serialise as indented JSON.

    A failure to write the file is reported on the console, not raised.
    """
    config_path = Path.cwd() / ".scriber.json"
    try:
        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=2)
    except IOError as e:
        console.print(f"\n❌ [bold red]Error saving config file:[/] {e}")
    else:
        # Reported outside the try so a console failure is not mislabelled
        # as a failure to save the file.
        console.print(f"\n✅ [bold green]Configuration saved to:[/] {config_path}")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def save_to_toml(console: Console, config: dict[str, Any]):
    """Merge the configuration into ``[tool.scriber]`` of ``pyproject.toml``.

    The file is looked up in the current working directory and must already
    exist. Missing ``tool`` / ``tool.scriber`` tables are created.

    Args:
        console: Console used to report the outcome.
        config: Settings to write under ``[tool.scriber]``.

    Any failure is reported on the console, not raised.
    """
    toml_path = Path.cwd() / "pyproject.toml"
    if not toml_path.exists():
        console.print("\n❌ [bold red]Error: `pyproject.toml` not found in the current directory.[/]")
        return

    try:
        with open(toml_path, "r+", encoding="utf-8") as f:
            doc = tomlkit.parse(f.read())

            tool_table = doc.setdefault("tool", tomlkit.table())
            scriber_table = tool_table.setdefault("scriber", tomlkit.table())
            scriber_table.update(config)

            # Serialise BEFORE truncating: if dumping fails, the existing
            # pyproject.toml must be left untouched rather than emptied.
            rendered = tomlkit.dumps(doc)

            f.seek(0)
            f.truncate()
            f.write(rendered)

        console.print(f"\n✅ [bold green]Configuration saved to:[/] {toml_path}")
    except Exception as e:
        console.print(f"\n❌ [bold red]Error updating `pyproject.toml`:[/] {e}")
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def handle_init(console: Console):
    """Interactively build a configuration and save it where the user chooses.

    Asks, in order, whether to respect `.gitignore`, which patterns to exclude
    (pre-filled from ``DEFAULT_CONFIG``), and which patterns to include. The
    ``include`` key is only stored when at least one pattern was entered. The
    result is then written to `.scriber.json` or `pyproject.toml`.

    Args:
        console: Console used for the banner and the menu text.
    """

    def _split_patterns(raw: str) -> list[str]:
        # Comma-separated input -> trimmed, non-empty entries.
        trimmed = (piece.strip() for piece in raw.split(','))
        return [piece for piece in trimmed if piece]

    console.print(Panel("[bold cyan]Scriber Configuration Setup[/]", expand=False))
    console.print("This utility will help you create a configuration file.\n")

    respect_gitignore = Confirm.ask(
        "β¨ Would you like to respect `.gitignore` rules?", default=True
    )
    config: dict[str, Any] = {"use_gitignore": respect_gitignore}

    suggested_excludes = ", ".join(DEFAULT_CONFIG.get("exclude", []))
    exclude_answer = Prompt.ask(
        "π Enter patterns to exclude (comma-separated)", default=suggested_excludes
    )
    config["exclude"] = _split_patterns(exclude_answer)

    include_answer = Prompt.ask(
        "π Enter patterns to include (optional, comma-separated)", default=""
    )
    wanted = _split_patterns(include_answer)
    if wanted:
        config["include"] = wanted

    for menu_line in (
        "\n[bold]Choose a save location:[/bold]",
        " [cyan]1[/]: Save to `.scriber.json` (project-specific override)",
        " [cyan]2[/]: Save to `pyproject.toml` (project default)",
    ):
        console.print(menu_line)

    choice = Prompt.ask("Enter your choice", choices=["1", "2"], default="1")

    # Menu choice -> writer; any other value saves nothing.
    writers = {'1': save_to_json, '2': save_to_toml}
    writer = writers.get(choice)
    if writer is not None:
        writer(console, config)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def run_scriber(args: argparse.Namespace, console: Console):
    """Map the project, write the output file and print a run report.

    Args:
        args: Parsed CLI arguments; reads ``root_path``, ``config``,
            ``output``, ``tree_only`` and ``copy``.
        console: Console that receives the banner, tables and status lines.

    The output name is taken from ``--output``, then from the loaded
    configuration, then from ``DEFAULT_CONFIG``. With ``--copy`` the written
    file is read back and placed on the clipboard; a clipboard failure is
    reported on the console, not raised.
    """
    try:
        version = metadata.version("project-scriber")
    except metadata.PackageNotFoundError:
        version = "1.0.0 (local)"

    title_text = Text(f"Scriber v{version}", justify="center", style="bold magenta")
    subtitle_text = Text(
        "An intelligent tool to map, analyze, and compile project source code for LLM context.",
        justify="center",
        style="cyan",
    )
    console.print(Panel(Text.assemble(title_text, "\n", subtitle_text), expand=False, border_style="blue"))

    # Resolve once; the same absolute path feeds the scan, the summary table
    # and the output location.
    root_path = args.root_path.resolve()

    scriber = Scriber(root_path, config_path=args.config)
    # Fall back to the package-wide default name (not a second hard-coded
    # one), also when the configured value is empty or null.
    output_filename = args.output or scriber.config.get("output") or DEFAULT_CONFIG["output"]

    scriber.map_project()

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        console=console,
        transient=True,
    ) as progress:
        total_files = scriber.get_file_count()
        if total_files > 0 and not args.tree_only:
            task_id = progress.add_task("[green]Processing files...", total=total_files)
            scriber.generate_output_file(
                output_filename, tree_only=args.tree_only, progress=progress, task_id=task_id
            )
        else:
            # Nothing to report progress on: tree-only run or no mapped files.
            scriber.generate_output_file(output_filename, tree_only=args.tree_only)

    stats = scriber.get_stats()

    config_file_display = str(scriber.config_path_used) if scriber.config_path_used else "Defaults"
    summary_table = Table(box=rich.box.ROUNDED, show_header=False, title="[bold]Run Summary[/]", title_justify="left")
    summary_table.add_column("Parameter", style="cyan", no_wrap=True)
    summary_table.add_column("Value", style="magenta")
    summary_table.add_row("Project Path", str(root_path))
    summary_table.add_row("Config File", config_file_display)
    summary_table.add_row("Output File", output_filename)
    console.print(summary_table)

    if stats['total_files'] > 0:
        # NOTE(review): the leading glyph of this title looks mis-decoded and
        # the intended emoji cannot be recovered from here - TODO restore.
        results_table = Table(box=rich.box.ROUNDED, show_header=False, title="[bold]π Analysis Results[/]",
                              title_justify="left")
        results_table.add_column("Metric", style="cyan", no_wrap=True)
        results_table.add_column("Value", style="magenta", justify="right")

        results_table.add_row("Files Mapped", str(stats['total_files']))
        if stats.get('skipped_binary', 0) > 0:
            results_table.add_row("Binary Skipped", str(stats['skipped_binary']))
        results_table.add_section()
        results_table.add_row("Total Size", format_bytes(stats['total_size_bytes']))
        results_table.add_row("Est. Tokens (cl100k)", f"{stats['total_tokens']:,}")
        results_table.add_section()
        results_table.add_row("[bold]Language Breakdown[/]", "")
        for lang, count in stats['language_counts'].most_common():
            results_table.add_row(f" {lang.capitalize()}", str(count))

        console.print(results_table)
    else:
        console.print(Panel("[yellow]No files were mapped based on the current configuration.[/]", expand=False))

    output_location = root_path / output_filename

    console.print("\n✅ [green]Success! Output saved to:[/green]")
    try:
        # Render the path as a terminal hyperlink when a file URI can be built.
        uri = output_location.as_uri()
        console.print(Text(str(output_location), style=f"bold cyan underline link {uri}"))
    except Exception:
        console.print(Text(str(output_location), style="bold cyan underline"))

    if args.copy:
        try:
            with open(output_location, 'r', encoding='utf-8') as f:
                content = f.read()
            pyperclip.copy(content)
            # NOTE(review): leading glyph looks mis-decoded - TODO restore.
            console.print("π [green]Content copied to clipboard.[/green]")
        except Exception as e:
            console.print(f"❌ [bold red]Could not copy to clipboard: {e}[/bold red]")
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def main() -> None:
    """Parse the command line and dispatch to ``init`` or the default run.

    Two parsers are used: one that knows the ``init`` sub-command, and a
    "run" parser for every other invocation, so that ``scriber [path]`` works
    without naming a sub-command.

    Environment variables read here: ``PROJECT_SCRIBER_ROOT`` (default root
    path), ``PROJECT_SCRIBER_CONFIG`` (default ``--config``) and
    ``SCRIBER_EXEC_MODE`` (when equal to ``RUN_PY`` the default root becomes
    the parent of the working directory, and the variable is then removed).
    """
    console = Console()
    description = "Scriber: An intelligent tool to map, analyze, and compile project source code for LLM context."

    parser = argparse.ArgumentParser(description=description)
    subparsers = parser.add_subparsers(dest="command", title="Commands")

    init_parser = subparsers.add_parser("init", help="Create a new .scriber.json configuration file interactively.")
    init_parser.set_defaults(func=lambda args: handle_init(console))

    # This parser handles every invocation whose first argument is not a
    # sub-command, including `scriber -h` / `--help`, so it must keep
    # argparse's built-in help option enabled.
    run_parser = argparse.ArgumentParser(
        description=description,
        epilog="Run `scriber init` to create a configuration file interactively.",
    )

    exec_mode = os.environ.get('SCRIBER_EXEC_MODE')
    if exec_mode == 'RUN_PY':
        default_path = Path.cwd().parent
        del os.environ['SCRIBER_EXEC_MODE']
    else:
        default_path = Path.cwd()

    run_parser.add_argument(
        "root_path",
        nargs="?",
        default=os.environ.get("PROJECT_SCRIBER_ROOT", default_path),
        type=Path,
        help="The root directory of the project to map.",
    )
    run_parser.add_argument(
        "-o",
        "--output",
        help="The name of the output file. Overrides config file settings.",
    )
    run_parser.add_argument(
        "--config",
        default=os.environ.get("PROJECT_SCRIBER_CONFIG"),
        type=Path,
        help="Path to a custom configuration file. Overrides default .scriber.json"
    )
    run_parser.add_argument(
        "--copy",
        "-c",
        action="store_true",
        help="Copy the final output to the clipboard.",
    )
    run_parser.add_argument(
        "--tree-only",
        action="store_true",
        help="Generate only the file tree structure without file content.",
    )
    run_parser.set_defaults(func=lambda args: run_scriber(args, console))

    # Make the 'run' command the default action if no subcommand is provided.
    if len(sys.argv) == 1 or sys.argv[1] not in subparsers.choices:
        args = run_parser.parse_args()
    else:
        args = parser.parse_args()

    args.func(args)
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
# Run the CLI when this module is executed as a script.
if __name__ == "__main__":
    main()
|
scriber/core.py
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
import fnmatch
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
from collections import Counter
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Dict, List, Optional, Set, TextIO
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
import tomllib
|
|
10
|
+
except ImportError:
|
|
11
|
+
import tomli as tomllib
|
|
12
|
+
|
|
13
|
+
import tiktoken
|
|
14
|
+
from rich.console import Console
|
|
15
|
+
|
|
16
|
+
# Name of the generated output file when none is configured.
_DEFAULT_OUTPUT_FILENAME = "scriber_output.txt"
# Name of the per-project JSON configuration file.
_CONFIG_FILE_NAME = ".scriber.json"

# Built-in settings; values found in pyproject.toml or .scriber.json are
# layered on top of these.
DEFAULT_CONFIG = {
    "use_gitignore": True,
    "exclude": [
        # Common
        # The trailing comma matters: without it "LICENSE" and ".git" are
        # concatenated into the single, useless pattern "LICENSE.git".
        "LICENSE",

        # Version Control
        ".git",

        # IDE / Editor Config
        ".idea", ".vscode", ".project", ".settings", ".classpath",

        # Python
        "__pycache__", "*.pyc", ".venv", "venv", ".pytest_cache", "uv.lock",

        # Node.js
        "node_modules", "npm-debug.log*", "yarn-error.log",

        # Build Artifacts
        "build", "dist", "target", "bin", "obj", "out",

        # Dependencies
        "vendor", "bower_components",

        # Logs & Temp Files
        "*.log", "*.lock", "*.tmp", "temp", "tmp",

        # OS-specific
        ".DS_Store", "Thumbs.db", "*~", "*.swp", "*.swo",

        # Scriber's own files
        _DEFAULT_OUTPUT_FILENAME, _CONFIG_FILE_NAME,
    ],
    "include": [],
    "output": _DEFAULT_OUTPUT_FILENAME,
}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class Scriber:
|
|
57
|
+
# Class-level alias of the module constant naming the JSON config file.
_CONFIG_FILE_NAME = _CONFIG_FILE_NAME
# Maps a file suffix (".py") or a whole file name ("Dockerfile",
# "CMakeLists.txt", "LICENSE") to a language label.
# NOTE(review): presumably consulted by `_get_language`, which is not
# visible here - confirm how name-based keys are looked up.
_LANGUAGE_MAP = {
    ".asm": "asm", ".s": "asm", ".html": "html", ".htm": "html", ".css": "css",
    ".scss": "scss", ".sass": "sass", ".less": "less", ".js": "javascript",
    ".mjs": "javascript", ".cjs": "javascript", ".jsx": "jsx", ".ts": "typescript",
    ".tsx": "tsx", ".vue": "vue", ".svelte": "svelte", ".py": "python", ".pyw": "python",
    ".rb": "ruby", ".java": "java", ".kt": "kotlin", ".kts": "kotlin", ".scala": "scala",
    ".go": "go", ".php": "php", ".c": "c", ".h": "c", ".cpp": "cpp", ".hpp": "cpp",
    ".cs": "csharp", ".rs": "rust", ".swift": "swift", ".dart": "dart", ".pl": "perl",
    ".pm": "perl", ".hs": "haskell", ".lua": "lua", ".erl": "erlang", ".ex": "elixir",
    ".exs": "elixir", ".clj": "clojure", ".lisp": "lisp", ".f": "fortran",
    ".f90": "fortran", ".zig": "zig", ".d": "d", ".v": "v", ".cr": "crystal",
    ".nim": "nim", ".pas": "pascal", ".ml": "ocaml", ".sh": "bash", ".bash": "bash",
    ".zsh": "zsh", ".fish": "fish", ".ps1": "powershell", ".bat": "batch",
    ".json": "json", ".jsonc": "jsonc", ".xml": "xml", ".yaml": "yaml", ".yml": "yaml",
    ".toml": "toml", ".ini": "ini", ".properties": "properties", ".env": "dotenv",
    "Dockerfile": "dockerfile", ".tf": "terraform", ".hcl": "hcl", ".groovy": "groovy",
    ".gradle": "groovy", ".cmake": "cmake", "CMakeLists.txt": "cmake", ".md": "markdown",
    ".mdx": "mdx", ".rst": "rst", ".tex": "latex", "LICENSE": "text", ".sql": "sql",
    ".graphql": "graphql", ".proto": "protobuf", ".glsl": "glsl", ".frag": "glsl",
    ".vert": "glsl", ".vb": "vbnet", ".vbs": "vbscript",
}
|
|
79
|
+
|
|
80
|
+
def __init__(self, root_path: Path, config_path: Optional[Path] = None):
    """Set up a scriber for one project and load its configuration.

    Args:
        root_path: Project directory; stored in resolved (absolute) form.
        config_path: Optional explicit configuration file supplied by the
            caller.

    The ``cl100k_base`` tokenizer is loaded on a best-effort basis; when it
    cannot be obtained, ``_tokenizer`` is left as ``None``.
    """
    # Inputs and the console used for this object's messages (stderr).
    self.root_path = root_path.resolve()
    self._user_config_path = config_path
    self._console = Console(stderr=True, style="bold red")

    # State filled in by configuration loading and by project mapping.
    self.mapped_files: List[Path] = []
    self.config: Dict[str, Any] = {}
    self.config_path_used: Optional[Path] = None
    self.gitignore_spec: Optional[Any] = None
    self.stats = dict(
        total_files=0,
        total_size_bytes=0,
        total_tokens=0,
        language_counts=Counter(),
        skipped_binary=0,
    )

    self._load_config()

    try:
        tokenizer = tiktoken.get_encoding("cl100k_base")
    except Exception:
        tokenizer = None
    self._tokenizer = tokenizer
|
|
102
|
+
|
|
103
|
+
def _create_default_config_file(self) -> None:
    """Write the built-in defaults to the JSON config file in the project root.

    Only ``use_gitignore``, ``exclude`` and ``include`` are written. The
    notice is printed before the write is attempted; a failed write is
    reported on the console and not raised.
    """
    target = self.root_path / self._CONFIG_FILE_NAME
    self._console.print(f"β¨ [yellow]No config found. Creating default configuration at:[/] {target}")

    # (key, fallback used when DEFAULT_CONFIG lacks the key)
    wanted_keys = (("use_gitignore", True), ("exclude", []), ("include", []))
    payload = {key: DEFAULT_CONFIG.get(key, fallback) for key, fallback in wanted_keys}

    try:
        with target.open("w", encoding="utf-8") as handle:
            json.dump(payload, handle, indent=2)
    except IOError as err:
        self._console.print(f"β [bold red]Could not create default config file:[/] {err}")
|
|
118
|
+
|
|
119
|
+
def _load_config(self) -> None:
    """Build the effective configuration and derive the filter settings.

    Layers, later ones overriding earlier ones:

    1. ``DEFAULT_CONFIG``
    2. the ``[tool.scriber]`` table of ``<root>/pyproject.toml``
    3. the JSON config file: the caller-supplied path if given, otherwise
       ``<root>/.scriber.json``

    When no pyproject settings, no JSON file and no caller-supplied path
    exist, a default JSON file is created first and then loaded.

    Sets ``config``, ``config_path_used`` (the last file whose settings were
    actually applied), ``include_patterns`` and ``exclude_patterns``, then
    loads the gitignore rules. Unreadable or malformed files are reported on
    the console and skipped.
    """
    config = DEFAULT_CONFIG.copy()
    toml_config_found = False
    toml_path = self.root_path / "pyproject.toml"

    if toml_path.is_file():
        try:
            with toml_path.open("rb") as f:
                toml_data = tomllib.load(f)
        except tomllib.TOMLDecodeError:
            self._console.print(f"Warning: Invalid pyproject.toml format in {toml_path}")
        except OSError as e:
            # An unreadable pyproject.toml must not abort the whole run.
            self._console.print(f"Warning: Could not read {toml_path}. Details: {e}")
        else:
            tool_section = toml_data.get("tool")
            scriber_section = tool_section.get("scriber") if isinstance(tool_section, dict) else None
            if isinstance(scriber_section, dict):
                config.update(scriber_section)
                toml_config_found = True
                # Record the source so callers do not report "Defaults"
                # while pyproject settings are in effect.
                self.config_path_used = toml_path

    config_path_to_use = self._user_config_path or (self.root_path / self._CONFIG_FILE_NAME)

    if not toml_config_found and not config_path_to_use.is_file() and not self._user_config_path:
        self._create_default_config_file()

    if config_path_to_use.is_file():
        try:
            with config_path_to_use.open("r", encoding="utf-8") as f:
                json_config = json.load(f)
        except json.JSONDecodeError as e:
            self._console.print(f"Error: Invalid JSON in {config_path_to_use}. Details: {e}")
        except (OSError, UnicodeDecodeError) as e:
            self._console.print(f"Error: Could not read {config_path_to_use}. Details: {e}")
        else:
            if isinstance(json_config, dict):
                config.update(json_config)
                # Only a file that was applied counts as "used".
                self.config_path_used = config_path_to_use
            else:
                self._console.print(f"Error: {config_path_to_use} must contain a JSON object.")

    self.config = config
    # `or []` tolerates an explicit null in a user-provided config.
    self.include_patterns: List[str] = self.config.get("include") or []
    self.exclude_patterns: Set[str] = set(self.config.get("exclude") or [])
    self._load_gitignore(self.config.get("use_gitignore", True))
|
|
153
|
+
|
|
154
|
+
def _load_gitignore(self, use_gitignore: bool) -> None:
|
|
155
|
+
try:
|
|
156
|
+
import pathspec
|
|
157
|
+
except ImportError:
|
|
158
|
+
self._console.print("Warning: 'pathspec' not installed. .gitignore files will be ignored.")
|
|
159
|
+
self.gitignore_spec = None
|
|
160
|
+
return
|
|
161
|
+
|
|
162
|
+
self.gitignore_spec: Optional[pathspec.PathSpec] = None
|
|
163
|
+
if not use_gitignore: return
|
|
164
|
+
gitignore_path = self.root_path / ".gitignore"
|
|
165
|
+
if gitignore_path.is_file():
|
|
166
|
+
try:
|
|
167
|
+
with gitignore_path.open("r", encoding="utf-8") as f:
|
|
168
|
+
self.gitignore_spec = pathspec.PathSpec.from_lines("gitwildmatch", f)
|
|
169
|
+
except IOError:
|
|
170
|
+
pass
|
|
171
|
+
|
|
172
|
+
def _is_binary(self, path: Path) -> bool:
|
|
173
|
+
try:
|
|
174
|
+
with path.open('rb') as f:
|
|
175
|
+
return b'\0' in f.read(1024)
|
|
176
|
+
except IOError:
|
|
177
|
+
return True
|
|
178
|
+
|
|
179
|
+
def _is_excluded(self, path: Path) -> bool:
|
|
180
|
+
try:
|
|
181
|
+
relative_path = path.relative_to(self.root_path)
|
|
182
|
+
check_set = set(relative_path.parts)
|
|
183
|
+
except ValueError:
|
|
184
|
+
return True
|
|
185
|
+
|
|
186
|
+
if not self.exclude_patterns.isdisjoint(check_set): return True
|
|
187
|
+
|
|
188
|
+
relative_path_str = relative_path.as_posix()
|
|
189
|
+
if self.gitignore_spec and self.gitignore_spec.match_file(relative_path_str): return True
|
|
190
|
+
if any(fnmatch.fnmatch(part, pattern) for pattern in self.exclude_patterns for part in check_set): return True
|
|
191
|
+
if path.is_file() and self.include_patterns:
|
|
192
|
+
return not any(fnmatch.fnmatch(relative_path_str, pattern) for pattern in self.include_patterns)
|
|
193
|
+
return False
|
|
194
|
+
|
|
195
|
+
def _collect_files(self) -> None:
|
|
196
|
+
collected = set()
|
|
197
|
+
for root, dirs, files in os.walk(self.root_path, topdown=True):
|
|
198
|
+
current_root = Path(root)
|
|
199
|
+
dirs[:] = [d for d in dirs if not self._is_excluded(current_root / d)]
|
|
200
|
+
for file in files:
|
|
201
|
+
file_path = current_root / file
|
|
202
|
+
if not self._is_excluded(file_path):
|
|
203
|
+
if self._is_binary(file_path):
|
|
204
|
+
self.stats['skipped_binary'] += 1
|
|
205
|
+
continue
|
|
206
|
+
collected.add(file_path)
|
|
207
|
+
self.mapped_files = sorted(list(collected))
|
|
208
|
+
|
|
209
|
+
def map_project(self) -> None:
    """Collect the project's files, then compute statistics for them.

    Runs ``_collect_files`` followed by ``_gather_stats``, in that order.
    """
    # The statistics pass reads the file list built by the collection pass.
    self._collect_files()
    self._gather_stats()
|
|
213
|
+
|
|
214
|
+
def _gather_stats(self) -> None:
|
|
215
|
+
if not self.mapped_files: return
|
|
216
|
+
|
|
217
|
+
self.stats['total_files'] = len(self.mapped_files)
|
|
218
|
+
total_size = 0
|
|
219
|
+
total_tokens = 0
|
|
220
|
+
|
|
221
|
+
for file_path in self.mapped_files:
|
|
222
|
+
total_size += file_path.stat().st_size
|
|
223
|
+
lang = self._get_language(file_path) or "other"
|
|
224
|
+
self.stats['language_counts'][lang] += 1
|
|
225
|
+
if self._tokenizer:
|
|
226
|
+
try:
|
|
227
|
+
content = file_path.read_text(encoding="utf-8", errors="ignore")
|
|
228
|
+
total_tokens += len(self._tokenizer.encode(content))
|
|
229
|
+
except Exception:
|
|
230
|
+
pass
|
|
231
|
+
|
|
232
|
+
self.stats['total_size_bytes'] = total_size
|
|
233
|
+
self.stats['total_tokens'] = total_tokens
|
|
234
|
+
|
|
235
|
+
def get_stats(self) -> Dict:
    """Return the statistics dictionary held by this instance.

    The very same ``self.stats`` object is handed back, not a copy.
    """
    stats = self.stats
    return stats
|
|
238
|
+
|
|
239
|
+
def get_file_count(self) -> int:
    """Return how many entries ``self.mapped_files`` currently holds."""
    mapped = self.mapped_files
    return len(mapped)
|
|
242
|
+
|
|
243
|
+
def generate_output_file(self, output_filename: str, tree_only: bool = False, progress=None, task_id=None) -> None:
    """Write the consolidated project report to a file under the project root.

    The file is opened for writing as UTF-8 at ``self.root_path /
    output_filename`` and its content is produced by ``_write_output``.

    Args:
        output_filename: Name of the report file, relative to the root.
        tree_only: Forwarded to ``_write_output``.
        progress: Forwarded to ``_write_output``.
        task_id: Forwarded to ``_write_output``.
    """
    destination = self.root_path / output_filename
    with destination.open("w", encoding="utf-8") as out:
        self._write_output(out, tree_only, progress, task_id)
|
|
248
|
+
|
|
249
|
+
def _write_output(self, f: TextIO, tree_only: bool, progress, task_id) -> None:
|
|
250
|
+
f.write("=" * 3 + "\n Mapped Folder Structure\n" + "=" * 3 + "\n\n")
|
|
251
|
+
f.write(self._get_tree_representation() + "\n")
|
|
252
|
+
|
|
253
|
+
if tree_only: return
|
|
254
|
+
|
|
255
|
+
for file_path in self.mapped_files:
|
|
256
|
+
self._write_file_content(f, file_path)
|
|
257
|
+
if progress and task_id is not None:
|
|
258
|
+
progress.update(task_id, advance=1)
|
|
259
|
+
|
|
260
|
+
def _write_file_content(self, f: TextIO, file_path: Path) -> None:
|
|
261
|
+
try:
|
|
262
|
+
relative_path = file_path.relative_to(self.root_path).as_posix()
|
|
263
|
+
file_size = file_path.stat().st_size
|
|
264
|
+
lang = self._get_language(file_path)
|
|
265
|
+
content = file_path.read_text(encoding="utf-8", errors="ignore")
|
|
266
|
+
except (OSError, ValueError):
|
|
267
|
+
return
|
|
268
|
+
|
|
269
|
+
f.write("\n" + "-" * 3 + "\n")
|
|
270
|
+
f.write(f"File: {relative_path}\nSize: {file_size} bytes\n" + "-" * 3 + "\n")
|
|
271
|
+
f.write(f"```{lang}\n{content}\n```\n")
|
|
272
|
+
|
|
273
|
+
def _get_language(self, file_path: Path) -> str:
|
|
274
|
+
return self._LANGUAGE_MAP.get(file_path.suffix, self._LANGUAGE_MAP.get(file_path.name, ""))
|
|
275
|
+
|
|
276
|
+
def _get_tree_representation(self) -> str:
|
|
277
|
+
tree = self._build_file_tree()
|
|
278
|
+
if not tree: return "No files or folders to map."
|
|
279
|
+
|
|
280
|
+
def format_tree(d: Dict, prefix: str = "") -> List[str]:
|
|
281
|
+
lines = []
|
|
282
|
+
items = sorted(d.keys())
|
|
283
|
+
for i, key in enumerate(items):
|
|
284
|
+
is_last = i == len(items) - 1
|
|
285
|
+
connector = "βββ " if is_last else "βββ "
|
|
286
|
+
lines.append(f"{prefix}{connector}{key}")
|
|
287
|
+
if d[key]:
|
|
288
|
+
new_prefix = prefix + (" " if is_last else "β ")
|
|
289
|
+
lines.extend(format_tree(d[key], new_prefix))
|
|
290
|
+
return lines
|
|
291
|
+
|
|
292
|
+
root_name = list(tree.keys())[0]
|
|
293
|
+
output_lines = [root_name]
|
|
294
|
+
output_lines.extend(format_tree(tree[root_name]))
|
|
295
|
+
return "\n".join(output_lines)
|
|
296
|
+
|
|
297
|
+
def _build_file_tree(self) -> Dict[str, Any]:
|
|
298
|
+
if not self.mapped_files: return {}
|
|
299
|
+
tree = {self.root_path.name: {}}
|
|
300
|
+
project_level = tree[self.root_path.name]
|
|
301
|
+
for path in self.mapped_files:
|
|
302
|
+
parts = path.relative_to(self.root_path).parts
|
|
303
|
+
current_level = project_level
|
|
304
|
+
for part in parts:
|
|
305
|
+
current_level = current_level.setdefault(part, {})
|
|
306
|
+
return tree
|