arachna 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arachna-1.0.1/CHANGELOG.md +194 -0
- arachna-1.0.1/LICENSE +21 -0
- arachna-1.0.1/MANIFEST.in +7 -0
- arachna-1.0.1/PKG-INFO +255 -0
- arachna-1.0.1/README.md +230 -0
- arachna-1.0.1/TODO.md +5 -0
- arachna-1.0.1/pyproject.toml +55 -0
- arachna-1.0.1/requirements-dev.txt +4 -0
- arachna-1.0.1/setup.cfg +4 -0
- arachna-1.0.1/src/arachna/__init__.py +3 -0
- arachna-1.0.1/src/arachna/__main__.py +321 -0
- arachna-1.0.1/src/arachna/cache.py +103 -0
- arachna-1.0.1/src/arachna/collector.py +167 -0
- arachna-1.0.1/src/arachna/completion.py +88 -0
- arachna-1.0.1/src/arachna/compressor.py +29 -0
- arachna-1.0.1/src/arachna/config.py +105 -0
- arachna-1.0.1/src/arachna/doctor.py +87 -0
- arachna-1.0.1/src/arachna/formatter.py +243 -0
- arachna-1.0.1/src/arachna/gatherer.py +318 -0
- arachna-1.0.1/src/arachna/gitignore.py +66 -0
- arachna-1.0.1/src/arachna/hook.py +57 -0
- arachna-1.0.1/src/arachna/init.py +202 -0
- arachna-1.0.1/src/arachna/renderer.py +62 -0
- arachna-1.0.1/src/arachna/runner.py +267 -0
- arachna-1.0.1/src/arachna/splitter.py +131 -0
- arachna-1.0.1/src/arachna/tokenizer.py +40 -0
- arachna-1.0.1/src/arachna/validator.py +59 -0
- arachna-1.0.1/src/arachna.egg-info/PKG-INFO +255 -0
- arachna-1.0.1/src/arachna.egg-info/SOURCES.txt +69 -0
- arachna-1.0.1/src/arachna.egg-info/dependency_links.txt +1 -0
- arachna-1.0.1/src/arachna.egg-info/entry_points.txt +2 -0
- arachna-1.0.1/src/arachna.egg-info/top_level.txt +1 -0
- arachna-1.0.1/tests/__init__.py +0 -0
- arachna-1.0.1/tests/cache/test_cache.py +147 -0
- arachna-1.0.1/tests/collector/test_collect.py +251 -0
- arachna-1.0.1/tests/completion/test_completion.py +84 -0
- arachna-1.0.1/tests/compressor/test_compress.py +37 -0
- arachna-1.0.1/tests/config/test_find_config.py +20 -0
- arachna-1.0.1/tests/config/test_get_profile.py +18 -0
- arachna-1.0.1/tests/config/test_load_config.py +16 -0
- arachna-1.0.1/tests/doctor/test_doctor.py +225 -0
- arachna-1.0.1/tests/formatter/test_binary.py +78 -0
- arachna-1.0.1/tests/formatter/test_format_file.py +158 -0
- arachna-1.0.1/tests/formatter/test_format_json.py +26 -0
- arachna-1.0.1/tests/formatter/test_format_xml.py +24 -0
- arachna-1.0.1/tests/formatter/test_is_excluded.py +19 -0
- arachna-1.0.1/tests/formatter/test_lang_extension.py +27 -0
- arachna-1.0.1/tests/formatter/test_lang_filename.py +27 -0
- arachna-1.0.1/tests/formatter/test_shebang.py +39 -0
- arachna-1.0.1/tests/formatter/test_shebang_edge.py +20 -0
- arachna-1.0.1/tests/gatherer/test_collect_sections.py +65 -0
- arachna-1.0.1/tests/gatherer/test_dry_run.py +64 -0
- arachna-1.0.1/tests/gatherer/test_gather_files.py +70 -0
- arachna-1.0.1/tests/gatherer/test_incremental.py +92 -0
- arachna-1.0.1/tests/gitignore/test_load_gitignore.py +52 -0
- arachna-1.0.1/tests/hook/test_install_hook.py +134 -0
- arachna-1.0.1/tests/init/test_init.py +89 -0
- arachna-1.0.1/tests/main/test_clean.py +45 -0
- arachna-1.0.1/tests/main/test_list.py +13 -0
- arachna-1.0.1/tests/main/test_main_collect.py +47 -0
- arachna-1.0.1/tests/main/test_main_dry_run.py +24 -0
- arachna-1.0.1/tests/main/test_validate.py +22 -0
- arachna-1.0.1/tests/renderer/test_format_line.py +29 -0
- arachna-1.0.1/tests/renderer/test_render_dry_run.py +79 -0
- arachna-1.0.1/tests/runner/test_run_command.py +217 -0
- arachna-1.0.1/tests/splitter/test_build_parts.py +60 -0
- arachna-1.0.1/tests/splitter/test_split_modes.py +94 -0
- arachna-1.0.1/tests/splitter/test_split_to_sections.py +21 -0
- arachna-1.0.1/tests/test_tokenizer.py +43 -0
- arachna-1.0.1/tests/tokenizer/test_plugin.py +48 -0
- arachna-1.0.1/tests/validator/test_validate_profile.py +53 -0
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## v1.0.1 — Windows test fixes
|
|
4
|
+
|
|
5
|
+
- tests/cache: _make_entry uses real SHA256 hash instead of "dummy"
|
|
6
|
+
- tests/cache: time.sleep(0.01) in test_get_changed_files_modified and test_get_changed_files_mixed
|
|
7
|
+
- tests/formatter: test_permission_denied skipped on Windows (chmod 0o000 unsupported)
|
|
8
|
+
- tests/gatherer: time.sleep(0.01) in test_collect_sections_incremental_detects_modified
|
|
9
|
+
- tests/hook: S_IXUSR check only on Unix (Windows does not support executable bits)
|
|
10
|
+
- __init__.py: bump __version__ to 1.0.1
|
|
11
|
+
- pyproject.toml: bump version to 1.0.1
|
|
12
|
+
|
|
13
|
+
## v1.0.0 — Public release
|
|
14
|
+
|
|
15
|
+
- First public release on PyPI
|
|
16
|
+
- __init__.py: bump __version__ to 1.0.0
|
|
17
|
+
- pyproject.toml: bump version to 1.0.0
|
|
18
|
+
|
|
19
|
+
## v0.9.5 — GitHub prep
|
|
20
|
+
|
|
21
|
+
- pyproject.toml: URLs updated to github.com/dead-duke/arachna
|
|
22
|
+
- README.md: added repo link, badges, safety section, doctor, hooks, full commands and options
|
|
23
|
+
- __init__.py: bump __version__ to 0.9.5
|
|
24
|
+
|
|
25
|
+
## v0.9.4 — Final polish
|
|
26
|
+
|
|
27
|
+
- runner.py: import json вынесен на верхний уровень модуля
|
|
28
|
+
- gatherer.py: _assemble_content — общая функция сборки контента для collect и dry_run
|
|
29
|
+
- collector.py: collect использует _assemble_content, убран дублирующийся код
|
|
30
|
+
- __init__.py: bump __version__ to 0.9.4
|
|
31
|
+
- pyproject.toml: bump version to 0.9.4
|
|
32
|
+
|
|
33
|
+
## v0.9.3 — Final fixes
|
|
34
|
+
|
|
35
|
+
- __main__.py: _cmd_validate использует get_profile() для консистентной валидации
|
|
36
|
+
- cache.py, gitignore.py: комментарии к _MAX_HASH_SIZE и _MAX_GITIGNORE_SIZE
|
|
37
|
+
- gitignore.py: обработка ValueError от relative_to на всех вызовах
|
|
38
|
+
- tests/runner: subprocess.CompletedProcess вместо MagicMock
|
|
39
|
+
- __init__.py: bump __version__ to 0.9.3
|
|
40
|
+
|
|
41
|
+
## v0.9.2 — Pre-release fixes
|
|
42
|
+
|
|
43
|
+
- hook.py: git_dir.exists() → git_dir.is_dir()
|
|
44
|
+
- doctor.py: проверка project_root.is_dir() перед load_gitignore_patterns
|
|
45
|
+
- gitignore.py: обработка ValueError от relative_to для симлинков
|
|
46
|
+
- __main__.py: _cmd_doctor и _cmd_install_hook без неиспользуемых параметров
|
|
47
|
+
- tests/doctor: тесты на _cmd_doctor и _cmd_install_hook с проверкой sys.exit
|
|
48
|
+
- __init__.py: bump __version__ to 0.9.2
|
|
49
|
+
|
|
50
|
+
## v0.9.1 — Version sync
|
|
51
|
+
|
|
52
|
+
- __init__.py: bump __version__ to 0.9.1
|
|
53
|
+
- pyproject.toml: bump version to 0.9.1
|
|
54
|
+
|
|
55
|
+
## v0.9.0 — Infrastructure
|
|
56
|
+
|
|
57
|
+
- PyPI-упаковка: authors, keywords, urls в pyproject.toml
|
|
58
|
+
- Кроссплатформенные тесты (Windows CI)
|
|
59
|
+
|
|
60
|
+
## v0.8.5 — Sandbox
|
|
61
|
+
|
|
62
|
+
- runner.py: dry-run + интерактивное подтверждение для недоверенных команд
|
|
63
|
+
- runner.py: _is_safe_command для проверки безопасности в dry-run режиме
|
|
64
|
+
|
|
65
|
+
## v0.8.4 — Merge
|
|
66
|
+
|
|
67
|
+
- collector.py: --merge для --profile, добавление вывода к существующему манифесту
|
|
68
|
+
- collector.py: _find_next_part_num для нумерации в merge режиме
|
|
69
|
+
|
|
70
|
+
## v0.8.3 — Git hooks
|
|
71
|
+
|
|
72
|
+
- hook.py: arachna --install-hook, установка post-commit хука
|
|
73
|
+
- hook.py: настраиваемая команда через .arachna.json hook.post-commit
|
|
74
|
+
- hook.py: --force для перезаписи существующего хука
|
|
75
|
+
|
|
76
|
+
## v0.8.2 — Doctor
|
|
77
|
+
|
|
78
|
+
- doctor.py: arachna --doctor, проверка конфига и корректности контекста
|
|
79
|
+
- doctor.py: run_doctor и print_doctor для программного использования
|
|
80
|
+
|
|
81
|
+
## v0.8.1 — Low fixes
|
|
82
|
+
|
|
83
|
+
- config.py: DEFAULT_EXCLUDE генерируется из _COMMON_EXCLUDE_DIRS
|
|
84
|
+
- splitter.py: токенизаторное усечение через бинарный поиск вместо CHARS_PER_TOKEN
|
|
85
|
+
- tests/splitter: тесты на проброс кастомного токенизатора (MagicMock)
|
|
86
|
+
|
|
87
|
+
## v0.8.0 — God function
|
|
88
|
+
|
|
89
|
+
- gatherer.py: декомпозиция _collect_named_sections
|
|
90
|
+
- gatherer.py: _collect_directory_sections и _collect_file_sections
|
|
91
|
+
|
|
92
|
+
## v0.7.5 — Truncation API + shlex
|
|
93
|
+
|
|
94
|
+
- splitter.py: was_truncated через logger.warning вместо print
|
|
95
|
+
- runner.py: проверка пустой строки и непарных кавычек перед shlex.split
|
|
96
|
+
|
|
97
|
+
## v0.7.4 — Sandbox pipe fix
|
|
98
|
+
|
|
99
|
+
- runner.py: проверка частей пайпа по отдельности в _validate_command
|
|
100
|
+
- runner.py: _resolve_base вместо _resolve_command
|
|
101
|
+
|
|
102
|
+
## v0.7.3 — Test stability
|
|
103
|
+
|
|
104
|
+
- tests: замена os.chdir на tmp_path/monkeypatch (все модули)
|
|
105
|
+
- tests/runner: замокать subprocess.run
|
|
106
|
+
- tests/config: изоляция от родительского .arachna.json
|
|
107
|
+
- tests/gatherer/test_incremental.py: переписан на интеграционный тест
|
|
108
|
+
|
|
109
|
+
## v0.7.2 — Architecture cleanup
|
|
110
|
+
|
|
111
|
+
- gatherer.py: удалено глобальное _TOKENIZE, get_tokenizer, set_tokenizer
|
|
112
|
+
- collector.py: убран fallback write_text в save_manifest
|
|
113
|
+
- splitter.py: вынесен CHARS_PER_TOKEN, добавлен флаг truncated в _handle_single
|
|
114
|
+
- config.py + gitignore.py: унифицированы EXCLUDED_DIRS
|
|
115
|
+
- CHANGELOG.md: исправлена дезинформация и дублирование
|
|
116
|
+
|
|
117
|
+
## v0.7.1 — Critical fixes
|
|
118
|
+
|
|
119
|
+
- runner.py: удалены интерпретаторы (python, node, ruby, perl, php) из _ALLOWED_COMMANDS
|
|
120
|
+
- splitter.py: исправлен проброс tokenizer в _build_parts (keyword args)
|
|
121
|
+
- __main__.py: _apply_args_to_profile возвращает копию, не мутирует оригинал
|
|
122
|
+
|
|
123
|
+
## v0.7.0 — Security sandbox, architecture cleanup
|
|
124
|
+
|
|
125
|
+
- runner.py: sandbox-валидация _validate_command с _BLOCKED_PATTERNS и _ALLOWED_COMMANDS
|
|
126
|
+
- runner.py: аудит-лог команд в .arachna_commands.log
|
|
127
|
+
- cache.py: атомарная запись через tempfile + os.replace
|
|
128
|
+
- gitignore.py: ограничение размера, фильтрация EXCLUDED_DIRS, детект бинарных файлов
|
|
129
|
+
- formatter.py: проверка размера до read_text, verbose skip reasons
|
|
130
|
+
- __main__.py: рефакторинг _cmd_all и _cmd_single через _run_profile
|
|
131
|
+
- gatherer.py: set_tokenizer/get_tokenizer deprecated
|
|
132
|
+
- 179 tests, 90% coverage
|
|
133
|
+
|
|
134
|
+
## v0.6.0 — Pluggable tokenizer
|
|
135
|
+
|
|
136
|
+
- tokenizer.py: load_tokenizer(spec)
|
|
137
|
+
- tokenizer field в profile
|
|
138
|
+
- Проброшен через collector → gatherer
|
|
139
|
+
- 179 tests, 90% coverage
|
|
140
|
+
|
|
141
|
+
## v0.5.0 — Tests, safety, audit fixes
|
|
142
|
+
|
|
143
|
+
- Тесты: cache, completion, init, formatter xml/json, incremental, manifest
|
|
144
|
+
- Убран compress_indent (небезопасный для Python)
|
|
145
|
+
- Безопасное сжатие: пустые строки + trailing spaces
|
|
146
|
+
- Shell security warning в README
|
|
147
|
+
- LICENSE (MIT)
|
|
148
|
+
- 175 tests, 90% coverage
|
|
149
|
+
|
|
150
|
+
## v0.4.2 — Audit fixes
|
|
151
|
+
|
|
152
|
+
- Убран мёртвый код в gatherer.py
|
|
153
|
+
- Исправлены CJK token тесты
|
|
154
|
+
- README: рекомендация по token margin
|
|
155
|
+
|
|
156
|
+
## v0.4.1 — Table of contents + manifest
|
|
157
|
+
|
|
158
|
+
- TOC в каждой части: список файлов
|
|
159
|
+
- chat-manifest.md: сводка всех собранных файлов
|
|
160
|
+
|
|
161
|
+
## v0.4.0 — Shell completion + hooks
|
|
162
|
+
|
|
163
|
+
- bash и zsh completion (arachna --completion bash|zsh)
|
|
164
|
+
- post_commands в профиле: запуск после коллекта
|
|
165
|
+
- 144 tests, 70% coverage
|
|
166
|
+
|
|
167
|
+
## v0.3.0 — Compress, incremental, formats, binary
|
|
168
|
+
|
|
169
|
+
- Whitespace compression (--compress)
|
|
170
|
+
- Инкрементальный режим: mtime кэш (--incremental)
|
|
171
|
+
- section_format: markdown (default), xml, json (--format)
|
|
172
|
+
- include_binary: base64 с фильтрами по размеру и расширению
|
|
173
|
+
- 140 tests
|
|
174
|
+
|
|
175
|
+
## v0.2.2 — Git split marker, per-profile manifest cleanup
|
|
176
|
+
|
|
177
|
+
- git split_marker: \n=== COMMIT:
|
|
178
|
+
- --all: очистка всех файлов, пересборка всех профилей
|
|
179
|
+
- --profile: очистка только этого профиля
|
|
180
|
+
|
|
181
|
+
## v0.2.1 — arachna init
|
|
182
|
+
|
|
183
|
+
- --init interactive + --defaults auto-detect
|
|
184
|
+
|
|
185
|
+
## v0.2.0 — Single file output, manifest, test reorg
|
|
186
|
+
|
|
187
|
+
- chat-code.md, manifest, arachna_context/, 129 tests, 90% coverage
|
|
188
|
+
|
|
189
|
+
## v0.1.5 — Shebang Detection
|
|
190
|
+
## v0.1.4 — Tests & Bugfixes
|
|
191
|
+
## v0.1.3 — Validate & Gitignore
|
|
192
|
+
## v0.1.2 — Dry-run, renderer, pre-commit
|
|
193
|
+
## v0.1.1 — Tests + fixes
|
|
194
|
+
## v0.1.0 — MVP
|
arachna-1.0.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Artem Terenin
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
arachna-1.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: arachna
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: Context collector for AI — gathers project files into token-limited chunks
|
|
5
|
+
Author: Artem Terenin
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/dead-duke/arachna
|
|
8
|
+
Project-URL: Repository, https://github.com/dead-duke/arachna
|
|
9
|
+
Project-URL: Issues, https://github.com/dead-duke/arachna/issues
|
|
10
|
+
Project-URL: Changelog, https://github.com/dead-duke/arachna/blob/main/CHANGELOG.md
|
|
11
|
+
Keywords: ai,context,tokenizer,code-collector,llm
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
17
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Classifier: Topic :: Utilities
|
|
20
|
+
Classifier: Intended Audience :: Developers
|
|
21
|
+
Requires-Python: >=3.11
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Dynamic: license-file
|
|
25
|
+
|
|
26
|
+
# arachna
|
|
27
|
+
|
|
28
|
+
[PyPI](https://pypi.org/project/arachna/)
|
|
29
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
30
|
+
[Python 3.11+](https://www.python.org/downloads/)
|
|
31
|
+
|
|
32
|
+
Context collector for AI — gathers project files into token-limited chunks.
|
|
33
|
+
|
|
34
|
+
## What is arachna
|
|
35
|
+
|
|
36
|
+
arachna is a command-line tool that collects your project's source code and documentation into files ready to be sent to an AI. It understands tokens (not lines) and splits output smartly so nothing gets cut in the middle.
|
|
37
|
+
|
|
38
|
+
## Why arachna
|
|
39
|
+
|
|
40
|
+
- Token-aware splitting: other tools split by lines, arachna splits by tokens
|
|
41
|
+
- Zero dependencies: just Python stdlib
|
|
42
|
+
- Multiple profiles: code, docs, tests, git history
|
|
43
|
+
- Smart defaults: arachna --init detects your project in seconds
|
|
44
|
+
|
|
45
|
+
## Install
|
|
46
|
+
```
|
|
47
|
+
pip install arachna
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Quick start
|
|
51
|
+
```
|
|
52
|
+
cd your-project
|
|
53
|
+
arachna --init
|
|
54
|
+
arachna --all
|
|
55
|
+
```
|
|
56
|
+
This creates an arachna_context/ folder with .md files ready to be sent to an AI.
|
|
57
|
+
|
|
58
|
+
## Commands
|
|
59
|
+
```
|
|
60
|
+
arachna --init interactive setup
|
|
61
|
+
arachna --init --defaults auto-detect everything
|
|
62
|
+
arachna --all collect all profiles
|
|
63
|
+
arachna --profile code collect one profile
|
|
64
|
+
arachna --all --dry-run preview without writing
|
|
65
|
+
arachna --clean remove collected files
|
|
66
|
+
arachna --list show profiles
|
|
67
|
+
arachna --validate check config for errors
|
|
68
|
+
arachna --doctor run full diagnostic
|
|
69
|
+
arachna --install-hook install git post-commit hook
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Options
|
|
73
|
+
|
|
74
|
+
| Option | Description |
|
|
75
|
+
|--------|-------------|
|
|
76
|
+
| `--output-dir path` | where to write (default: arachna_context/) |
|
|
77
|
+
| `--verbose` | show skipped files |
|
|
78
|
+
| `--compress` | remove blank lines and trailing spaces |
|
|
79
|
+
| `--incremental` | only files changed since last run |
|
|
80
|
+
| `--format xml` | section format: markdown (default), xml, or json |
|
|
81
|
+
| `--merge` | append to existing output instead of replacing |
|
|
82
|
+
| `--dry-run` | preview without writing files |
|
|
83
|
+
| `--force` | overwrite an existing hook (use with `--install-hook`) |
|
|
84
|
+
|
|
85
|
+
## Safety
|
|
86
|
+
|
|
87
|
+
Commands in `.arachna.json` (pre_commands, post_commands, command) are validated before execution. Unknown or dangerous commands are blocked by default. Use `--dry-run` to preview what will be executed before running.
|
|
88
|
+
|
|
89
|
+
## Doctor
|
|
90
|
+
|
|
91
|
+
`arachna --doctor` runs a full diagnostic of your configuration — validates all profiles, checks that directories and files exist, and verifies `.gitignore` integration. Use it when something doesn't work as expected.
|
|
92
|
+
|
|
93
|
+
## Git hooks
|
|
94
|
+
|
|
95
|
+
`arachna --install-hook` installs a post-commit hook that automatically runs arachna after each commit. Configure the command in `.arachna.json`:
|
|
96
|
+
|
|
97
|
+
```json
|
|
98
|
+
{
|
|
99
|
+
"hook": {
|
|
100
|
+
"post-commit": "arachna --all --incremental"
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Configuration (.arachna.json)
|
|
106
|
+
|
|
107
|
+
arachna uses profiles to define what and how to collect.
|
|
108
|
+
|
|
109
|
+
Example for a Python project:
|
|
110
|
+
|
|
111
|
+
```json
|
|
112
|
+
{
|
|
113
|
+
"project_name": "MyProject",
|
|
114
|
+
"profiles": {
|
|
115
|
+
"code": {
|
|
116
|
+
"split_mode": "by_file",
|
|
117
|
+
"directories": ["src", "app"],
|
|
118
|
+
"patterns": ["*.py"],
|
|
119
|
+
"files": ["pyproject.toml", "requirements.txt"],
|
|
120
|
+
"pre_commands": ["tree src app"],
|
|
121
|
+
"max_tokens": 16000
|
|
122
|
+
},
|
|
123
|
+
"tests": {
|
|
124
|
+
"split_mode": "by_file",
|
|
125
|
+
"directories": ["tests"],
|
|
126
|
+
"patterns": ["*.py"],
|
|
127
|
+
"max_tokens": 16000
|
|
128
|
+
},
|
|
129
|
+
"docs": {
|
|
130
|
+
"split_mode": "by_file",
|
|
131
|
+
"files": ["README.md", "TODO.md", "CHANGELOG.md"],
|
|
132
|
+
"max_tokens": 16000
|
|
133
|
+
},
|
|
134
|
+
"git": {
|
|
135
|
+
"split_mode": "by_marker",
|
|
136
|
+
"split_marker": "\n=== COMMIT:",
|
|
137
|
+
"command": "git log --reverse --format='=== COMMIT: %h ===%nTITLE: %s%n%nMESSAGE:%n%b%n'",
|
|
138
|
+
"max_tokens": 16000
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Split modes
|
|
145
|
+
|
|
146
|
+
by_file: code and docs, each file stays intact (default)
|
|
147
|
+
by_paragraph: logs, splits on blank lines
|
|
148
|
+
by_marker: git history, splits on custom marker
|
|
149
|
+
single: everything in one file, truncates if too big
|
|
150
|
+
|
|
151
|
+
## All config fields
|
|
152
|
+
|
|
153
|
+
split_mode: by_file, by_paragraph, by_marker, or single
|
|
154
|
+
split_marker: string for by_marker mode
|
|
155
|
+
directories: folders to scan
|
|
156
|
+
patterns: glob patterns like ["*.py"]
|
|
157
|
+
files: specific files to include
|
|
158
|
+
exclude_patterns: glob patterns to skip
|
|
159
|
+
pre_commands: shell commands before collection
|
|
160
|
+
post_commands: shell commands after collection
|
|
161
|
+
command: use command output instead of files
|
|
162
|
+
max_tokens: token limit per output file
|
|
163
|
+
section_format: markdown, xml, or json
|
|
164
|
+
compress: enable safe whitespace compression (blank lines, trailing spaces). Does not modify indentation.
|
|
165
|
+
include_binary: include binaries as base64 (true/false)
|
|
166
|
+
binary_extensions: whitelist like [".png"]
|
|
167
|
+
binary_max_mb: max binary file size in MB
|
|
168
|
+
|
|
169
|
+
## Output
|
|
170
|
+
|
|
171
|
+
Files go to arachna_context/ (configurable):
|
|
172
|
+
|
|
173
|
+
arachna_context/
|
|
174
|
+
.arachna_manifest.json
|
|
175
|
+
chat-manifest.md # summary of all files
|
|
176
|
+
chat-code.md
|
|
177
|
+
chat-tests.md
|
|
178
|
+
chat-docs.md
|
|
179
|
+
chat-git.md
|
|
180
|
+
|
|
181
|
+
When content exceeds max_tokens, files are numbered: chat-code_1.md, chat-code_2.md...
|
|
182
|
+
|
|
183
|
+
## Manifest and cleanup
|
|
184
|
+
|
|
185
|
+
Every created file is tracked in .arachna_manifest.json. Running --all again removes old files automatically. With --profile, only that profile's files are cleaned.
|
|
186
|
+
|
|
187
|
+
## Incremental mode
|
|
188
|
+
|
|
189
|
+
With --incremental, arachna skips files that are unchanged since the last run. The state is stored in .arachna_cache.json.
|
|
190
|
+
|
|
191
|
+
## Tokenizer
|
|
192
|
+
|
|
193
|
+
arachna uses a conservative estimate: 4 characters = 1 token.
|
|
194
|
+
This works for any model as long as you leave a 20-30% safety margin below its context window.
|
|
195
|
+
|
|
196
|
+
### Built-in (default)
|
|
197
|
+
|
|
198
|
+
No dependencies. Always works. Set max_tokens below your model's context window:
|
|
199
|
+
- 8192 window → max_tokens: 6000
|
|
200
|
+
- 32768 window → max_tokens: 24000
|
|
201
|
+
|
|
202
|
+
### Custom tokenizer
|
|
203
|
+
|
|
204
|
+
Add to your .arachna.json:
|
|
205
|
+
|
|
206
|
+
"tokenizer": "my_module:count_tokens"
|
|
207
|
+
|
|
208
|
+
Your module must export count_tokens(text) -> int. Example:
|
|
209
|
+
|
|
210
|
+
# my_module.py
|
|
211
|
+
def count_tokens(text: str) -> int:
|
|
212
|
+
return max(1, len(text) // 4) # your logic here
|
|
213
|
+
|
|
214
|
+
### Cloud models
|
|
215
|
+
|
|
216
|
+
For exact token counts with cloud APIs, install tiktoken:
|
|
217
|
+
|
|
218
|
+
pip install tiktoken
|
|
219
|
+
|
|
220
|
+
"tokenizer": "tiktoken:cl100k_base" # GPT-4, DeepSeek
|
|
221
|
+
"tokenizer": "tiktoken:o200k_base" # GPT-4o
|
|
222
|
+
|
|
223
|
+
### Local models
|
|
224
|
+
|
|
225
|
+
For exact token counts with HuggingFace tokenizers, install transformers:
|
|
226
|
+
|
|
227
|
+
pip install transformers
|
|
228
|
+
|
|
229
|
+
"tokenizer": "transformers:Qwen/Qwen2.5-7B-Instruct"
|
|
230
|
+
"tokenizer": "transformers:mistralai/Mistral-7B-Instruct-v0.3"
|
|
231
|
+
"tokenizer": "transformers:google/gemma-7b"
|
|
232
|
+
|
|
233
|
+
Note: transformers is a heavy dependency (gigabytes). Use only if you need exact counts.
|
|
234
|
+
For most local models, the built-in estimate with safety margin is sufficient.
|
|
235
|
+
|
|
236
|
+
## Supported project types
|
|
237
|
+
|
|
238
|
+
arachna --init auto-detects:
|
|
239
|
+
|
|
240
|
+
Python: src/, app/, tests/, *.py, pyproject.toml, requirements.txt
|
|
241
|
+
JS/TS: src/, tests/, *.js, *.ts, package.json
|
|
242
|
+
Go: cmd/, pkg/, *.go, go.mod
|
|
243
|
+
Rust: src/, tests/, *.rs, Cargo.toml
|
|
244
|
+
|
|
245
|
+
Also: README.md, TODO.md, CHANGELOG.md, Makefile, config/, docs/, data/prompts/.
|
|
246
|
+
|
|
247
|
+
## Links
|
|
248
|
+
|
|
249
|
+
- [GitHub Repository](https://github.com/dead-duke/arachna)
|
|
250
|
+
- [Issue Tracker](https://github.com/dead-duke/arachna/issues)
|
|
251
|
+
- [Changelog](https://github.com/dead-duke/arachna/blob/main/CHANGELOG.md)
|
|
252
|
+
|
|
253
|
+
## License
|
|
254
|
+
|
|
255
|
+
MIT
|