arachna 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. arachna-1.0.1/CHANGELOG.md +194 -0
  2. arachna-1.0.1/LICENSE +21 -0
  3. arachna-1.0.1/MANIFEST.in +7 -0
  4. arachna-1.0.1/PKG-INFO +255 -0
  5. arachna-1.0.1/README.md +230 -0
  6. arachna-1.0.1/TODO.md +5 -0
  7. arachna-1.0.1/pyproject.toml +55 -0
  8. arachna-1.0.1/requirements-dev.txt +4 -0
  9. arachna-1.0.1/setup.cfg +4 -0
  10. arachna-1.0.1/src/arachna/__init__.py +3 -0
  11. arachna-1.0.1/src/arachna/__main__.py +321 -0
  12. arachna-1.0.1/src/arachna/cache.py +103 -0
  13. arachna-1.0.1/src/arachna/collector.py +167 -0
  14. arachna-1.0.1/src/arachna/completion.py +88 -0
  15. arachna-1.0.1/src/arachna/compressor.py +29 -0
  16. arachna-1.0.1/src/arachna/config.py +105 -0
  17. arachna-1.0.1/src/arachna/doctor.py +87 -0
  18. arachna-1.0.1/src/arachna/formatter.py +243 -0
  19. arachna-1.0.1/src/arachna/gatherer.py +318 -0
  20. arachna-1.0.1/src/arachna/gitignore.py +66 -0
  21. arachna-1.0.1/src/arachna/hook.py +57 -0
  22. arachna-1.0.1/src/arachna/init.py +202 -0
  23. arachna-1.0.1/src/arachna/renderer.py +62 -0
  24. arachna-1.0.1/src/arachna/runner.py +267 -0
  25. arachna-1.0.1/src/arachna/splitter.py +131 -0
  26. arachna-1.0.1/src/arachna/tokenizer.py +40 -0
  27. arachna-1.0.1/src/arachna/validator.py +59 -0
  28. arachna-1.0.1/src/arachna.egg-info/PKG-INFO +255 -0
  29. arachna-1.0.1/src/arachna.egg-info/SOURCES.txt +69 -0
  30. arachna-1.0.1/src/arachna.egg-info/dependency_links.txt +1 -0
  31. arachna-1.0.1/src/arachna.egg-info/entry_points.txt +2 -0
  32. arachna-1.0.1/src/arachna.egg-info/top_level.txt +1 -0
  33. arachna-1.0.1/tests/__init__.py +0 -0
  34. arachna-1.0.1/tests/cache/test_cache.py +147 -0
  35. arachna-1.0.1/tests/collector/test_collect.py +251 -0
  36. arachna-1.0.1/tests/completion/test_completion.py +84 -0
  37. arachna-1.0.1/tests/compressor/test_compress.py +37 -0
  38. arachna-1.0.1/tests/config/test_find_config.py +20 -0
  39. arachna-1.0.1/tests/config/test_get_profile.py +18 -0
  40. arachna-1.0.1/tests/config/test_load_config.py +16 -0
  41. arachna-1.0.1/tests/doctor/test_doctor.py +225 -0
  42. arachna-1.0.1/tests/formatter/test_binary.py +78 -0
  43. arachna-1.0.1/tests/formatter/test_format_file.py +158 -0
  44. arachna-1.0.1/tests/formatter/test_format_json.py +26 -0
  45. arachna-1.0.1/tests/formatter/test_format_xml.py +24 -0
  46. arachna-1.0.1/tests/formatter/test_is_excluded.py +19 -0
  47. arachna-1.0.1/tests/formatter/test_lang_extension.py +27 -0
  48. arachna-1.0.1/tests/formatter/test_lang_filename.py +27 -0
  49. arachna-1.0.1/tests/formatter/test_shebang.py +39 -0
  50. arachna-1.0.1/tests/formatter/test_shebang_edge.py +20 -0
  51. arachna-1.0.1/tests/gatherer/test_collect_sections.py +65 -0
  52. arachna-1.0.1/tests/gatherer/test_dry_run.py +64 -0
  53. arachna-1.0.1/tests/gatherer/test_gather_files.py +70 -0
  54. arachna-1.0.1/tests/gatherer/test_incremental.py +92 -0
  55. arachna-1.0.1/tests/gitignore/test_load_gitignore.py +52 -0
  56. arachna-1.0.1/tests/hook/test_install_hook.py +134 -0
  57. arachna-1.0.1/tests/init/test_init.py +89 -0
  58. arachna-1.0.1/tests/main/test_clean.py +45 -0
  59. arachna-1.0.1/tests/main/test_list.py +13 -0
  60. arachna-1.0.1/tests/main/test_main_collect.py +47 -0
  61. arachna-1.0.1/tests/main/test_main_dry_run.py +24 -0
  62. arachna-1.0.1/tests/main/test_validate.py +22 -0
  63. arachna-1.0.1/tests/renderer/test_format_line.py +29 -0
  64. arachna-1.0.1/tests/renderer/test_render_dry_run.py +79 -0
  65. arachna-1.0.1/tests/runner/test_run_command.py +217 -0
  66. arachna-1.0.1/tests/splitter/test_build_parts.py +60 -0
  67. arachna-1.0.1/tests/splitter/test_split_modes.py +94 -0
  68. arachna-1.0.1/tests/splitter/test_split_to_sections.py +21 -0
  69. arachna-1.0.1/tests/test_tokenizer.py +43 -0
  70. arachna-1.0.1/tests/tokenizer/test_plugin.py +48 -0
  71. arachna-1.0.1/tests/validator/test_validate_profile.py +53 -0
@@ -0,0 +1,194 @@
1
+ # Changelog
2
+
3
+ ## v1.0.1 — Windows test fixes
4
+
5
+ - tests/cache: _make_entry uses real SHA256 hash instead of "dummy"
6
+ - tests/cache: time.sleep(0.01) in test_get_changed_files_modified and test_get_changed_files_mixed
7
+ - tests/formatter: test_permission_denied skipped on Windows (chmod 0o000 unsupported)
8
+ - tests/gatherer: time.sleep(0.01) in test_collect_sections_incremental_detects_modified
9
+ - tests/hook: S_IXUSR check only on Unix (Windows does not support executable bits)
10
+ - __init__.py: bump __version__ to 1.0.1
11
+ - pyproject.toml: bump version to 1.0.1
12
+
13
+ ## v1.0.0 — Public release
14
+
15
+ - First public release on PyPI
16
+ - __init__.py: bump __version__ to 1.0.0
17
+ - pyproject.toml: bump version to 1.0.0
18
+
19
+ ## v0.9.5 — GitHub prep
20
+
21
+ - pyproject.toml: URLs updated to github.com/dead-duke/arachna
22
+ - README.md: added repo link, badges, safety section, doctor, hooks, full commands and options
23
+ - __init__.py: bump __version__ to 0.9.5
24
+
25
+ ## v0.9.4 — Final polish
26
+
27
+ - runner.py: moved import json to the module top level
28
+ - gatherer.py: _assemble_content — shared content-assembly function for collect and dry_run
29
+ - collector.py: collect uses _assemble_content; duplicated code removed
30
+ - __init__.py: bump __version__ to 0.9.4
31
+ - pyproject.toml: bump version to 0.9.4
32
+
33
+ ## v0.9.3 — Final fixes
34
+
35
+ - __main__.py: _cmd_validate uses get_profile() for consistent validation
36
+ - cache.py, gitignore.py: added comments for _MAX_HASH_SIZE and _MAX_GITIGNORE_SIZE
37
+ - gitignore.py: handle ValueError from relative_to at every call site
38
+ - tests/runner: subprocess.CompletedProcess instead of MagicMock
39
+ - __init__.py: bump __version__ to 0.9.3
40
+
41
+ ## v0.9.2 — Pre-release fixes
42
+
43
+ - hook.py: git_dir.exists() → git_dir.is_dir()
44
+ - doctor.py: check project_root.is_dir() before load_gitignore_patterns
45
+ - gitignore.py: handle ValueError from relative_to for symlinks
46
+ - __main__.py: _cmd_doctor and _cmd_install_hook no longer take unused parameters
47
+ - tests/doctor: tests for _cmd_doctor and _cmd_install_hook that check sys.exit
48
+ - __init__.py: bump __version__ to 0.9.2
49
+
50
+ ## v0.9.1 — Version sync
51
+
52
+ - __init__.py: bump __version__ to 0.9.1
53
+ - pyproject.toml: bump version to 0.9.1
54
+
55
+ ## v0.9.0 — Infrastructure
56
+
57
+ - PyPI packaging: authors, keywords, urls in pyproject.toml
58
+ - Cross-platform tests (Windows CI)
59
+
60
+ ## v0.8.5 — Sandbox
61
+
62
+ - runner.py: dry-run + interactive confirmation for untrusted commands
63
+ - runner.py: _is_safe_command for safety checks in dry-run mode
64
+
65
+ ## v0.8.4 — Merge
66
+
67
+ - collector.py: --merge for --profile, appending output to the existing manifest
68
+ - collector.py: _find_next_part_num for part numbering in merge mode
69
+
70
+ ## v0.8.3 — Git hooks
71
+
72
+ - hook.py: arachna --install-hook, installs a post-commit hook
73
+ - hook.py: configurable command via hook.post-commit in .arachna.json
74
+ - hook.py: --force to overwrite an existing hook
75
+
76
+ ## v0.8.2 — Doctor
77
+
78
+ - doctor.py: arachna --doctor, checks the config and the correctness of the context
79
+ - doctor.py: run_doctor and print_doctor for programmatic use
80
+
81
+ ## v0.8.1 — Low fixes
82
+
83
+ - config.py: DEFAULT_EXCLUDE is generated from _COMMON_EXCLUDE_DIRS
84
+ - splitter.py: tokenizer-based truncation via binary search instead of CHARS_PER_TOKEN
85
+ - tests/splitter: tests for passing a custom tokenizer through (MagicMock)
86
+
87
+ ## v0.8.0 — God function
88
+
89
+ - gatherer.py: decomposed _collect_named_sections
90
+ - gatherer.py: _collect_directory_sections and _collect_file_sections
91
+
92
+ ## v0.7.5 — Truncation API + shlex
93
+
94
+ - splitter.py: was_truncated reported via logger.warning instead of print
95
+ - runner.py: check for an empty string and unbalanced quotes before shlex.split
96
+
97
+ ## v0.7.4 — Sandbox pipe fix
98
+
99
+ - runner.py: each part of a pipe is checked separately in _validate_command
100
+ - runner.py: _resolve_base instead of _resolve_command
101
+
102
+ ## v0.7.3 — Test stability
103
+
104
+ - tests: replaced os.chdir with tmp_path/monkeypatch (all modules)
105
+ - tests/runner: mock subprocess.run
106
+ - tests/config: isolation from a parent .arachna.json
107
+ - tests/gatherer/test_incremental.py: rewritten as an integration test
108
+
109
+ ## v0.7.2 — Architecture cleanup
110
+
111
+ - gatherer.py: removed global _TOKENIZE, get_tokenizer, set_tokenizer
112
+ - collector.py: removed the write_text fallback in save_manifest
113
+ - splitter.py: extracted CHARS_PER_TOKEN; added truncated flag to _handle_single
114
+ - config.py + gitignore.py: unified EXCLUDED_DIRS
115
+ - CHANGELOG.md: fixed misinformation and duplication
116
+
117
+ ## v0.7.1 — Critical fixes
118
+
119
+ - runner.py: removed interpreters (python, node, ruby, perl, php) from _ALLOWED_COMMANDS
119
+ - splitter.py: fixed passing tokenizer through to _build_parts (keyword args)
120
+ - __main__.py: _apply_args_to_profile returns a copy instead of mutating the original
122
+
123
+ ## v0.7.0 — Security sandbox, architecture cleanup
124
+
125
+ - runner.py: sandbox validation in _validate_command with _BLOCKED_PATTERNS and _ALLOWED_COMMANDS
125
+ - runner.py: audit log of commands in .arachna_commands.log
126
+ - cache.py: atomic write via tempfile + os.replace
127
+ - gitignore.py: size limit, EXCLUDED_DIRS filtering, binary file detection
128
+ - formatter.py: size check before read_text, verbose skip reasons
129
+ - __main__.py: refactored _cmd_all and _cmd_single to go through _run_profile
131
+ - gatherer.py: set_tokenizer/get_tokenizer deprecated
132
+ - 179 tests, 90% coverage
133
+
134
+ ## v0.6.0 — Pluggable tokenizer
135
+
136
+ - tokenizer.py: load_tokenizer(spec)
137
+ - tokenizer field in profile
137
+ - Passed through collector → gatherer
139
+ - 179 tests, 90% coverage
140
+
141
+ ## v0.5.0 — Tests, safety, audit fixes
142
+
143
+ - Tests: cache, completion, init, formatter xml/json, incremental, manifest
143
+ - Removed compress_indent (unsafe for Python)
144
+ - Safe compression: blank lines + trailing spaces
145
+ - Shell security warning in README
147
+ - LICENSE (MIT)
148
+ - 175 tests, 90% coverage
149
+
150
+ ## v0.4.2 — Audit fixes
151
+
152
+ - Removed dead code in gatherer.py
152
+ - Fixed CJK token tests
153
+ - README: token margin recommendation
155
+
156
+ ## v0.4.1 — Table of contents + manifest
157
+
158
+ - TOC in each part: list of files
158
+ - chat-manifest.md: summary of all collected files
160
+
161
+ ## v0.4.0 — Shell completion + hooks
162
+
163
+ - bash и zsh completion (arachna --completion bash|zsh)
164
+ - post_commands in profile: run after collection
165
+ - 144 tests, 70% coverage
166
+
167
+ ## v0.3.0 — Compress, incremental, formats, binary
168
+
169
+ - Whitespace compression (--compress)
170
+ - Incremental mode: mtime cache (--incremental)
171
+ - section_format: markdown (default), xml, json (--format)
172
+ - include_binary: base64 with size and extension filters
173
+ - 140 tests
174
+
175
+ ## v0.2.2 — Git split marker, per-profile manifest cleanup
176
+
177
+ - git split_marker: \n=== COMMIT:
178
+ - --all: cleans all files, rebuilds all profiles
178
+ - --profile: cleans only that profile
180
+
181
+ ## v0.2.1 — arachna init
182
+
183
+ - --init interactive + --defaults auto-detect
184
+
185
+ ## v0.2.0 — Single file output, manifest, test reorg
186
+
187
+ - chat-code.md, manifest, arachna_context/, 129 tests, 90% coverage
188
+
189
+ ## v0.1.5 — Shebang Detection
190
+ ## v0.1.4 — Tests & Bugfixes
191
+ ## v0.1.3 — Validate & Gitignore
192
+ ## v0.1.2 — Dry-run, renderer, pre-commit
193
+ ## v0.1.1 — Tests + fixes
194
+ ## v0.1.0 — MVP
arachna-1.0.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Artem Terenin
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,7 @@
1
+ include README.md
2
+ include CHANGELOG.md
3
+ include LICENSE
4
+ include TODO.md
5
+ include requirements-dev.txt
6
+ recursive-include src *.py
7
+ recursive-include tests *.py
arachna-1.0.1/PKG-INFO ADDED
@@ -0,0 +1,255 @@
1
+ Metadata-Version: 2.4
2
+ Name: arachna
3
+ Version: 1.0.1
4
+ Summary: Context collector for AI — gathers project files into token-limited chunks
5
+ Author: Artem Terenin
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/dead-duke/arachna
8
+ Project-URL: Repository, https://github.com/dead-duke/arachna
9
+ Project-URL: Issues, https://github.com/dead-duke/arachna/issues
10
+ Project-URL: Changelog, https://github.com/dead-duke/arachna/blob/main/CHANGELOG.md
11
+ Keywords: ai,context,tokenizer,code-collector,llm
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Programming Language :: Python :: 3.14
17
+ Classifier: License :: OSI Approved :: MIT License
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Classifier: Topic :: Utilities
20
+ Classifier: Intended Audience :: Developers
21
+ Requires-Python: >=3.11
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Dynamic: license-file
25
+
26
+ # arachna
27
+
28
+ [![PyPI version](https://img.shields.io/pypi/v/arachna)](https://pypi.org/project/arachna/)
29
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
30
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
31
+
32
+ Context collector for AI — gathers project files into token-limited chunks.
33
+
34
+ ## What is arachna
35
+
36
+ arachna is a command-line tool that collects your project's source code and documentation into files ready to be sent to an AI. It understands tokens (not lines) and splits output smartly so nothing gets cut in the middle.
37
+
38
+ ## Why arachna
39
+
40
+ - Token-aware splitting: other tools split by lines, arachna splits by tokens
41
+ - Zero dependencies: just Python stdlib
42
+ - Multiple profiles: code, docs, tests, git history
43
+ - Smart defaults: arachna --init detects your project in seconds
44
+
45
+ ## Install
46
+ ```
47
+ pip install arachna
48
+ ```
49
+
50
+ ## Quick start
51
+ ```
52
+ cd your-project
53
+ arachna --init
54
+ arachna --all
55
+ ```
56
+ This creates an arachna_context/ folder with .md files ready to send to an AI.
57
+
58
+ ## Commands
59
+ ```
60
+ arachna --init interactive setup
61
+ arachna --init --defaults auto-detect everything
62
+ arachna --all collect all profiles
63
+ arachna --profile code collect one profile
64
+ arachna --all --dry-run preview without writing
65
+ arachna --clean remove collected files
66
+ arachna --list show profiles
67
+ arachna --validate check config for errors
68
+ arachna --doctor run full diagnostic
69
+ arachna --install-hook install git post-commit hook
70
+ ```
71
+
72
+ ## Options
73
+
74
+ | Option | Description |
75
+ |--------|-------------|
76
+ | `--output-dir path` | where to write (default: arachna_context/) |
77
+ | `--verbose` | show skipped files |
78
+ | `--compress` | remove blank lines and trailing spaces |
79
+ | `--incremental` | only files changed since last run |
80
+ | `--format xml` | section format: markdown (default), xml, or json |
81
+ | `--merge` | append to existing output instead of replacing |
82
+ | `--dry-run` | preview without writing files |
83
+ | `--force` | overwrite an existing hook when used with `--install-hook` |
84
+
85
+ ## Safety
86
+
87
+ Commands in `.arachna.json` (pre_commands, post_commands, command) are validated before execution. Unknown or dangerous commands are blocked by default. Use `--dry-run` to preview what will be executed before running.
88
+
89
+ ## Doctor
90
+
91
+ `arachna --doctor` runs a full diagnostic of your configuration — validates all profiles, checks that directories and files exist, and verifies `.gitignore` integration. Use it when something doesn't work as expected.
92
+
93
+ ## Git hooks
94
+
95
+ `arachna --install-hook` installs a post-commit hook that automatically runs arachna after each commit. Configure the command in `.arachna.json`:
96
+
97
+ ```json
98
+ {
99
+ "hook": {
100
+ "post-commit": "arachna --all --incremental"
101
+ }
102
+ }
103
+ ```
104
+
105
+ ## Configuration (.arachna.json)
106
+
107
+ arachna uses profiles to define what and how to collect.
108
+
109
+ Example for a Python project:
110
+
111
+ ```json
112
+ {
113
+ "project_name": "MyProject",
114
+ "profiles": {
115
+ "code": {
116
+ "split_mode": "by_file",
117
+ "directories": ["src", "app"],
118
+ "patterns": ["*.py"],
119
+ "files": ["pyproject.toml", "requirements.txt"],
120
+ "pre_commands": ["tree src app"],
121
+ "max_tokens": 16000
122
+ },
123
+ "tests": {
124
+ "split_mode": "by_file",
125
+ "directories": ["tests"],
126
+ "patterns": ["*.py"],
127
+ "max_tokens": 16000
128
+ },
129
+ "docs": {
130
+ "split_mode": "by_file",
131
+ "files": ["README.md", "TODO.md", "CHANGELOG.md"],
132
+ "max_tokens": 16000
133
+ },
134
+ "git": {
135
+ "split_mode": "by_marker",
136
+ "split_marker": "\n=== COMMIT:",
137
+ "command": "git log --reverse --format='=== COMMIT: %h ===%nTITLE: %s%n%nMESSAGE:%n%b%n'",
138
+ "max_tokens": 16000
139
+ }
140
+ }
141
+ }
142
+ ```
143
+
144
+ ## Split modes
145
+
146
+ by_file: code and docs, each file stays intact (default)
147
+ by_paragraph: logs, splits on blank lines
148
+ by_marker: git history, splits on custom marker
149
+ single: everything in one file, truncates if too big
150
+
151
+ ## All config fields
152
+
153
+ split_mode: by_file, by_paragraph, by_marker, or single
154
+ split_marker: string for by_marker mode
155
+ directories: folders to scan
156
+ patterns: glob patterns like ["*.py"]
157
+ files: specific files to include
158
+ exclude_patterns: glob patterns to skip
159
+ pre_commands: shell commands before collection
160
+ post_commands: shell commands after collection
161
+ command: use command output instead of files
162
+ max_tokens: token limit per output file
163
+ section_format: markdown, xml, or json
164
+ compress: enable safe whitespace compression (blank lines, trailing spaces). Does not modify indentation.
165
+ include_binary: include binaries as base64 (true/false)
166
+ binary_extensions: whitelist like [".png"]
167
+ binary_max_mb: max binary file size in MB
168
+
169
+ ## Output
170
+
171
+ Files go to arachna_context/ (configurable):
172
+
173
+ arachna_context/
174
+ .arachna_manifest.json
175
+ chat-manifest.md # summary of all files
176
+ chat-code.md
177
+ chat-tests.md
178
+ chat-docs.md
179
+ chat-git.md
180
+
181
+ When content exceeds max_tokens, files are numbered: chat-code_1.md, chat-code_2.md...
182
+
183
+ ## Manifest and cleanup
184
+
185
+ Every created file is tracked in .arachna_manifest.json. Running --all again removes old files automatically. With --profile, only that profile's files are cleaned.
186
+
187
+ ## Incremental mode
188
+
189
+ With --incremental, arachna skips files that have not changed since the last run. It tracks changes using .arachna_cache.json.
190
+
191
+ ## Tokenizer
192
+
193
+ By default, arachna uses a conservative estimate: 4 characters = 1 token.
194
+ This works for any model, provided you leave a 20-30% safety margin in max_tokens.
195
+
196
+ ### Built-in (default)
197
+
198
+ No dependencies. Always works. Set max_tokens below your model's context window:
199
+ - 8192 window → max_tokens: 6000
200
+ - 32768 window → max_tokens: 24000
201
+
202
+ ### Custom tokenizer
203
+
204
+ Add to your .arachna.json:
205
+
206
+ "tokenizer": "my_module:count_tokens"
207
+
208
+ Your module must export count_tokens(text) -> int. Example:
209
+
210
+ # my_module.py
211
+ def count_tokens(text: str) -> int:
212
+ return max(1, len(text) // 4) # your logic here
213
+
214
+ ### Cloud models
215
+
216
+ For exact token counts with cloud APIs, install tiktoken:
217
+
218
+ pip install tiktoken
219
+
220
+ "tokenizer": "tiktoken:cl100k_base" # GPT-4, DeepSeek
221
+ "tokenizer": "tiktoken:o200k_base" # GPT-4o
222
+
223
+ ### Local models
224
+
225
+ For exact token counts with HuggingFace tokenizers, install transformers:
226
+
227
+ pip install transformers
228
+
229
+ "tokenizer": "transformers:Qwen/Qwen2.5-7B-Instruct"
230
+ "tokenizer": "transformers:mistralai/Mistral-7B-Instruct-v0.3"
231
+ "tokenizer": "transformers:google/gemma-7b"
232
+
233
+ Note: transformers is a heavy dependency (gigabytes). Use only if you need exact counts.
234
+ For most local models, the built-in estimate with safety margin is sufficient.
235
+
236
+ ## Supported project types
237
+
238
+ arachna --init auto-detects:
239
+
240
+ Python: src/, app/, tests/, *.py, pyproject.toml, requirements.txt
241
+ JS/TS: src/, tests/, *.js, *.ts, package.json
242
+ Go: cmd/, pkg/, *.go, go.mod
243
+ Rust: src/, tests/, *.rs, Cargo.toml
244
+
245
+ Also: README.md, TODO.md, CHANGELOG.md, Makefile, config/, docs/, data/prompts/.
246
+
247
+ ## Links
248
+
249
+ - [GitHub Repository](https://github.com/dead-duke/arachna)
250
+ - [Issue Tracker](https://github.com/dead-duke/arachna/issues)
251
+ - [Changelog](https://github.com/dead-duke/arachna/blob/main/CHANGELOG.md)
252
+
253
+ ## License
254
+
255
+ MIT