llm-autotune 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. llm_autotune-0.1.0/.claude/settings.local.json +81 -0
  2. llm_autotune-0.1.0/.github/workflows/publish.yml +124 -0
  3. llm_autotune-0.1.0/.github/workflows/test.yml +41 -0
  4. llm_autotune-0.1.0/.gitignore +33 -0
  5. llm_autotune-0.1.0/Formula/llm-autotune.rb +32 -0
  6. llm_autotune-0.1.0/LICENSE +21 -0
  7. llm_autotune-0.1.0/PKG-INFO +578 -0
  8. llm_autotune-0.1.0/README.md +534 -0
  9. llm_autotune-0.1.0/autotune/__init__.py +39 -0
  10. llm_autotune-0.1.0/autotune/api/__init__.py +35 -0
  11. llm_autotune-0.1.0/autotune/api/backends/__init__.py +14 -0
  12. llm_autotune-0.1.0/autotune/api/backends/base.py +59 -0
  13. llm_autotune-0.1.0/autotune/api/backends/chain.py +434 -0
  14. llm_autotune-0.1.0/autotune/api/backends/mlx_backend.py +502 -0
  15. llm_autotune-0.1.0/autotune/api/backends/openai_compat.py +164 -0
  16. llm_autotune-0.1.0/autotune/api/chat.py +1471 -0
  17. llm_autotune-0.1.0/autotune/api/conversation.py +354 -0
  18. llm_autotune-0.1.0/autotune/api/ctx_utils.py +88 -0
  19. llm_autotune-0.1.0/autotune/api/hardware_tuner.py +299 -0
  20. llm_autotune-0.1.0/autotune/api/kv_manager.py +302 -0
  21. llm_autotune-0.1.0/autotune/api/local_models.py +200 -0
  22. llm_autotune-0.1.0/autotune/api/model_selector.py +516 -0
  23. llm_autotune-0.1.0/autotune/api/ollama_pull.py +307 -0
  24. llm_autotune-0.1.0/autotune/api/profiles.py +120 -0
  25. llm_autotune-0.1.0/autotune/api/running_models.py +217 -0
  26. llm_autotune-0.1.0/autotune/api/server.py +1063 -0
  27. llm_autotune-0.1.0/autotune/api/thinking.py +192 -0
  28. llm_autotune-0.1.0/autotune/bench/__init__.py +33 -0
  29. llm_autotune-0.1.0/autotune/bench/compare.py +843 -0
  30. llm_autotune-0.1.0/autotune/bench/runner.py +586 -0
  31. llm_autotune-0.1.0/autotune/cli.py +3193 -0
  32. llm_autotune-0.1.0/autotune/config/__init__.py +15 -0
  33. llm_autotune-0.1.0/autotune/config/generator.py +409 -0
  34. llm_autotune-0.1.0/autotune/context/__init__.py +22 -0
  35. llm_autotune-0.1.0/autotune/context/budget.py +97 -0
  36. llm_autotune-0.1.0/autotune/context/classifier.py +147 -0
  37. llm_autotune-0.1.0/autotune/context/compressor.py +256 -0
  38. llm_autotune-0.1.0/autotune/context/extractor.py +237 -0
  39. llm_autotune-0.1.0/autotune/context/window.py +406 -0
  40. llm_autotune-0.1.0/autotune/db/__init__.py +3 -0
  41. llm_autotune-0.1.0/autotune/db/fingerprint.py +39 -0
  42. llm_autotune-0.1.0/autotune/db/store.py +587 -0
  43. llm_autotune-0.1.0/autotune/hardware/__init__.py +3 -0
  44. llm_autotune-0.1.0/autotune/hardware/profiler.py +373 -0
  45. llm_autotune-0.1.0/autotune/hardware/ram_advisor.py +127 -0
  46. llm_autotune-0.1.0/autotune/hub/__init__.py +3 -0
  47. llm_autotune-0.1.0/autotune/hub/fetcher.py +476 -0
  48. llm_autotune-0.1.0/autotune/memory/__init__.py +12 -0
  49. llm_autotune-0.1.0/autotune/memory/estimator.py +98 -0
  50. llm_autotune-0.1.0/autotune/memory/noswap.py +298 -0
  51. llm_autotune-0.1.0/autotune/metrics/__init__.py +70 -0
  52. llm_autotune-0.1.0/autotune/metrics/ollama_client.py +310 -0
  53. llm_autotune-0.1.0/autotune/metrics/vram.py +219 -0
  54. llm_autotune-0.1.0/autotune/models/__init__.py +17 -0
  55. llm_autotune-0.1.0/autotune/models/quality.py +281 -0
  56. llm_autotune-0.1.0/autotune/models/registry.py +589 -0
  57. llm_autotune-0.1.0/autotune/output/__init__.py +3 -0
  58. llm_autotune-0.1.0/autotune/output/formatter.py +364 -0
  59. llm_autotune-0.1.0/autotune/recall/__init__.py +3 -0
  60. llm_autotune-0.1.0/autotune/recall/embedder.py +213 -0
  61. llm_autotune-0.1.0/autotune/recall/extractor.py +81 -0
  62. llm_autotune-0.1.0/autotune/recall/manager.py +338 -0
  63. llm_autotune-0.1.0/autotune/recall/store.py +488 -0
  64. llm_autotune-0.1.0/autotune/session/__init__.py +4 -0
  65. llm_autotune-0.1.0/autotune/session/advisor.py +817 -0
  66. llm_autotune-0.1.0/autotune/session/controller.py +486 -0
  67. llm_autotune-0.1.0/autotune/session/dashboard.py +518 -0
  68. llm_autotune-0.1.0/autotune/session/monitor.py +459 -0
  69. llm_autotune-0.1.0/autotune/session/types.py +138 -0
  70. llm_autotune-0.1.0/autotune/ttft/__init__.py +49 -0
  71. llm_autotune-0.1.0/autotune/ttft/optimizer.py +330 -0
  72. llm_autotune-0.1.0/proof_results.json +223 -0
  73. llm_autotune-0.1.0/pyproject.toml +81 -0
  74. llm_autotune-0.1.0/resource_results.json +468 -0
  75. llm_autotune-0.1.0/scripts/benchmark.py +345 -0
  76. llm_autotune-0.1.0/scripts/proof.py +1077 -0
  77. llm_autotune-0.1.0/scripts/resource_test.py +526 -0
  78. llm_autotune-0.1.0/scripts/stress_test.py +1029 -0
  79. llm_autotune-0.1.0/stress_results.json +2156 -0
  80. llm_autotune-0.1.0/stress_results_fast.json +318 -0
  81. llm_autotune-0.1.0/tests/__init__.py +0 -0
  82. llm_autotune-0.1.0/tests/test_context_budget.py +158 -0
  83. llm_autotune-0.1.0/tests/test_ctx_utils.py +145 -0
  84. llm_autotune-0.1.0/tests/test_kv_manager.py +278 -0
  85. llm_autotune-0.1.0/tests/test_profiles.py +84 -0
  86. llm_autotune-0.1.0/tests/test_recall_extractor.py +149 -0
  87. llm_autotune-0.1.0/tests/test_recall_store.py +362 -0
  88. llm_autotune-0.1.0/tests/test_server_utils.py +344 -0
  89. llm_autotune-0.1.0/tests/test_ttft_optimizer.py +279 -0
@@ -0,0 +1,81 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(autotune recommend:*)",
5
+ "WebFetch(domain:ai.google.dev)",
6
+ "WebFetch(domain:mistral.ai)",
7
+ "Bash(autotune fetch:*)",
8
+ "Bash(autotune fetch-many:*)",
9
+ "Bash(autotune db:*)",
10
+ "Bash(pip install:*)",
11
+ "Bash(autotune session:*)",
12
+ "Bash(python -c \"from autotune.api.server import app; from autotune.api.chat import start_chat; from autotune.api.profiles import PROFILES; print\\(''imports OK''\\); print\\(''profiles:'', list\\(PROFILES.keys\\(\\)\\)\\)\")",
13
+ "Bash(autotune telemetry:*)",
14
+ "Bash(find /Users/tanavchinthapatla/Local LLM Optimizer/autotune/session -type f -name *.py)",
15
+ "Bash(python3 -c ':*)",
16
+ "Bash(python -m py_compile autotune/cli.py)",
17
+ "Bash(python -m py_compile autotune/session/advisor.py)",
18
+ "Bash(python -m py_compile autotune/session/dashboard.py)",
19
+ "Bash(python -m py_compile autotune/session/controller.py)",
20
+ "Bash(autotune stress-test:*)",
21
+ "Bash(python -c \"import sys; sys.path.insert\\(0, '.'\\); from scripts.stress_test import PROMPTS, SUSTAINED_PROMPTS, BIG_CONTEXT_PROMPT; print\\(f'Prompts OK: {len\\(PROMPTS\\)} main, {len\\(SUSTAINED_PROMPTS\\)} sustained'\\); print\\('Import clean'\\)\")",
22
+ "Bash(curl -s http://localhost:11434/api/tags)",
23
+ "Bash(python3 -c \"import json,sys; d=json.load\\(sys.stdin\\); print\\([m['name'] for m in d.get\\('models',[]\\)]\\)\")",
24
+ "Bash(python -c ':*)",
25
+ "Bash(python scripts/resource_test.py --model phi4-mini:latest --runs 3 --cold 3 --output resource_results.json)",
26
+ "Read(//private/tmp/**)",
27
+ "Read(//private/var/folders/**)",
28
+ "Bash(python3 -c \"from autotune.context import ContextWindow; print\\('ContextWindow imports OK'\\)\")",
29
+ "Bash(python3 -c \"from rich.console import Console; from rich.table import Table; from rich.progress import Progress; print\\('rich ok'\\)\")",
30
+ "Bash(timeout 120 python3 scripts/proof.py --model phi4-mini:latest --runs 2 --cold-runs 2 --skip-vram)",
31
+ "Bash(python3 scripts/proof.py --model phi4-mini:latest --runs 2 --cold-runs 2 --skip-vram)",
32
+ "Bash(python3 scripts/proof.py --model phi4-mini:latest --runs 3 --cold-runs 3)",
33
+ "Bash(python3 -c \"import ast; ast.parse\\(open\\('scripts/proof.py'\\).read\\(\\)\\); print\\('syntax ok'\\)\")",
34
+ "Bash(autotune proof:*)",
35
+ "Bash(python3 -c \"from autotune.memory.noswap import NoSwapGuard, ModelArch; print\\('noswap import ok'\\)\")",
36
+ "Bash(python3 scripts/proof.py --model phi4-mini:latest --with-noswap)",
37
+ "Bash(python -c \"from autotune.cli import cli; ctx = cli.make_context\\('autotune', ['chat', '--help']\\); \")",
38
+ "Bash(autotune chat:*)",
39
+ "WebSearch",
40
+ "Bash(autotune hardware:*)",
41
+ "Bash(python -c \"from autotune.hardware.profiler import profile_hardware; p = profile_hardware\\(\\); print\\(f'RAM: {p.total_ram_gb}GB, available: {p.available_ram_gb}GB, arch: {p.arch}'\\)\")",
42
+ "Bash(python -m py_compile autotune/cli.py autotune/api/chat.py autotune/api/kv_manager.py autotune/api/server.py autotune/api/model_selector.py autotune/api/backends/mlx_backend.py autotune/memory/noswap.py autotune/ttft/optimizer.py)",
43
+ "Bash(python -m py_compile scripts/proof.py scripts/benchmark.py scripts/stress_test.py scripts/resource_test.py)",
44
+ "Bash(ollama:*)",
45
+ "Bash(brew upgrade:*)",
46
+ "Read(//opt/homebrew/bin/**)",
47
+ "Read(//usr/local/bin/**)",
48
+ "Bash(python -m py_compile autotune/models/registry.py autotune/models/quality.py autotune/api/ollama_pull.py autotune/api/model_selector.py autotune/session/dashboard.py autotune/api/local_models.py autotune/bench/runner.py)",
49
+ "Bash(python -m py_compile autotune/api/chat.py autotune/api/conversation.py)",
50
+ "Bash(curl -s -X DELETE http://localhost:11434/api/delete -d '{\"model\":\"nonexistent\"}')",
51
+ "Bash(echo \"exit: $?\")",
52
+ "Bash(python -m autotune delete --help)",
53
+ "Bash(python -c \"from autotune.api.ollama_pull import delete_model; print\\('delete_model imported OK'\\)\")",
54
+ "Bash(python3:*)",
55
+ "Bash(python -m autotune memory --help)",
56
+ "Bash(autotune memory:*)",
57
+ "Bash(python -m autotune proof --help)",
58
+ "Bash(python -m py_compile autotune/ttft/optimizer.py autotune/api/profiles.py autotune/api/backends/chain.py)",
59
+ "Bash(pytest tests/ -q --tb=short)",
60
+ "Bash(pip index:*)",
61
+ "Bash(pip show *)",
62
+ "Bash(autotune serve *)",
63
+ "Bash(echo \"Server PID: $!\")",
64
+ "Bash(curl -s http://localhost:8765/health)",
65
+ "Bash(curl -s http://localhost:8765/v1/models)",
66
+ "Bash(pkill -f \"autotune serve\")",
67
+ "Bash(python -c \"from autotune.api.server import app; print\\('import OK'\\)\")",
68
+ "Bash(wait)",
69
+ "Bash(curl -s -X POST http://localhost:8765/v1/chat/completions -H 'Content-Type: application/json' -d '{\"model\":\"qwen3:8b\",\"messages\":[{\"role\":\"user\",\"content\":\"say exactly: Hello world\"}],\"stream\":true,\"max_tokens\":20}' --no-buffer)",
70
+ "Bash(python -m pytest tests/ -q)",
71
+ "Bash(python -c \"from autotune.api.server import app, _strip_thinking, _filter_thinking_stream, _is_thinking_model, completions; print\\('import OK'\\)\")",
72
+ "Bash(python -m pytest tests/)",
73
+ "Bash(python -m pytest tests/test_server_utils.py -v)",
74
+ "Bash(python -c \"from autotune.api.server import app, _VERSION; print\\(f'version={_VERSION}, app={app.version}'\\)\")",
75
+ "Bash(python -m pytest tests/ -v --tb=short)",
76
+ "Bash(python -m pytest tests/ -v)",
77
+ "Bash(rm -rf dist/)",
78
+ "Bash(python -m build)"
79
+ ]
80
+ }
81
+ }
@@ -0,0 +1,124 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*" # matches any tag starting with "v" (v0.1.0, v1.2.3, ...)
7
+
8
+ permissions:
9
+ contents: read
10
+ id-token: write # required for trusted publishing (OIDC); NOTE(review): appears to be declared workflow-wide, so every job receives it although only publish-pypi uses OIDC
11
+
12
+ jobs:
13
+ build: # builds the distributions once; the publish-pypi job reuses them via the "dist" artifact
14
+ name: Build distribution
15
+ runs-on: ubuntu-latest
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: "3.12"
23
+
24
+ - name: Install build tools
25
+ run: python -m pip install --upgrade pip build
26
+
27
+ - name: Build wheel and sdist
28
+ run: python -m build
29
+
30
+ - name: Upload build artifacts
31
+ uses: actions/upload-artifact@v4
32
+ with:
33
+ name: dist # same artifact name the publish-pypi job downloads
34
+ path: dist/
35
+
36
+ publish-pypi:
37
+ name: Publish to PyPI
38
+ needs: build
39
+ runs-on: ubuntu-latest
40
+ environment:
41
+ name: pypi # must match the "Environment name" in the publisher settings listed below
42
+ url: https://pypi.org/project/llm-autotune/
43
+ steps:
44
+ - name: Download build artifacts
45
+ uses: actions/download-artifact@v4
46
+ with:
47
+ name: dist # artifact uploaded by the build job
48
+ path: dist/
49
+
50
+ - name: Publish to PyPI
51
+ uses: pypa/gh-action-pypi-publish@release/v1
52
+ # Uses OIDC trusted publishing (via the workflow's `id-token: write` permission) — no API token needed.
53
+ # Set up trusted publisher at: https://pypi.org/manage/account/publishing/
54
+ # Publisher settings:
55
+ # PyPI project name : llm-autotune
56
+ # GitHub owner : tanavc1
57
+ # Repository name : llm-autotune
58
+ # Workflow filename : publish.yml
59
+ # Environment name : pypi
60
+
61
+ update-homebrew-tap: # regenerates the Homebrew formula once the release is on PyPI
62
+ name: Update Homebrew tap
63
+ needs: publish-pypi
64
+ runs-on: ubuntu-latest
65
+ steps:
66
+ - name: Set up Python
67
+ uses: actions/setup-python@v5
68
+ with:
69
+ python-version: "3.12"
70
+
71
+ - name: Install homebrew-pypi-poet
72
+ run: pip install homebrew-pypi-poet
73
+
74
+ - name: Wait for PyPI to propagate and generate formula
75
+ run: |
76
+ # Install the exact release from PyPI, retrying while it propagates (10 tries, 30s apart).
77
+ VERSION="${{ github.ref_name }}"
78
+ PKG_VERSION="${VERSION#v}" # strip leading 'v'
79
+ for i in $(seq 1 10); do
80
+ # poet reads dependency versions from the installed environment, so the release itself must be installed.
81
+ if pip install --no-cache-dir "llm-autotune==$PKG_VERSION"; then
82
+ echo "Version $PKG_VERSION found on PyPI"; break
83
+ fi
84
+ [ "$i" -lt 10 ] || { echo "Version $PKG_VERSION never appeared on PyPI" >&2; exit 1; }
85
+ echo "Attempt $i: version not yet visible, waiting 30s..."; sleep 30
86
+ done
87
+ poet -f llm-autotune > llm-autotune.rb
88
+ echo "--- Generated formula ---"
89
+ cat llm-autotune.rb
90
+
91
+ - name: Push formula to tap repo
92
+ uses: actions/github-script@v7
93
+ env:
94
+ VERSION: ${{ github.ref_name }}
95
+ with:
96
+ github-token: ${{ secrets.TAP_GITHUB_TOKEN }} # token with write access to the tap repo; authenticates the injected `github` client
97
+ script: |
98
+ const fs = require('fs');
99
+ const formula = fs.readFileSync('llm-autotune.rb', 'utf8');
100
+ // Use the Octokit client github-script injects; '@octokit/rest' is not installed in this job, so requiring it cannot resolve.
101
+ const octokit = github;
102
+
103
+ const owner = 'tanavc1';
104
+ const repo = 'homebrew-autotune';
105
+ const path = 'Formula/llm-autotune.rb';
106
+
107
+ // Updating an existing file requires its current blob SHA; creating one must omit it.
108
+ let sha;
109
+ try {
110
+ const { data } = await octokit.rest.repos.getContent({ owner, repo, path });
111
+ sha = data.sha;
112
+ } catch (e) {
113
+ if (e.status !== 404) throw e; // 404 = file doesn't exist yet (first publish); anything else is a real failure
114
+ }
115
+
116
+ await octokit.rest.repos.createOrUpdateFileContents({
117
+ owner,
118
+ repo,
119
+ path,
120
+ message: `chore: update formula for ${process.env.VERSION}`,
121
+ content: Buffer.from(formula).toString('base64'),
122
+ sha,
123
+ });
124
+ console.log('Tap updated successfully.');
@@ -0,0 +1,41 @@
1
+ name: Tests # runs the pytest suite on pushes and pull requests targeting main
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ name: Test (Python ${{ matrix.python-version }})
12
+ runs-on: ubuntu-latest
13
+ strategy:
14
+ fail-fast: false # keep the other Python versions running when one of them fails
15
+ matrix:
16
+ python-version: ["3.10", "3.11", "3.12"]
17
+
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+
21
+ - name: Set up Python ${{ matrix.python-version }}
22
+ uses: actions/setup-python@v5
23
+ with:
24
+ python-version: ${{ matrix.python-version }}
25
+ cache: pip
26
+
27
+ - name: Install dependencies
28
+ run: |
29
+ python -m pip install --upgrade pip
30
+ pip install -e ".[dev]"
31
+
32
+ - name: Run tests with coverage
33
+ run: |
34
+ pytest tests/ --cov=autotune --cov-report=term-missing --cov-report=xml -q
35
+
36
+ - name: Upload coverage report
37
+ uses: codecov/codecov-action@v4
38
+ if: matrix.python-version == '3.11' # upload once rather than once per matrix entry
39
+ with:
40
+ files: ./coverage.xml # presumably the file written by --cov-report=xml in the previous step
41
+ fail_ci_if_error: false # a failed Codecov upload does not fail the job
@@ -0,0 +1,33 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ .eggs/
5
+ dist/
6
+ build/
7
+ *.egg
8
+ .env
9
+ .venv
10
+ venv/
11
+ env/
12
+
13
+ # Database files (contain machine-specific data)
14
+ *.db
15
+ *.db-shm
16
+ *.db-wal
17
+
18
+ # Benchmark output (generated, not source)
19
+ benchmark_results.json
20
+
21
+ # macOS
22
+ .DS_Store
23
+ .AppleDouble
24
+ .LSOverride
25
+
26
+ # IDE
27
+ .idea/
28
+ .vscode/
29
+ *.swp
30
+ *.swo
31
+
32
+ # autotune data dir (runtime)
33
+ data/
@@ -0,0 +1,32 @@
1
+ # Template for the Homebrew formula. The publish.yml GitHub Actions workflow generates the formula
2
+ # using homebrew-pypi-poet (https://github.com/tdsmith/homebrew-pypi-poet) and pushes it to the tap repository.
3
+ #
4
+ # To install:
5
+ # brew tap tanavc1/autotune
6
+ # brew install llm-autotune
7
+ #
8
+ # Or one-liner:
9
+ # brew install tanavc1/autotune/llm-autotune
10
+
11
+ class LlmAutotune < Formula # Homebrew formula that installs llm-autotune into a Python virtualenv
12
+ include Language::Python::Virtualenv
13
+
14
+ desc "Automatic local-LLM inference configuration recommender"
15
+ homepage "https://github.com/tanavc1/local-llm-autotune" # NOTE(review): publish.yml's publisher settings name the repository "llm-autotune" — confirm this URL
16
+ url "https://files.pythonhosted.org/packages/source/l/llm-autotune/llm_autotune-0.1.0.tar.gz"
17
+ sha256 "PLACEHOLDER" # placeholder digest; NOTE(review): publish.yml pushes its generated formula to the tap repo, not to this file — confirm what fills this in
18
+ license "MIT"
19
+
20
+ depends_on "python@3.12"
21
+
22
+ # Resource SHAs are auto-populated by the publish workflow via `poet -f llm-autotune`.
23
+ # Do not edit this file by hand — push a new tag to trigger regeneration.
24
+
25
+ def install
26
+ virtualenv_install_with_resources
27
+ end
28
+
29
+ test do
30
+ assert_match version.to_s, shell_output("#{bin}/autotune --version") # the installed `autotune` CLI must report the formula's version
31
+ end
32
+ end
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Tanav Chinthapatla
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.