prompt-complexity-analyzer 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- prompt_complexity_analyzer-0.1.0/PKG-INFO +78 -0
- prompt_complexity_analyzer-0.1.0/README.md +66 -0
- prompt_complexity_analyzer-0.1.0/prompt_complexity_analyzer/__init__.py +58 -0
- prompt_complexity_analyzer-0.1.0/prompt_complexity_analyzer/_core.py +750 -0
- prompt_complexity_analyzer-0.1.0/prompt_complexity_analyzer/model.joblib +0 -0
- prompt_complexity_analyzer-0.1.0/prompt_complexity_analyzer.egg-info/PKG-INFO +78 -0
- prompt_complexity_analyzer-0.1.0/prompt_complexity_analyzer.egg-info/SOURCES.txt +11 -0
- prompt_complexity_analyzer-0.1.0/prompt_complexity_analyzer.egg-info/dependency_links.txt +1 -0
- prompt_complexity_analyzer-0.1.0/prompt_complexity_analyzer.egg-info/entry_points.txt +2 -0
- prompt_complexity_analyzer-0.1.0/prompt_complexity_analyzer.egg-info/requires.txt +5 -0
- prompt_complexity_analyzer-0.1.0/prompt_complexity_analyzer.egg-info/top_level.txt +1 -0
- prompt_complexity_analyzer-0.1.0/pyproject.toml +29 -0
- prompt_complexity_analyzer-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: prompt-complexity-analyzer
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: ML-powered prompt complexity analyzer for LLM routing
|
|
5
|
+
Requires-Python: >=3.9
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: scikit-learn>=1.3
|
|
8
|
+
Requires-Dist: numpy>=1.24
|
|
9
|
+
Requires-Dist: joblib>=1.3
|
|
10
|
+
Requires-Dist: sentence-transformers>=2.2
|
|
11
|
+
Requires-Dist: aiohttp>=3.9
|
|
12
|
+
|
|
13
|
+
# complexity-analyzer
|
|
14
|
+
|
|
15
|
+
ML-powered prompt complexity analyzer for LLM routing.
|
|
16
|
+
|
|
17
|
+
## Install
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install . # basic (keyword features)
|
|
21
|
+
pip install ".[embeddings]" # + semantic embeddings (better accuracy)
|
|
22
|
+
pip install ".[all]" # everything
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Usage
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
from prompt_complexity_analyzer import complexity
|
|
29
|
+
|
|
30
|
+
r = complexity("prove P≠NP")
|
|
31
|
+
print(r.score) # 9.2
|
|
32
|
+
print(r.model) # claude-opus-4-6
|
|
33
|
+
print(r.tier) # capable
|
|
34
|
+
print(r) # Score 9.2/10 | Tier: capable | Model: claude-opus-4-6 | Backend: ml
|
|
35
|
+
r.explain() # full breakdown with dimension scores
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### With semantic embeddings (requires `pip install ".[embeddings]"`)
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
from prompt_complexity_analyzer import load_embedding_model, complexity
|
|
42
|
+
|
|
43
|
+
load_embedding_model() # call once at startup — downloads ~80MB once, cached
|
|
44
|
+
r = complexity("your prompt")
|
|
45
|
+
print(r.backend) # ml
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### CLI
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
complexity-analyzer -p "your prompt"
|
|
52
|
+
complexity-analyzer --only score -p "your prompt"
|
|
53
|
+
complexity-analyzer --provider openai -p "your prompt"
|
|
54
|
+
complexity-analyzer --json -p "your prompt"
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Retrain with your own data
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
# Generate labeled data
|
|
61
|
+
python generate_dataset.py # 50k examples, free APIs
|
|
62
|
+
python generate_targeted.py # targeted complex examples
|
|
63
|
+
|
|
64
|
+
# Retrain
|
|
65
|
+
python training.py --data dataset.jsonl --balance
|
|
66
|
+
|
|
67
|
+
# Replace bundled model
|
|
68
|
+
cp model.joblib prompt_complexity_analyzer/model.joblib
|
|
69
|
+
pip install -e . # re-install to pick up new model
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Score scale
|
|
73
|
+
|
|
74
|
+
| Score | Tier | Use |
|
|
75
|
+
|---|---|---|
|
|
76
|
+
| 1–3.5 | fast | Haiku / GPT-4o-mini / Gemini Flash |
|
|
77
|
+
| 3.6–6.5 | balanced | Sonnet / GPT-4o / Gemini Pro |
|
|
78
|
+
| 6.6–10 | capable | Opus / o1 / Gemini Ultra |
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# complexity-analyzer
|
|
2
|
+
|
|
3
|
+
ML-powered prompt complexity analyzer for LLM routing.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install . # basic (keyword features)
|
|
9
|
+
pip install ".[embeddings]" # + semantic embeddings (better accuracy)
|
|
10
|
+
pip install ".[all]" # everything
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from prompt_complexity_analyzer import complexity
|
|
17
|
+
|
|
18
|
+
r = complexity("prove P≠NP")
|
|
19
|
+
print(r.score) # 9.2
|
|
20
|
+
print(r.model) # claude-opus-4-6
|
|
21
|
+
print(r.tier) # capable
|
|
22
|
+
print(r) # Score 9.2/10 | Tier: capable | Model: claude-opus-4-6 | Backend: ml
|
|
23
|
+
r.explain() # full breakdown with dimension scores
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### With semantic embeddings (requires `pip install ".[embeddings]"`)
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from prompt_complexity_analyzer import load_embedding_model, complexity
|
|
30
|
+
|
|
31
|
+
load_embedding_model() # call once at startup — downloads ~80MB once, cached
|
|
32
|
+
r = complexity("your prompt")
|
|
33
|
+
print(r.backend) # ml
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### CLI
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
complexity-analyzer -p "your prompt"
|
|
40
|
+
complexity-analyzer --only score -p "your prompt"
|
|
41
|
+
complexity-analyzer --provider openai -p "your prompt"
|
|
42
|
+
complexity-analyzer --json -p "your prompt"
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Retrain with your own data
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
# Generate labeled data
|
|
49
|
+
python generate_dataset.py # 50k examples, free APIs
|
|
50
|
+
python generate_targeted.py # targeted complex examples
|
|
51
|
+
|
|
52
|
+
# Retrain
|
|
53
|
+
python training.py --data dataset.jsonl --balance
|
|
54
|
+
|
|
55
|
+
# Replace bundled model
|
|
56
|
+
cp model.joblib prompt_complexity_analyzer/model.joblib
|
|
57
|
+
pip install -e . # re-install to pick up new model
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Score scale
|
|
61
|
+
|
|
62
|
+
| Score | Tier | Use |
|
|
63
|
+
|---|---|---|
|
|
64
|
+
| 1–3.5 | fast | Haiku / GPT-4o-mini / Gemini Flash |
|
|
65
|
+
| 3.6–6.5 | balanced | Sonnet / GPT-4o / Gemini Pro |
|
|
66
|
+
| 6.6–10 | capable | Opus / o1 / Gemini Ultra |
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""
|
|
2
|
+
prompt_complexity_analyzer
|
|
3
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
4
|
+
ML-powered prompt complexity analyzer for LLM routing.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from prompt_complexity_analyzer import complexity
|
|
8
|
+
|
|
9
|
+
r = complexity("prove P≠NP")
|
|
10
|
+
print(r.score) # 9.2
|
|
11
|
+
print(r.model) # claude-opus-4-6
|
|
12
|
+
print(r.tier) # capable
|
|
13
|
+
r.explain() # full breakdown
|
|
14
|
+
|
|
15
|
+
With semantic embeddings (better accuracy):
|
|
16
|
+
from prompt_complexity_analyzer import load_embedding_model
|
|
17
|
+
load_embedding_model() # call once at startup
|
|
18
|
+
r = complexity("your prompt")
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from ._core import (
|
|
22
|
+
complexity,
|
|
23
|
+
extract_features,
|
|
24
|
+
set_model,
|
|
25
|
+
load_embedding_model,
|
|
26
|
+
full_feature_names,
|
|
27
|
+
ComplexityResult,
|
|
28
|
+
FEATURE_NAMES,
|
|
29
|
+
EMBEDDING_DIM,
|
|
30
|
+
MODEL_TIERS,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
__all__ = [
|
|
34
|
+
"complexity",
|
|
35
|
+
"extract_features",
|
|
36
|
+
"set_model",
|
|
37
|
+
"load_embedding_model",
|
|
38
|
+
"full_feature_names",
|
|
39
|
+
"ComplexityResult",
|
|
40
|
+
"FEATURE_NAMES",
|
|
41
|
+
"EMBEDDING_DIM",
|
|
42
|
+
"MODEL_TIERS",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
__version__ = "0.1.0"
|
|
46
|
+
|
|
47
|
+
# ── Auto-load bundled model ───────────────────────────────────────────────────
|
|
48
|
+
# model.joblib ships inside the package — no manual set_model() call needed.
|
|
49
|
+
from pathlib import Path as _Path
|
|
50
|
+
|
|
51
|
+
_bundled = _Path(__file__).parent / "model.joblib"
|
|
52
|
+
if _bundled.exists():
|
|
53
|
+
set_model(str(_bundled))
|
|
54
|
+
|
|
55
|
+
# ── Auto-load embedding model ─────────────────────────────────────────────────
|
|
56
|
+
# Loads all-MiniLM-L6-v2 automatically if sentence-transformers is installed.
|
|
57
|
+
# Install with: pip install sentence-transformers
|
|
58
|
+
load_embedding_model()  # on failure (e.g. sentence-transformers not installed) prints a notice to stderr and returns False
|
|
@@ -0,0 +1,750 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
prompt_complexity_analyzer.py
|
|
4
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
5
|
+
Single-file prompt complexity analyzer.
|
|
6
|
+
ML-powered when a trained model is available; heuristic fallback.
|
|
7
|
+
|
|
8
|
+
── Import API ────────────────────────────────────────────────────
|
|
9
|
+
from prompt_complexity_analyzer import complexity
|
|
10
|
+
|
|
11
|
+
r = complexity("Your prompt here")
|
|
12
|
+
r.score # float 1–10
|
|
13
|
+
r.tier # "fast" | "balanced" | "capable"
|
|
14
|
+
r.model # recommended model string
|
|
15
|
+
r["reasoning"] # dimension score — fuzzy key access
|
|
16
|
+
print(r) # one-line summary
|
|
17
|
+
r.explain() # full breakdown string
|
|
18
|
+
|
|
19
|
+
── With ML model ─────────────────────────────────────────────────
|
|
20
|
+
from prompt_complexity_analyzer import set_model
|
|
21
|
+
set_model("./model.joblib") # load once globally
|
|
22
|
+
|
|
23
|
+
# or per-call:
|
|
24
|
+
r = complexity("prompt", model_path="./model.joblib")
|
|
25
|
+
|
|
26
|
+
── Feature extraction (for training) ────────────────────────────
|
|
27
|
+
from prompt_complexity_analyzer import extract_features, FEATURE_NAMES
|
|
28
|
+
|
|
29
|
+
feats = extract_features("Your prompt") # dict[str, float]
|
|
30
|
+
X = [feats[f] for f in FEATURE_NAMES] # list in canonical order
|
|
31
|
+
# Train: regressor X → score (float 1–10)
|
|
32
|
+
# Save: import joblib; joblib.dump(model, "model.joblib")
|
|
33
|
+
|
|
34
|
+
── CLI ───────────────────────────────────────────────────────────
|
|
35
|
+
python prompt_complexity_analyzer.py -p "Your prompt"
|
|
36
|
+
python prompt_complexity_analyzer.py --only score -p "..."
|
|
37
|
+
python prompt_complexity_analyzer.py --only reasoning -p "..."
|
|
38
|
+
python prompt_complexity_analyzer.py --provider openai -p "..."
|
|
39
|
+
python prompt_complexity_analyzer.py --model ./model.joblib -p "..."
|
|
40
|
+
python prompt_complexity_analyzer.py --json -p "..."
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
from __future__ import annotations
|
|
44
|
+
|
|
45
|
+
import re
|
|
46
|
+
import sys
|
|
47
|
+
import json
|
|
48
|
+
import argparse
|
|
49
|
+
from typing import Any, Optional
|
|
50
|
+
|
|
51
|
+
__all__ = [
|
|
52
|
+
"complexity",
|
|
53
|
+
"extract_features",
|
|
54
|
+
"set_model",
|
|
55
|
+
"load_embedding_model",
|
|
56
|
+
"full_feature_names",
|
|
57
|
+
"ComplexityResult",
|
|
58
|
+
"FEATURE_NAMES",
|
|
59
|
+
"EMBEDDING_DIM",
|
|
60
|
+
"MODEL_TIERS",
|
|
61
|
+
]
|
|
62
|
+
|
|
63
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
64
|
+
# Keyword Banks
|
|
65
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
66
|
+
|
|
67
|
+
_REASONING_KW = [
|
|
68
|
+
"analyze", "analyse", "compare", "contrast", "evaluate", "assess",
|
|
69
|
+
"critique", "synthesize", "synthesise", "step by step", "step-by-step",
|
|
70
|
+
"think through", "pros and cons", "trade-offs", "tradeoffs",
|
|
71
|
+
"implications", "consequences", "justify", "argue", "prove", "disprove",
|
|
72
|
+
"infer", "deduce", "derive", "refute", "explain why", "reason through",
|
|
73
|
+
"break down", "root cause", "first principles", "critically",
|
|
74
|
+
]
|
|
75
|
+
|
|
76
|
+
_DOMAIN_KW: dict[str, list[str]] = {
|
|
77
|
+
"math": ["integral", "derivative", "calculus", "theorem", "proof",
|
|
78
|
+
"polynomial", "matrix", "vector", "eigenvalue", "probability",
|
|
79
|
+
"statistics", "combinatorics", "modular arithmetic", "fourier"],
|
|
80
|
+
"code": ["function", "class", "algorithm", "debug", "refactor",
|
|
81
|
+
"implement", "compile", "runtime", "async", "api",
|
|
82
|
+
"database", "optimize", "sql", "regex", "recursion",
|
|
83
|
+
"binary", "complexity", "data structure"],
|
|
84
|
+
"security": ["exploit", "vulnerability", "penetration", "ctf", "cve",
|
|
85
|
+
"payload", "injection", "xss", "csrf", "rop", "shellcode",
|
|
86
|
+
"privilege escalation", "reverse shell", "buffer overflow",
|
|
87
|
+
"ecdh", "rsa", "tls", "cipher", "cryptograph"],
|
|
88
|
+
"medical": ["diagnosis", "symptom", "treatment", "pharmacology",
|
|
89
|
+
"clinical", "pathology", "prognosis", "dosage",
|
|
90
|
+
"contraindication", "etiology", "differential"],
|
|
91
|
+
"legal": ["liability", "statute", "jurisdiction", "precedent",
|
|
92
|
+
"contract", "intellectual property", "tort", "litigation",
|
|
93
|
+
"compliance", "gdpr", "dpdp", "regulatory"],
|
|
94
|
+
"finance": ["portfolio", "derivative", "arbitrage", "hedge",
|
|
95
|
+
"valuation", "amortization", "liquidity", "sharpe ratio",
|
|
96
|
+
"volatility", "dcf", "ebitda", "options pricing", "equity"],
|
|
97
|
+
"science": ["hypothesis", "empirical", "thermodynamics", "quantum",
|
|
98
|
+
"molecular", "genome", "entropy", "catalysis", "osmosis",
|
|
99
|
+
"photosynthesis", "relativity", "atomic"],
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
_AMBIGUITY_KW = [
|
|
103
|
+
"something", "stuff", "things", "whatever", "anything", "somehow",
|
|
104
|
+
"maybe", "perhaps", "not sure", "i think", "kind of", "sort of",
|
|
105
|
+
"some kind", "you know", "etc", "and so on", "and stuff",
|
|
106
|
+
]
|
|
107
|
+
|
|
108
|
+
_CREATIVE_KW = [
|
|
109
|
+
"write", "compose", "generate", "draft", "brainstorm", "imagine",
|
|
110
|
+
"invent", "story", "poem", "essay", "narrative", "fiction",
|
|
111
|
+
"come up with", "create a",
|
|
112
|
+
]
|
|
113
|
+
|
|
114
|
+
_OUTPUT_FORMAT_KW: dict[str, list[str]] = {
|
|
115
|
+
"json_yaml_xml": ["json", "yaml", "xml", "schema", "format as", "structured output"],
|
|
116
|
+
"long_form": ["essay", "report", "article", "in-depth", "comprehensive", "detailed"],
|
|
117
|
+
"code_output": ["```", "implement", "code", "script", "program"],
|
|
118
|
+
"table": ["table", "spreadsheet", "compare side by side", "columns"],
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
122
|
+
# Feature Names (canonical order — do not reorder; matches ML model input)
|
|
123
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
124
|
+
|
|
125
|
+
FEATURE_NAMES: list[str] = [
|
|
126
|
+
# Structural (no token_count — verbosity ≠ complexity)
|
|
127
|
+
"sentence_count",
|
|
128
|
+
"avg_word_length", # suppressed: binned 0/1/2 to limit dominance
|
|
129
|
+
"question_count",
|
|
130
|
+
"subtask_signal_count", # explicit multi-part signals
|
|
131
|
+
# Reasoning demand
|
|
132
|
+
"reasoning_kw_count",
|
|
133
|
+
# Domain expertise required
|
|
134
|
+
"domain_math_count",
|
|
135
|
+
"domain_code_count",
|
|
136
|
+
"domain_security_count",
|
|
137
|
+
"domain_medical_count",
|
|
138
|
+
"domain_legal_count",
|
|
139
|
+
"domain_finance_count",
|
|
140
|
+
"domain_science_count",
|
|
141
|
+
"unique_domain_count", # cross-domain breadth
|
|
142
|
+
# Output and intent
|
|
143
|
+
"ambiguity_kw_count",
|
|
144
|
+
"creative_kw_count",
|
|
145
|
+
"output_format_signal_count",
|
|
146
|
+
"has_code_block",
|
|
147
|
+
] # 17 keyword features
|
|
148
|
+
|
|
149
|
+
EMBEDDING_DIM = 384 # all-MiniLM-L6-v2 output dimension
|
|
150
|
+
|
|
151
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
152
|
+
# Model Tiers
|
|
153
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
154
|
+
|
|
155
|
+
MODEL_TIERS: dict[str, dict[str, Any]] = {
|
|
156
|
+
"fast": {
|
|
157
|
+
"label": "Fast / Lightweight",
|
|
158
|
+
"anthropic": "claude-haiku-4-5",
|
|
159
|
+
"openai": "gpt-4o-mini",
|
|
160
|
+
"google": "gemini-2.0-flash",
|
|
161
|
+
"ollama": "qwen3:1.7b",
|
|
162
|
+
"score_range": (1.0, 3.5),
|
|
163
|
+
"use_when": "Factual Q&A, simple rewrites, classification, lookups",
|
|
164
|
+
},
|
|
165
|
+
"balanced": {
|
|
166
|
+
"label": "Balanced",
|
|
167
|
+
"anthropic": "claude-sonnet-4-6",
|
|
168
|
+
"openai": "gpt-4o",
|
|
169
|
+
"google": "gemini-2.0-pro",
|
|
170
|
+
"ollama": "qwen3:14b",
|
|
171
|
+
"score_range": (3.6, 6.5),
|
|
172
|
+
"use_when": "Multi-step reasoning, code, domain tasks, structured output",
|
|
173
|
+
},
|
|
174
|
+
"capable": {
|
|
175
|
+
"label": "High Capability",
|
|
176
|
+
"anthropic": "claude-opus-4-6",
|
|
177
|
+
"openai": "o1",
|
|
178
|
+
"google": "gemini-2.5-pro",
|
|
179
|
+
"ollama": "qwen3:32b",
|
|
180
|
+
"score_range": (6.6, 10.0),
|
|
181
|
+
"use_when": "Complex research, deep reasoning, ambiguous high-stakes tasks",
|
|
182
|
+
},
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
186
|
+
# ML Model State
|
|
187
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
188
|
+
|
|
189
|
+
_ml_model: Optional[Any] = None
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def set_model(path: str) -> None:
|
|
193
|
+
"""
|
|
194
|
+
Load a trained ML model globally. Call once at startup.
|
|
195
|
+
|
|
196
|
+
The model must implement sklearn's predict(X) interface where
|
|
197
|
+
X is shape (n_samples, len(full_feature_names())) and output is a
|
|
198
|
+
float score in [1, 10].
|
|
199
|
+
|
|
200
|
+
Compatible save/load:
|
|
201
|
+
import joblib
|
|
202
|
+
joblib.dump(trained_model, "model.joblib") # save
|
|
203
|
+
set_model("model.joblib") # load here
|
|
204
|
+
"""
|
|
205
|
+
global _ml_model
|
|
206
|
+
_ml_model = _load_model_from_path(path)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _load_model_from_path(path: str) -> Any:
    """
    Deserialize a trained model from ``path``.

    joblib is used when it is installed; otherwise the standard ``pickle``
    module is used as a fallback.

    Only the ``import joblib`` statement is guarded. An ImportError raised
    while joblib is unpickling the file (for example because a module the
    model depends on is missing) propagates to the caller instead of being
    mistaken for "joblib not installed" and retried with pickle.

    NOTE(security): joblib.load and pickle.load can execute arbitrary code
    embedded in the file — only load model files from trusted sources.
    """
    try:
        import joblib  # type: ignore
    except ImportError:
        joblib = None

    if joblib is not None:
        return joblib.load(path)

    # Fallback when joblib is unavailable: plain pickle stream.
    import pickle
    with open(path, "rb") as f:
        return pickle.load(f)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
# ── Embedding Model State ─────────────────────────────────────────────────────
|
|
220
|
+
|
|
221
|
+
_embedding_model: Optional[Any] = None
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def load_embedding_model(model_name: str = "all-MiniLM-L6-v2") -> bool:
|
|
225
|
+
"""
|
|
226
|
+
Load a sentence-transformer for semantic embeddings (Option 3).
|
|
227
|
+
Downloads ~80MB once on first call, then cached locally.
|
|
228
|
+
|
|
229
|
+
Returns True if loaded successfully, False otherwise.
|
|
230
|
+
|
|
231
|
+
pip install sentence-transformers
|
|
232
|
+
from prompt_complexity_analyzer import load_embedding_model
|
|
233
|
+
load_embedding_model() # downloads all-MiniLM-L6-v2 once
|
|
234
|
+
r = complexity("your prompt")
|
|
235
|
+
"""
|
|
236
|
+
global _embedding_model
|
|
237
|
+
try:
|
|
238
|
+
from sentence_transformers import SentenceTransformer # type: ignore
|
|
239
|
+
_embedding_model = SentenceTransformer(model_name)
|
|
240
|
+
return True
|
|
241
|
+
except ImportError:
|
|
242
|
+
print(
|
|
243
|
+
"[prompt_complexity_analyzer] sentence-transformers not installed. "
|
|
244
|
+
"Run: pip install sentence-transformers",
|
|
245
|
+
file=sys.stderr,
|
|
246
|
+
)
|
|
247
|
+
return False
|
|
248
|
+
except Exception as e:
|
|
249
|
+
print(
|
|
250
|
+
f"[prompt_complexity_analyzer] Could not load embedding model ({e}). "
|
|
251
|
+
"Continuing without embeddings — keyword features only.",
|
|
252
|
+
file=sys.stderr,
|
|
253
|
+
)
|
|
254
|
+
return False
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def full_feature_names() -> list[str]:
    """
    Return the complete, ordered feature-vector names used by the ML model.

    = FEATURE_NAMES (17 keyword features)
    + emb_000..emb_383 (384 semantic embedding dims, if embedding model loaded)

    A new list is built on every call, so callers may modify the result
    without altering the canonical FEATURE_NAMES order.

    Use this in training.py to build the feature matrix:
        X = [feats[f] for f in full_feature_names()]
    """
    # Copy first: returning FEATURE_NAMES itself would let a caller's
    # in-place edit reorder the module-level canonical list.
    names = list(FEATURE_NAMES)
    if _embedding_model is not None:
        names.extend(f"emb_{i:03d}" for i in range(EMBEDDING_DIM))
    return names
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
272
|
+
# Feature Extraction
|
|
273
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
274
|
+
|
|
275
|
+
def extract_features(prompt: str) -> dict[str, float]:
|
|
276
|
+
"""
|
|
277
|
+
Extract features from a prompt.
|
|
278
|
+
- 17 keyword features (FEATURE_NAMES)
|
|
279
|
+
- + 384 semantic embedding dims if load_embedding_model() was called
|
|
280
|
+
|
|
281
|
+
Returns a dict keyed by full_feature_names().
|
|
282
|
+
Use to generate training data:
|
|
283
|
+
|
|
284
|
+
label = float(input("Score (1-10): "))
|
|
285
|
+
row = {"features": extract_features(prompt), "label": label}
|
|
286
|
+
|
|
287
|
+
Feature vector for model:
|
|
288
|
+
X = [feats[f] for f in full_feature_names()]
|
|
289
|
+
"""
|
|
290
|
+
tl = prompt.lower()
|
|
291
|
+
words = re.findall(r"\b\w+\b", tl)
|
|
292
|
+
sents = [s for s in re.split(r"[.!?]+", prompt) if s.strip()]
|
|
293
|
+
|
|
294
|
+
# ── Structural ────────────────────────────────────────────────────────────
|
|
295
|
+
sentence_count = float(max(1, len(sents)))
|
|
296
|
+
question_count = float(len(re.findall(r"\?", prompt)))
|
|
297
|
+
|
|
298
|
+
# avg_word_length binned to 0/1/2 — preserves directional signal
|
|
299
|
+
# (simple/medium/technical) without letting the continuous value dominate.
|
|
300
|
+
# 0 = short words (<4.5 avg) 1 = medium (4.5 to <6.0) 2 = long/technical (≥6.0)
|
|
301
|
+
_awl = (sum(len(w) for w in words) / len(words)) if words else 0.0
|
|
302
|
+
avg_word_length = 0.0 if _awl < 4.5 else (1.0 if _awl < 6.0 else 2.0)
|
|
303
|
+
|
|
304
|
+
_subtask_patterns = [
|
|
305
|
+
r"\balso\b", r"\badditionally\b", r"\bfurthermore\b",
|
|
306
|
+
r"\band then\b", r"\bmoreover\b", r"\bfinally\b",
|
|
307
|
+
r"\bstep \d+", r"^\s*\d+[.)]\s", r"\?(?=\s|$)",
|
|
308
|
+
]
|
|
309
|
+
subtask_signal_count = float(sum(
|
|
310
|
+
len(re.findall(p, tl, re.MULTILINE)) for p in _subtask_patterns
|
|
311
|
+
))
|
|
312
|
+
|
|
313
|
+
# ── Reasoning ─────────────────────────────────────────────────────────────
|
|
314
|
+
reasoning_kw_count = float(sum(1 for kw in _REASONING_KW if kw in tl))
|
|
315
|
+
|
|
316
|
+
# ── Domain ────────────────────────────────────────────────────────────────
|
|
317
|
+
domain_counts: dict[str, int] = {
|
|
318
|
+
d: sum(1 for kw in kws if kw in tl)
|
|
319
|
+
for d, kws in _DOMAIN_KW.items()
|
|
320
|
+
}
|
|
321
|
+
unique_domain_count = float(sum(1 for c in domain_counts.values() if c > 0))
|
|
322
|
+
|
|
323
|
+
# ── Other signals ─────────────────────────────────────────────────────────
|
|
324
|
+
ambiguity_kw_count = float(sum(1 for kw in _AMBIGUITY_KW if kw in tl))
|
|
325
|
+
creative_kw_count = float(sum(1 for kw in _CREATIVE_KW if kw in tl))
|
|
326
|
+
output_format_signal_count = float(sum(
|
|
327
|
+
1 for kws in _OUTPUT_FORMAT_KW.values() if any(kw in tl for kw in kws)
|
|
328
|
+
))
|
|
329
|
+
has_code_block = 1.0 if "```" in prompt else 0.0
|
|
330
|
+
|
|
331
|
+
feats: dict[str, float] = {
|
|
332
|
+
"sentence_count": sentence_count,
|
|
333
|
+
"avg_word_length": avg_word_length,
|
|
334
|
+
"question_count": question_count,
|
|
335
|
+
"subtask_signal_count": subtask_signal_count,
|
|
336
|
+
"reasoning_kw_count": reasoning_kw_count,
|
|
337
|
+
"domain_math_count": float(domain_counts["math"]),
|
|
338
|
+
"domain_code_count": float(domain_counts["code"]),
|
|
339
|
+
"domain_security_count": float(domain_counts["security"]),
|
|
340
|
+
"domain_medical_count": float(domain_counts["medical"]),
|
|
341
|
+
"domain_legal_count": float(domain_counts["legal"]),
|
|
342
|
+
"domain_finance_count": float(domain_counts["finance"]),
|
|
343
|
+
"domain_science_count": float(domain_counts["science"]),
|
|
344
|
+
"unique_domain_count": unique_domain_count,
|
|
345
|
+
"ambiguity_kw_count": ambiguity_kw_count,
|
|
346
|
+
"creative_kw_count": creative_kw_count,
|
|
347
|
+
"output_format_signal_count":output_format_signal_count,
|
|
348
|
+
"has_code_block": has_code_block,
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
# ── Semantic embeddings (Option 3) ────────────────────────────────────────
|
|
352
|
+
if _embedding_model is not None:
|
|
353
|
+
embedding = _embedding_model.encode(prompt, show_progress_bar=False)
|
|
354
|
+
for i, val in enumerate(embedding):
|
|
355
|
+
feats[f"emb_{i:03d}"] = float(val)
|
|
356
|
+
|
|
357
|
+
return feats
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def _feature_vector(prompt: str) -> list[float]:
|
|
361
|
+
feats = extract_features(prompt)
|
|
362
|
+
return [feats[f] for f in full_feature_names()]
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
366
|
+
# Heuristic Scoring (fallback when no ML model is available)
|
|
367
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
368
|
+
|
|
369
|
+
def _heuristic_score(feats: dict[str, float]) -> tuple[float, dict[str, float]]:
|
|
370
|
+
rk = feats["reasoning_kw_count"]
|
|
371
|
+
sk = feats["subtask_signal_count"]
|
|
372
|
+
ak = feats["ambiguity_kw_count"]
|
|
373
|
+
of = feats["output_format_signal_count"]
|
|
374
|
+
ud = feats["unique_domain_count"]
|
|
375
|
+
wl = feats["avg_word_length"] # binned: 0/1/2
|
|
376
|
+
total_domain = sum(feats[f"domain_{d}_count"]
|
|
377
|
+
for d in ["math", "code", "security", "medical", "legal", "finance", "science"])
|
|
378
|
+
|
|
379
|
+
dims: dict[str, float] = {}
|
|
380
|
+
|
|
381
|
+
# Vocabulary (binned 0/1/2 → weak directional signal only)
|
|
382
|
+
dims["Vocabulary"] = {0.0: 2.0, 1.0: 4.5, 2.0: 6.5}.get(wl, 2.0)
|
|
383
|
+
|
|
384
|
+
# Multi-part
|
|
385
|
+
if sk == 0: dims["Multi-part"] = 1.5
|
|
386
|
+
elif sk <= 2: dims["Multi-part"] = 4.5
|
|
387
|
+
elif sk <= 5: dims["Multi-part"] = 7.0
|
|
388
|
+
else: dims["Multi-part"] = 9.5
|
|
389
|
+
|
|
390
|
+
# Reasoning demand
|
|
391
|
+
dims["Reasoning Depth"] = min(10.0, 1.5 + rk * 1.8) if rk > 0 else 1.5
|
|
392
|
+
|
|
393
|
+
# Domain specificity
|
|
394
|
+
if total_domain == 0:
|
|
395
|
+
dims["Domain Specificity"] = 2.0
|
|
396
|
+
else:
|
|
397
|
+
dims["Domain Specificity"] = min(10.0, 3.0 + total_domain * 1.2 + (ud - 1) * 1.5)
|
|
398
|
+
|
|
399
|
+
# Ambiguity
|
|
400
|
+
if ak >= 3: dims["Ambiguity"] = 8.0
|
|
401
|
+
elif ak >= 1: dims["Ambiguity"] = 5.0
|
|
402
|
+
else: dims["Ambiguity"] = 2.0
|
|
403
|
+
|
|
404
|
+
# Output complexity
|
|
405
|
+
dims["Output Complexity"] = min(10.0, of * 3.0) if of > 0 else 1.5
|
|
406
|
+
|
|
407
|
+
weights = {
|
|
408
|
+
"Vocabulary": 0.05, # suppressed — binned, weak signal
|
|
409
|
+
"Multi-part": 0.22,
|
|
410
|
+
"Reasoning Depth": 0.35,
|
|
411
|
+
"Domain Specificity": 0.25,
|
|
412
|
+
"Ambiguity": 0.08,
|
|
413
|
+
"Output Complexity": 0.05,
|
|
414
|
+
}
|
|
415
|
+
total_w = sum(weights.values())
|
|
416
|
+
overall = sum(dims[d] * weights[d] for d in dims) / total_w
|
|
417
|
+
return round(min(10.0, max(1.0, overall)), 1), dims
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
421
|
+
# ComplexityResult
|
|
422
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
423
|
+
|
|
424
|
+
class ComplexityResult:
|
|
425
|
+
"""
|
|
426
|
+
Full result of a complexity analysis.
|
|
427
|
+
|
|
428
|
+
Attributes:
|
|
429
|
+
score Overall complexity score (float, 1–10).
|
|
430
|
+
tier "fast" | "balanced" | "capable"
|
|
431
|
+
model Recommended model string for the chosen provider.
|
|
432
|
+
provider Provider used (anthropic / openai / google / ollama).
|
|
433
|
+
label Human-readable tier label.
|
|
434
|
+
dimensions {dimension_name: score} dict.
|
|
435
|
+
flags List of advisory messages.
|
|
436
|
+
backend "ml" or "heuristic".
|
|
437
|
+
features Raw extracted features ({FEATURE_NAMES key: value}).
|
|
438
|
+
|
|
439
|
+
Key access (fuzzy substring match on dimension names + special fields):
|
|
440
|
+
result["reasoning"] → Reasoning Depth score
|
|
441
|
+
result["score"] → overall score float
|
|
442
|
+
result["tier"] → tier string
|
|
443
|
+
result["model"] → recommended model
|
|
444
|
+
result["backend"] → "ml" or "heuristic"
|
|
445
|
+
|
|
446
|
+
Methods:
|
|
447
|
+
result.get(key, default) → same as [] but returns default on miss
|
|
448
|
+
result.explain() → full formatted breakdown string
|
|
449
|
+
result.to_dict() → dict representation
|
|
450
|
+
result.to_json(indent=2) → JSON string
|
|
451
|
+
str(result) → one-line summary
|
|
452
|
+
"""
|
|
453
|
+
|
|
454
|
+
def __init__(
|
|
455
|
+
self,
|
|
456
|
+
score: float,
|
|
457
|
+
tier: str,
|
|
458
|
+
provider: str,
|
|
459
|
+
dimensions: dict[str, float],
|
|
460
|
+
flags: list[str],
|
|
461
|
+
backend: str,
|
|
462
|
+
features: dict[str, float],
|
|
463
|
+
) -> None:
|
|
464
|
+
self.score = score
|
|
465
|
+
self.tier = tier
|
|
466
|
+
self.provider = provider
|
|
467
|
+
self.dimensions = dimensions
|
|
468
|
+
self.flags = flags
|
|
469
|
+
self.backend = backend
|
|
470
|
+
self.features = features
|
|
471
|
+
|
|
472
|
+
_t = MODEL_TIERS[tier]
|
|
473
|
+
self.model = _t.get(provider, _t["anthropic"])
|
|
474
|
+
self.label = _t["label"]
|
|
475
|
+
|
|
476
|
+
# ── Key access ────────────────────────────────────────────────────────────
|
|
477
|
+
|
|
478
|
+
def __getitem__(self, key: str) -> Any:
|
|
479
|
+
value = self.get(key)
|
|
480
|
+
if value is None:
|
|
481
|
+
valid = "score, tier, model, label, backend, " + ", ".join(self.dimensions)
|
|
482
|
+
raise KeyError(f"'{key}' not found. Valid keys: {valid}")
|
|
483
|
+
return value
|
|
484
|
+
|
|
485
|
+
def get(self, key: str, default: Any = None) -> Any:
|
|
486
|
+
"""
|
|
487
|
+
Fuzzy field access. Substring-matches against dimension names
|
|
488
|
+
and special fields (score, tier, model, label, backend).
|
|
489
|
+
"""
|
|
490
|
+
k = key.strip().lower()
|
|
491
|
+
if k in ("score", "overall"): return self.score
|
|
492
|
+
if k == "tier": return self.tier
|
|
493
|
+
if k == "model": return self.model
|
|
494
|
+
if k == "label": return self.label
|
|
495
|
+
if k == "backend": return self.backend
|
|
496
|
+
|
|
497
|
+
matches = [(n, v) for n, v in self.dimensions.items() if k in n.lower()]
|
|
498
|
+
if not matches:
|
|
499
|
+
return default
|
|
500
|
+
if len(matches) == 1:
|
|
501
|
+
return matches[0][1]
|
|
502
|
+
exact = [(n, v) for n, v in matches if n.lower().startswith(k)]
|
|
503
|
+
return (exact or matches)[0][1]
|
|
504
|
+
|
|
505
|
+
# ── String representations ────────────────────────────────────────────────
|
|
506
|
+
|
|
507
|
+
def __str__(self) -> str:
|
|
508
|
+
return (
|
|
509
|
+
f"Score {self.score:.1f}/10 | "
|
|
510
|
+
f"Tier: {self.tier} | "
|
|
511
|
+
f"Model: {self.model} | "
|
|
512
|
+
f"Backend: {self.backend}"
|
|
513
|
+
)
|
|
514
|
+
|
|
515
|
+
def __repr__(self) -> str:
|
|
516
|
+
return (
|
|
517
|
+
f"ComplexityResult(score={self.score}, tier='{self.tier}', "
|
|
518
|
+
f"model='{self.model}', backend='{self.backend}')"
|
|
519
|
+
)
|
|
520
|
+
|
|
521
|
+
def explain(self) -> str:
    """Return a full human-readable breakdown.

    The report has a header (score, tier label, model with provider,
    backend, and the tier's ``use_when`` text from ``MODEL_TIERS``), one
    bar-chart row per entry in ``self.dimensions``, and any ``self.flags``.

    Returns:
        The report as a single newline-joined string. Nothing is printed.
    """
    bar_width = 14
    sep = "─" * 60
    tier_cfg = MODEL_TIERS[self.tier]

    lines = [
        "",
        f" {sep}",
        f" Score {self.score:>5.1f} / 10",
        f" Tier {self.label}",
        f" Model {self.model} [{self.provider}]",
        f" Backend {self.backend}",
        f" Use when {tier_cfg['use_when']}",
        f" {sep}",
        "",
        f" {'Dimension':<24} {'':{bar_width}} Score",
        f" {'─'*58}",
    ]
    for name, score in self.dimensions.items():
        # Clamp so an out-of-range score (< 0 or > 10) still renders a bar
        # of exactly bar_width cells instead of an over-long or short one.
        filled = max(0, min(bar_width, round(score / 10 * bar_width)))
        bar = "█" * filled + "░" * (bar_width - filled)
        lines.append(f" {name:<24} {bar} {score:>4.1f}")

    if self.flags:
        lines += ["", f" {'─'*58}"]
        lines.extend(f" {flag}" for flag in self.flags)

    lines += [f" {sep}", ""]
    return "\n".join(lines)
|
|
551
|
+
|
|
552
|
+
# ── Serialization ─────────────────────────────────────────────────────────
|
|
553
|
+
|
|
554
|
+
def to_dict(self) -> dict[str, Any]:
    """Return the result as a plain dict keyed by field name.

    The values are the attribute objects themselves (no copies are made),
    in a fixed key order.
    """
    field_order = (
        "score",
        "tier",
        "model",
        "provider",
        "label",
        "backend",
        "dimensions",
        "flags",
        "features",
    )
    return {field: getattr(self, field) for field in field_order}
|
|
566
|
+
|
|
567
|
+
def to_json(self, indent: int = 2) -> str:
    """Serialize ``self.to_dict()`` to a JSON string.

    Args:
        indent: Indentation passed straight through to ``json.dumps``.
    """
    payload = self.to_dict()
    return json.dumps(payload, indent=indent)
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
572
|
+
# Main Entry Point
|
|
573
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
574
|
+
|
|
575
|
+
def complexity(
    prompt: str,
    provider: str = "anthropic",
    model_path: Optional[str] = None,
) -> ComplexityResult:
    """
    Analyze a prompt and return a ComplexityResult.

    Args:
        prompt: The prompt or task description to analyze.
        provider: Model provider for the recommendation.
            One of: "anthropic" (default), "openai", "google", "ollama".
        model_path: Path to a trained ML model (.joblib or .pkl).
            Overrides the globally loaded model from set_model().
            Falls back to heuristic if neither is set.

    Returns:
        ComplexityResult

    Raises:
        ValueError: If ``provider`` is not one of the supported providers.

    Examples:
        r = complexity("Explain quantum entanglement simply")
        print(r)              # one-line summary
        print(r.explain())    # full breakdown (explain() returns a string)
        r.score               # 4.2
        r["reasoning"]        # Reasoning Depth score
        r.to_dict()           # serialize

        r = complexity("prompt", provider="openai", model_path="./model.joblib")
    """
    # A tuple rather than a set: the error message then lists providers in a
    # stable order on every run, and an unhashable ``provider`` yields the
    # documented ValueError instead of a TypeError.
    valid_providers = ("anthropic", "openai", "google", "ollama")
    if provider not in valid_providers:
        raise ValueError(
            f"Unknown provider '{provider}'. Choose from: {', '.join(valid_providers)}"
        )

    feats = extract_features(prompt)
    fvec = [feats[f] for f in full_feature_names()]

    # Resolve ML model (per-call path takes priority over global)
    model = None
    if model_path:
        model = _load_model_from_path(model_path)
    elif _ml_model is not None:
        model = _ml_model

    # Score
    if model is not None:
        try:
            raw = float(model.predict([fvec])[0])
            score = round(min(10.0, max(1.0, raw)), 1)
            _, dims = _heuristic_score(feats)  # dimensions for display
            backend = "ml"
        except Exception as e:
            # Deliberate best-effort: any prediction failure is reported on
            # stderr and the heuristic scorer is used instead.
            msg = str(e).lower()
            if any(x in msg for x in ("feature", "shape", "mismatch", "dimension")):
                print(
                    "[prompt_complexity_analyzer] Feature mismatch — model was trained with embeddings "
                    "but load_embedding_model() was not called (or vice versa). "
                    "Call load_embedding_model() before complexity(). Falling back to heuristic.",
                    file=sys.stderr,
                )
            else:
                print(
                    f"[prompt_complexity_analyzer] ML prediction failed ({e}). "
                    "Falling back to heuristic.", file=sys.stderr,
                )
            score, dims = _heuristic_score(feats)
            backend = "heuristic"
    else:
        score, dims = _heuristic_score(feats)
        backend = "heuristic"

    # Tier
    if score <= 3.5:
        tier = "fast"
    elif score <= 6.5:
        tier = "balanced"
    else:
        tier = "capable"

    # Flags
    flags: list[str] = []
    if score >= 8.5:
        flags.append("⚠ Very high complexity — consider decomposing the task first")
    if dims.get("Ambiguity", 0) >= 7.0:
        flags.append("⚠ High ambiguity — add explicit context / constraints")
    if dims.get("Multi-part", 0) >= 7.0:
        flags.append("💡 Multi-part — consider splitting into sequential sub-prompts")
    if dims.get("Domain Specificity", 0) >= 7.0 and tier == "fast":
        flags.append("💡 Specialized domain at fast tier — may miss nuance")

    return ComplexityResult(
        score=score, tier=tier, provider=provider,
        dimensions=dims, flags=flags, backend=backend, features=feats,
    )
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
668
|
+
# CLI
|
|
669
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
670
|
+
|
|
671
|
+
def _cli() -> None:
    """Command-line entry point.

    Parses arguments, obtains the prompt from ``-p/--prompt`` or from stdin,
    runs ``complexity()`` and prints one of: a single field (``--only``),
    the JSON serialization (``--json``), or the full ``explain()`` report.

    Exits with status 1 when no prompt is supplied or when ``--only`` names
    a field that cannot be resolved.
    """
    parser = argparse.ArgumentParser(
        prog="prompt_complexity_analyzer",
        description="Analyze prompt complexity → model routing recommendation.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
--only FIELD values:
score / overall Overall score (float)
tier "fast" | "balanced" | "capable"
model Recommended model string
label Tier label
backend "ml" | "heuristic"
reasoning Reasoning Depth score
domain Domain Specificity score
ambiguity Ambiguity score
multi Multi-part score
length Length score
output Output Complexity score

Examples:
python prompt_complexity_analyzer.py -p "What is 2+2?"
python prompt_complexity_analyzer.py -p "Compare ECDH vs RSA for TLS 1.3"
echo "Some task" | python prompt_complexity_analyzer.py
python prompt_complexity_analyzer.py --provider openai -p "..."
python prompt_complexity_analyzer.py --only score -p "..."
python prompt_complexity_analyzer.py --only reasoning -p "..."
python prompt_complexity_analyzer.py --model ./model.joblib --only tier -p "..."
python prompt_complexity_analyzer.py --json -p "..."
""",
    )
    parser.add_argument("-p", "--prompt",
                        help="Prompt text (or pass via stdin)")
    parser.add_argument("--provider",
                        choices=["anthropic", "openai", "google", "ollama"],
                        default="anthropic",
                        help="Model provider (default: anthropic)")
    parser.add_argument("--model", metavar="PATH",
                        help="Path to trained ML model (.joblib or .pkl)")
    parser.add_argument("--only", metavar="FIELD",
                        help="Output a single field value only")
    parser.add_argument("--json", action="store_true",
                        help="Output raw JSON")
    args = parser.parse_args()

    if args.prompt:
        prompt = args.prompt
    elif not sys.stdin.isatty():
        prompt = sys.stdin.read().strip()
    else:
        # Interactive hint goes to stderr so stdout carries only the result
        # (keeps --json / --only output machine-readable).
        print("Enter prompt (Ctrl+D when done):", file=sys.stderr)
        prompt = sys.stdin.read().strip()

    if not prompt:
        print("Error: no prompt provided.", file=sys.stderr)
        sys.exit(1)

    result = complexity(prompt, provider=args.provider, model_path=args.model)

    if args.only:
        value = result.get(args.only)
        if value is None:
            # List every field the --only help advertises, including backend.
            print(
                f"Error: unknown field '{args.only}'. "
                "Try: score, tier, model, label, backend, reasoning, domain, "
                "ambiguity, multi, length, output",
                file=sys.stderr,
            )
            sys.exit(1)
        if args.json:
            print(json.dumps({"field": args.only, "value": value}))
        else:
            # Floats are shown with one decimal; everything else verbatim.
            print(f"{value:.1f}" if isinstance(value, float) else value)
    elif args.json:
        print(result.to_json())
    else:
        print(result.explain())
|
|
747
|
+
|
|
748
|
+
|
|
749
|
+
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    _cli()
|
|
Binary file
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: prompt-complexity-analyzer
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: ML-powered prompt complexity analyzer for LLM routing
|
|
5
|
+
Requires-Python: >=3.9
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: scikit-learn>=1.3
|
|
8
|
+
Requires-Dist: numpy>=1.24
|
|
9
|
+
Requires-Dist: joblib>=1.3
|
|
10
|
+
Requires-Dist: sentence-transformers>=2.2
|
|
11
|
+
Requires-Dist: aiohttp>=3.9
|
|
12
|
+
|
|
13
|
+
# prompt-complexity-analyzer
|
|
14
|
+
|
|
15
|
+
ML-powered prompt complexity analyzer for LLM routing.
|
|
16
|
+
|
|
17
|
+
## Install
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install . # basic (keyword features)
|
|
21
|
+
pip install ".[embeddings]" # + semantic embeddings (better accuracy)
|
|
22
|
+
pip install ".[all]" # everything
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Usage
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
from prompt_complexity_analyzer import complexity
|
|
29
|
+
|
|
30
|
+
r = complexity("prove P≠NP")
|
|
31
|
+
print(r.score) # 9.2
|
|
32
|
+
print(r.model) # claude-opus-4-6
|
|
33
|
+
print(r.tier) # capable
|
|
34
|
+
print(r) # Score 9.2/10 | Tier: capable | Model: claude-opus-4-6 | Backend: ml
|
|
35
|
+
print(r.explain()) # full breakdown with dimension scores
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### With semantic embeddings (`sentence-transformers` is already installed as a core dependency)
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
from prompt_complexity_analyzer import load_embedding_model, complexity
|
|
42
|
+
|
|
43
|
+
load_embedding_model() # call once at startup — downloads ~80MB once, cached
|
|
44
|
+
r = complexity("your prompt")
|
|
45
|
+
print(r.backend) # ml
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### CLI
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
prompt_complexity_analyzer -p "your prompt"
|
|
52
|
+
prompt_complexity_analyzer --only score -p "your prompt"
|
|
53
|
+
prompt_complexity_analyzer --provider openai -p "your prompt"
|
|
54
|
+
prompt_complexity_analyzer --json -p "your prompt"
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Retrain with your own data
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
# Generate labeled data
|
|
61
|
+
python generate_dataset.py # 50k examples, free APIs
|
|
62
|
+
python generate_targeted.py # targeted complex examples
|
|
63
|
+
|
|
64
|
+
# Retrain
|
|
65
|
+
python training.py --data dataset.jsonl --balance
|
|
66
|
+
|
|
67
|
+
# Replace bundled model
|
|
68
|
+
cp model.joblib prompt_complexity_analyzer/model.joblib
|
|
69
|
+
pip install -e . # re-install to pick up new model
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Score scale
|
|
73
|
+
|
|
74
|
+
| Score | Tier | Use |
|
|
75
|
+
|---|---|---|
|
|
76
|
+
| 1–3.5 | fast | Haiku / GPT-4o-mini / Gemini Flash |
|
|
77
|
+
| 3.6–6.5 | balanced | Sonnet / GPT-4o / Gemini Pro |
|
|
78
|
+
| 6.6–10 | capable | Opus / o1 / Gemini Ultra |
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
prompt_complexity_analyzer/__init__.py
|
|
4
|
+
prompt_complexity_analyzer/_core.py
|
|
5
|
+
prompt_complexity_analyzer/model.joblib
|
|
6
|
+
prompt_complexity_analyzer.egg-info/PKG-INFO
|
|
7
|
+
prompt_complexity_analyzer.egg-info/SOURCES.txt
|
|
8
|
+
prompt_complexity_analyzer.egg-info/dependency_links.txt
|
|
9
|
+
prompt_complexity_analyzer.egg-info/entry_points.txt
|
|
10
|
+
prompt_complexity_analyzer.egg-info/requires.txt
|
|
11
|
+
prompt_complexity_analyzer.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
prompt_complexity_analyzer
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "prompt-complexity-analyzer"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "ML-powered prompt complexity analyzer for LLM routing"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"scikit-learn>=1.3",
|
|
13
|
+
"numpy>=1.24",
|
|
14
|
+
"joblib>=1.3",
|
|
15
|
+
"sentence-transformers>=2.2",
|
|
16
|
+
"aiohttp>=3.9",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
[project.scripts]
|
|
20
|
+
# Exposes CLI: prompt_complexity_analyzer -p "your prompt"
|
|
21
|
+
prompt_complexity_analyzer = "prompt_complexity_analyzer._core:_cli"
|
|
22
|
+
|
|
23
|
+
[tool.setuptools.packages.find]
|
|
24
|
+
where = ["."]
|
|
25
|
+
include = ["prompt_complexity_analyzer*"]
|
|
26
|
+
|
|
27
|
+
[tool.setuptools.package-data]
|
|
28
|
+
# Bundle the trained model inside the package
|
|
29
|
+
prompt_complexity_analyzer = ["*.joblib"]
|