skillinfer 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. skillinfer-0.1.0/.github/workflows/docs.yml +38 -0
  2. skillinfer-0.1.0/.gitignore +14 -0
  3. skillinfer-0.1.0/LICENSE +21 -0
  4. skillinfer-0.1.0/PKG-INFO +236 -0
  5. skillinfer-0.1.0/README.md +196 -0
  6. skillinfer-0.1.0/TODOS.md +15 -0
  7. skillinfer-0.1.0/docs/api/datasets.md +168 -0
  8. skillinfer-0.1.0/docs/api/population.md +279 -0
  9. skillinfer-0.1.0/docs/api/profile.md +387 -0
  10. skillinfer-0.1.0/docs/api/visualization.md +213 -0
  11. skillinfer-0.1.0/docs/assets/compare_profiles.png +0 -0
  12. skillinfer-0.1.0/docs/assets/convergence_curve.png +0 -0
  13. skillinfer-0.1.0/docs/assets/correlation_heatmap.png +0 -0
  14. skillinfer-0.1.0/docs/assets/feature_distributions.png +0 -0
  15. skillinfer-0.1.0/docs/assets/posterior_profile.png +0 -0
  16. skillinfer-0.1.0/docs/assets/prediction_scatter.png +0 -0
  17. skillinfer-0.1.0/docs/assets/scree_plot.png +0 -0
  18. skillinfer-0.1.0/docs/assets/skill_embedding.png +0 -0
  19. skillinfer-0.1.0/docs/assets/uncertainty_waterfall.png +0 -0
  20. skillinfer-0.1.0/docs/getting-started/concepts.md +72 -0
  21. skillinfer-0.1.0/docs/getting-started/installation.md +52 -0
  22. skillinfer-0.1.0/docs/getting-started/quickstart.md +108 -0
  23. skillinfer-0.1.0/docs/how-it-works/computational-cost.md +67 -0
  24. skillinfer-0.1.0/docs/how-it-works/covariance-estimation.md +118 -0
  25. skillinfer-0.1.0/docs/how-it-works/kalman-update.md +96 -0
  26. skillinfer-0.1.0/docs/index.md +278 -0
  27. skillinfer-0.1.0/docs/javascripts/mathjax.js +16 -0
  28. skillinfer-0.1.0/docs/stylesheets/extra.css +99 -0
  29. skillinfer-0.1.0/docs/tutorials/european-skills.md +97 -0
  30. skillinfer-0.1.0/docs/tutorials/human-skills.md +133 -0
  31. skillinfer-0.1.0/docs/tutorials/llm-benchmarks.md +168 -0
  32. skillinfer-0.1.0/docs/tutorials/orchestration.md +205 -0
  33. skillinfer-0.1.0/examples/esco.py +232 -0
  34. skillinfer-0.1.0/examples/llm_benchmark.py +113 -0
  35. skillinfer-0.1.0/examples/onet.py +211 -0
  36. skillinfer-0.1.0/examples/orchestration.py +152 -0
  37. skillinfer-0.1.0/examples/quickstart.py +122 -0
  38. skillinfer-0.1.0/examples/task_matching.py +114 -0
  39. skillinfer-0.1.0/mcp-server/README.md +94 -0
  40. skillinfer-0.1.0/mcp-server/pyproject.toml +27 -0
  41. skillinfer-0.1.0/mcp-server/skillinfer_mcp/__init__.py +1 -0
  42. skillinfer-0.1.0/mcp-server/skillinfer_mcp/__main__.py +5 -0
  43. skillinfer-0.1.0/mcp-server/skillinfer_mcp/server.py +567 -0
  44. skillinfer-0.1.0/mkdocs.yml +105 -0
  45. skillinfer-0.1.0/pyproject.toml +66 -0
  46. skillinfer-0.1.0/skillinfer/__init__.py +86 -0
  47. skillinfer-0.1.0/skillinfer/_covariance.py +83 -0
  48. skillinfer-0.1.0/skillinfer/_kalman.py +121 -0
  49. skillinfer-0.1.0/skillinfer/datasets/__init__.py +16 -0
  50. skillinfer-0.1.0/skillinfer/datasets/_esco.py +42 -0
  51. skillinfer-0.1.0/skillinfer/datasets/_onet.py +42 -0
  52. skillinfer-0.1.0/skillinfer/datasets/esco.parquet +0 -0
  53. skillinfer-0.1.0/skillinfer/datasets/onet.parquet +0 -0
  54. skillinfer-0.1.0/skillinfer/population.py +396 -0
  55. skillinfer-0.1.0/skillinfer/py.typed +0 -0
  56. skillinfer-0.1.0/skillinfer/state.py +583 -0
  57. skillinfer-0.1.0/skillinfer/types.py +93 -0
  58. skillinfer-0.1.0/skillinfer/validation.py +266 -0
  59. skillinfer-0.1.0/skillinfer/visualization.py +597 -0
  60. skillinfer-0.1.0/tests/test_integration.py +113 -0
  61. skillinfer-0.1.0/tests/test_kalman.py +148 -0
  62. skillinfer-0.1.0/tests/test_match.py +144 -0
  63. skillinfer-0.1.0/tests/test_state.py +222 -0
  64. skillinfer-0.1.0/tests/test_taxonomy.py +159 -0
@@ -0,0 +1,38 @@
1
+ name: Deploy docs
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+
7
+ permissions:
8
+ contents: read
9
+ pages: write
10
+ id-token: write
11
+
12
+ concurrency:
13
+ group: pages
14
+ cancel-in-progress: true
15
+
16
+ jobs:
17
+ build:
18
+ runs-on: ubuntu-latest
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+ - uses: actions/setup-python@v5
22
+ with:
23
+ python-version: "3.12"
24
+ - run: pip install mkdocs-material pymdown-extensions
25
+ - run: mkdocs build --strict --site-dir _site
26
+ - uses: actions/upload-pages-artifact@v3
27
+ with:
28
+ path: _site
29
+
30
+ deploy:
31
+ needs: build
32
+ runs-on: ubuntu-latest
33
+ environment:
34
+ name: github-pages
35
+ url: ${{ steps.deployment.outputs.page_url }}
36
+ steps:
37
+ - id: deployment
38
+ uses: actions/deploy-pages@v4
@@ -0,0 +1,14 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .pytest_cache/
7
+ *.egg
8
+ .venv/
9
+ examples/.data/
10
+ site/
11
+ paper/
12
+ data/
13
+ results/
14
+ .DS_Store
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Kostadin Devedzhiev
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,236 @@
1
+ Metadata-Version: 2.4
2
+ Name: skillinfer
3
+ Version: 0.1.0
4
+ Summary: Infer a full skill profile from a few observations. Few-shot capability estimation for AI agents and humans.
5
+ Project-URL: Homepage, https://github.com/kostadindev/skillinfer
6
+ Project-URL: Documentation, https://kostadindev.github.io/skillinfer
7
+ Project-URL: Repository, https://github.com/kostadindev/skillinfer
8
+ Project-URL: Issues, https://github.com/kostadindev/skillinfer/issues
9
+ Author-email: Kostadin Devedzhiev <kgd26@cam.ac.uk>
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: ai-agents,bayesian-inference,capability-estimation,human-ai-orchestration,kalman-filter,skill-inference
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Classifier: Topic :: Scientific/Engineering
24
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
25
+ Classifier: Typing :: Typed
26
+ Requires-Python: >=3.10
27
+ Requires-Dist: numpy>=1.24
28
+ Requires-Dist: pandas>=2.0
29
+ Requires-Dist: scikit-learn>=1.2
30
+ Requires-Dist: scipy>=1.10
31
+ Provides-Extra: dev
32
+ Requires-Dist: matplotlib>=3.7; extra == 'dev'
33
+ Requires-Dist: pytest>=7.0; extra == 'dev'
34
+ Provides-Extra: docs
35
+ Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
36
+ Requires-Dist: pymdown-extensions>=10.0; extra == 'docs'
37
+ Provides-Extra: viz
38
+ Requires-Dist: matplotlib>=3.7; extra == 'viz'
39
+ Description-Content-Type: text/markdown
40
+
41
+ # skillinfer
42
+
43
+ **Infer a full skill profile from a few observations.**
44
+
45
+ Observe a few skills, predict the rest — with calibrated uncertainty. `skillinfer` learns how capabilities co-vary across a population and uses that structure to infer a full profile from partial observations.
46
+
47
+ A closed-form Bayesian update — no training loop, no GPU. One matrix operation gives you the exact posterior. Under 1ms per update, scales to 1000+ skills.
48
+
49
+ ## Install
50
+
51
+ ```bash
52
+ pip install skillinfer
53
+ ```
54
+
55
+ ## Quick start
56
+
57
+ ```python
58
+ import skillinfer
59
+
60
+ pop = skillinfer.datasets.onet() # 894 occupations x 120 skills
61
+ profile = pop.profile() # new entity, unknown
62
+ profile.observe("Skill:Programming", 0.92)
63
+ print(profile.predict()) # predict all 120 skills
64
+ ```
65
+
66
+ ```
67
+ feature mean std ci_lower ci_upper
68
+ Skill:Complex Problem Solving 0.81 0.17 0.47 1.00
69
+ Skill:Critical Thinking 0.73 0.15 0.43 1.00
70
+ Skill:Programming 0.92 0.01 0.90 0.93 ← observed
71
+ Skill:Mathematics 0.67 0.12 0.43 0.91
72
+ Ability:Static Strength 0.10 0.23 0.00 0.55 ← anti-correlated
73
+ ...
74
+ [120 rows x 5 columns]
75
+ ```
76
+
77
+ ## How it works
78
+
79
+ When you observe one skill, the Kalman update propagates to every other skill via the learned covariance:
80
+
81
+ - Skills with **positive covariance** move in the same direction (observe high Programming → predict high Analytical Reasoning)
82
+ - Skills with **negative covariance** move opposite (observe high Programming → predict low Static Strength)
83
+ - **Independent skills** are unaffected
84
+
85
+ The update is the standard closed-form Gaussian conditioning rule, and reported predictions are clipped to `[0, 1]` to match the population's natural scale.
86
+
87
+ Each `observe()` call is O(K²) — one matrix-vector product. No iteration, no convergence.
88
+
89
+ ## Core API
90
+
91
+ ```python
92
+ import skillinfer
93
+ from skillinfer import Skill, Task
94
+
95
+ # Build a population from any entity-feature matrix
96
+ pop = skillinfer.Population.from_dataframe(df)
97
+
98
+ # Create a profile and observe
99
+ profile = pop.profile()
100
+ profile.observe("math", 0.95)
101
+ profile.observe_many({"code": 0.89, "writing": 0.70})
102
+
103
+ # Predict with uncertainty
104
+ profile.predict() # all skills, with CIs
105
+ profile.predict("reasoning") # single skill
106
+ profile.most_uncertain(k=3) # what to assess next
107
+
108
+ # Match agents to tasks
109
+ task = Task({"math": 1.0, "reasoning": 0.5})
110
+ result = profile.match_score(task, threshold=0.8)
111
+
112
+ # Rank a pool of agents
113
+ ranking = skillinfer.rank_agents(task, profiles, threshold=0.8)
114
+
115
+ # Summary statistics
116
+ profile.summary(true_vector=ground_truth) # MAE, RMSE, coverage, etc.
117
+ pop.summary() # condition number, sparsity, etc.
118
+ ```
119
+
120
+ ## Built-in datasets
121
+
122
+ Two preprocessed datasets ship with the package (~440 KB total):
123
+
124
+ ```python
125
+ # O*NET 30.2 — U.S. Department of Labor
126
+ # 894 occupations x 120 features (skills, knowledge, abilities)
127
+ pop = skillinfer.datasets.onet()
128
+
129
+ # ESCO v1.2.1 — European Commission
130
+ # 2,999 occupations x 134 skill groups (binary)
131
+ pop = skillinfer.datasets.esco()
132
+ ```
133
+
134
+ | Dataset | Entities | Features | Scale | Source |
135
+ |---------|----------|----------|-------|--------|
136
+ | **O\*NET** | 894 occupations | 120 (35 skills, 33 knowledge, 52 abilities) | Continuous [0, 1] | [O\*NET 30.2](https://www.onetcenter.org/database.html), CC BY 4.0 |
137
+ | **ESCO** | 2,999 occupations | 134 Level-2 skill groups | Binary {0, 1} | [ESCO v1.2.1](https://esco.ec.europa.eu/) |
138
+
139
+ ## Use cases
140
+
141
+ | Domain | Observe | Predict |
142
+ |--------|---------|---------|
143
+ | **AI model selection** | 1-2 benchmark scores | All benchmarks + best model for a task |
144
+ | **Human skill profiling** | A few task observations | Full occupational profile (120 skills) |
145
+ | **Human-AI orchestration** | Partial evals for both | Who handles which subtask |
146
+ | **Worker-task matching** | Known competencies | Fit for new roles and tasks |
147
+
148
+ ## LLM orchestration
149
+
150
+ `skillinfer` profiles are structured context you feed to an LLM orchestrator alongside cost, latency, and business constraints. The LLM reasons about observed vs. inferred skills and applies natural language constraints that no scoring function could replicate:
151
+
152
+ ```python
153
+ from openai import OpenAI
154
+
155
+ # Build profiles from partial evaluations
156
+ agents = {
157
+ "gpt-4o": {"reasoning": 0.92, "code": 0.89},
158
+ "claude-3.5": {"reasoning": 0.90, "writing": 0.95},
159
+ "gemini-pro": {"math": 0.88, "code": 0.82},
160
+ }
161
+ profiles = {
162
+ name: pop.profile().observe_many(obs)
163
+ for name, obs in agents.items()
164
+ }
165
+
166
+ # Format as context for the orchestrator
167
+ agent_context = ""
168
+ for name, profile in profiles.items():
169
+ agent_context += f"\n{name}:\n"
170
+ for skill in ["math", "reasoning", "code"]:
171
+ pred = profile.predict(skill)
172
+ source = "observed" if pred["std"] < 0.01 else "inferred"
173
+ agent_context += f" {skill}: {pred['mean']:.2f} ± {pred['std']:.2f} ({source})\n"
174
+
175
+ # The LLM decides — not a scoring function
176
+ client = OpenAI()
177
+ response = client.chat.completions.create(
178
+ model="gpt-4o-mini",
179
+ messages=[{"role": "user", "content": f"Pick an agent for this math task.\n{agent_context}"}],
180
+ )
181
+ ```
182
+
183
+ ## Export / import
184
+
185
+ ```python
186
+ # Population
187
+ pop.to_csv("population.csv")
188
+ pop.to_parquet("population.parquet")
189
+ pop = skillinfer.Population.from_csv("population.csv")
190
+ pop = skillinfer.Population.from_parquet("population.parquet")
191
+
192
+ # Profile
193
+ profile.to_json("profile.json")
194
+ restored = skillinfer.Profile.from_json("profile.json")
195
+
196
+ d = profile.to_dict() # plain dict, JSON-serialisable
197
+ restored = skillinfer.Profile.from_dict(d)
198
+ ```
199
+
200
+ ## Visualization
201
+
202
+ Requires `pip install skillinfer[viz]`.
203
+
204
+ ```python
205
+ import skillinfer
206
+
207
+ pop = skillinfer.datasets.onet()
208
+ profile = pop.profile()
209
+ profile.observe("Skill:Programming", 0.92)
210
+
211
+ # Population charts
212
+ skillinfer.visualization.correlation_heatmap(pop) # clustered correlation matrix
213
+ skillinfer.visualization.scree_plot(pop) # PCA variance explained
214
+ skillinfer.visualization.feature_distributions(pop) # box plots by variance
215
+ skillinfer.visualization.skill_embedding(pop) # 2D PCA feature map
216
+ skillinfer.visualization.convergence_curve(pop) # MAE vs. observations
217
+
218
+ # Profile charts
219
+ skillinfer.visualization.posterior_profile(profile) # predicted skills + uncertainty
220
+ skillinfer.visualization.prediction_scatter(profile, true_vec) # predicted vs. true
221
+ skillinfer.visualization.uncertainty_waterfall(pop, observations) # uncertainty per observation
222
+ skillinfer.visualization.compare_profiles({"dev": dev, "nurse": nurse}) # side-by-side
223
+ ```
224
+
225
+ ## Documentation
226
+
227
+ Full documentation at [kostadindev.github.io/skillinfer](https://kostadindev.github.io/skillinfer):
228
+
229
+ - [Quickstart](https://kostadindev.github.io/skillinfer/getting-started/quickstart/)
230
+ - [Tutorials](https://kostadindev.github.io/skillinfer/tutorials/llm-benchmarks/) — LLM benchmarks, human skills, ESCO, agent orchestration
231
+ - [How It Works](https://kostadindev.github.io/skillinfer/how-it-works/kalman-update/) — Kalman update, covariance estimation, computational cost
232
+ - [API Reference](https://kostadindev.github.io/skillinfer/api/population/) — Population, Profile, Datasets, Visualization
233
+
234
+ ## License
235
+
236
+ MIT
@@ -0,0 +1,196 @@
1
+ # skillinfer
2
+
3
+ **Infer a full skill profile from a few observations.**
4
+
5
+ Observe a few skills, predict the rest — with calibrated uncertainty. `skillinfer` learns how capabilities co-vary across a population and uses that structure to infer a full profile from partial observations.
6
+
7
+ A closed-form Bayesian update — no training loop, no GPU. One matrix operation gives you the exact posterior. Under 1ms per update, scales to 1000+ skills.
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ pip install skillinfer
13
+ ```
14
+
15
+ ## Quick start
16
+
17
+ ```python
18
+ import skillinfer
19
+
20
+ pop = skillinfer.datasets.onet() # 894 occupations x 120 skills
21
+ profile = pop.profile() # new entity, unknown
22
+ profile.observe("Skill:Programming", 0.92)
23
+ print(profile.predict()) # predict all 120 skills
24
+ ```
25
+
26
+ ```
27
+ feature mean std ci_lower ci_upper
28
+ Skill:Complex Problem Solving 0.81 0.17 0.47 1.00
29
+ Skill:Critical Thinking 0.73 0.15 0.43 1.00
30
+ Skill:Programming 0.92 0.01 0.90 0.93 ← observed
31
+ Skill:Mathematics 0.67 0.12 0.43 0.91
32
+ Ability:Static Strength 0.10 0.23 0.00 0.55 ← anti-correlated
33
+ ...
34
+ [120 rows x 5 columns]
35
+ ```
36
+
37
+ ## How it works
38
+
39
+ When you observe one skill, the Kalman update propagates to every other skill via the learned covariance:
40
+
41
+ - Skills with **positive covariance** move in the same direction (observe high Programming → predict high Analytical Reasoning)
42
+ - Skills with **negative covariance** move opposite (observe high Programming → predict low Static Strength)
43
+ - **Independent skills** are unaffected
44
+
45
+ The update is the standard closed-form Gaussian conditioning rule, and reported predictions are clipped to `[0, 1]` to match the population's natural scale.
46
+
47
+ Each `observe()` call is O(K²) — one matrix-vector product. No iteration, no convergence.
48
+
49
+ ## Core API
50
+
51
+ ```python
52
+ import skillinfer
53
+ from skillinfer import Skill, Task
54
+
55
+ # Build a population from any entity-feature matrix
56
+ pop = skillinfer.Population.from_dataframe(df)
57
+
58
+ # Create a profile and observe
59
+ profile = pop.profile()
60
+ profile.observe("math", 0.95)
61
+ profile.observe_many({"code": 0.89, "writing": 0.70})
62
+
63
+ # Predict with uncertainty
64
+ profile.predict() # all skills, with CIs
65
+ profile.predict("reasoning") # single skill
66
+ profile.most_uncertain(k=3) # what to assess next
67
+
68
+ # Match agents to tasks
69
+ task = Task({"math": 1.0, "reasoning": 0.5})
70
+ result = profile.match_score(task, threshold=0.8)
71
+
72
+ # Rank a pool of agents
73
+ ranking = skillinfer.rank_agents(task, profiles, threshold=0.8)
74
+
75
+ # Summary statistics
76
+ profile.summary(true_vector=ground_truth) # MAE, RMSE, coverage, etc.
77
+ pop.summary() # condition number, sparsity, etc.
78
+ ```
79
+
80
+ ## Built-in datasets
81
+
82
+ Two preprocessed datasets ship with the package (~440 KB total):
83
+
84
+ ```python
85
+ # O*NET 30.2 — U.S. Department of Labor
86
+ # 894 occupations x 120 features (skills, knowledge, abilities)
87
+ pop = skillinfer.datasets.onet()
88
+
89
+ # ESCO v1.2.1 — European Commission
90
+ # 2,999 occupations x 134 skill groups (binary)
91
+ pop = skillinfer.datasets.esco()
92
+ ```
93
+
94
+ | Dataset | Entities | Features | Scale | Source |
95
+ |---------|----------|----------|-------|--------|
96
+ | **O\*NET** | 894 occupations | 120 (35 skills, 33 knowledge, 52 abilities) | Continuous [0, 1] | [O\*NET 30.2](https://www.onetcenter.org/database.html), CC BY 4.0 |
97
+ | **ESCO** | 2,999 occupations | 134 Level-2 skill groups | Binary {0, 1} | [ESCO v1.2.1](https://esco.ec.europa.eu/) |
98
+
99
+ ## Use cases
100
+
101
+ | Domain | Observe | Predict |
102
+ |--------|---------|---------|
103
+ | **AI model selection** | 1-2 benchmark scores | All benchmarks + best model for a task |
104
+ | **Human skill profiling** | A few task observations | Full occupational profile (120 skills) |
105
+ | **Human-AI orchestration** | Partial evals for both | Who handles which subtask |
106
+ | **Worker-task matching** | Known competencies | Fit for new roles and tasks |
107
+
108
+ ## LLM orchestration
109
+
110
+ `skillinfer` profiles are structured context you feed to an LLM orchestrator alongside cost, latency, and business constraints. The LLM reasons about observed vs. inferred skills and applies natural language constraints that no scoring function could replicate:
111
+
112
+ ```python
113
+ from openai import OpenAI
114
+
115
+ # Build profiles from partial evaluations
116
+ agents = {
117
+ "gpt-4o": {"reasoning": 0.92, "code": 0.89},
118
+ "claude-3.5": {"reasoning": 0.90, "writing": 0.95},
119
+ "gemini-pro": {"math": 0.88, "code": 0.82},
120
+ }
121
+ profiles = {
122
+ name: pop.profile().observe_many(obs)
123
+ for name, obs in agents.items()
124
+ }
125
+
126
+ # Format as context for the orchestrator
127
+ agent_context = ""
128
+ for name, profile in profiles.items():
129
+ agent_context += f"\n{name}:\n"
130
+ for skill in ["math", "reasoning", "code"]:
131
+ pred = profile.predict(skill)
132
+ source = "observed" if pred["std"] < 0.01 else "inferred"
133
+ agent_context += f" {skill}: {pred['mean']:.2f} ± {pred['std']:.2f} ({source})\n"
134
+
135
+ # The LLM decides — not a scoring function
136
+ client = OpenAI()
137
+ response = client.chat.completions.create(
138
+ model="gpt-4o-mini",
139
+ messages=[{"role": "user", "content": f"Pick an agent for this math task.\n{agent_context}"}],
140
+ )
141
+ ```
142
+
143
+ ## Export / import
144
+
145
+ ```python
146
+ # Population
147
+ pop.to_csv("population.csv")
148
+ pop.to_parquet("population.parquet")
149
+ pop = skillinfer.Population.from_csv("population.csv")
150
+ pop = skillinfer.Population.from_parquet("population.parquet")
151
+
152
+ # Profile
153
+ profile.to_json("profile.json")
154
+ restored = skillinfer.Profile.from_json("profile.json")
155
+
156
+ d = profile.to_dict() # plain dict, JSON-serialisable
157
+ restored = skillinfer.Profile.from_dict(d)
158
+ ```
159
+
160
+ ## Visualization
161
+
162
+ Requires `pip install skillinfer[viz]`.
163
+
164
+ ```python
165
+ import skillinfer
166
+
167
+ pop = skillinfer.datasets.onet()
168
+ profile = pop.profile()
169
+ profile.observe("Skill:Programming", 0.92)
170
+
171
+ # Population charts
172
+ skillinfer.visualization.correlation_heatmap(pop) # clustered correlation matrix
173
+ skillinfer.visualization.scree_plot(pop) # PCA variance explained
174
+ skillinfer.visualization.feature_distributions(pop) # box plots by variance
175
+ skillinfer.visualization.skill_embedding(pop) # 2D PCA feature map
176
+ skillinfer.visualization.convergence_curve(pop) # MAE vs. observations
177
+
178
+ # Profile charts
179
+ skillinfer.visualization.posterior_profile(profile) # predicted skills + uncertainty
180
+ skillinfer.visualization.prediction_scatter(profile, true_vec) # predicted vs. true
181
+ skillinfer.visualization.uncertainty_waterfall(pop, observations) # uncertainty per observation
182
+ skillinfer.visualization.compare_profiles({"dev": dev, "nurse": nurse}) # side-by-side
183
+ ```
184
+
185
+ ## Documentation
186
+
187
+ Full documentation at [kostadindev.github.io/skillinfer](https://kostadindev.github.io/skillinfer):
188
+
189
+ - [Quickstart](https://kostadindev.github.io/skillinfer/getting-started/quickstart/)
190
+ - [Tutorials](https://kostadindev.github.io/skillinfer/tutorials/llm-benchmarks/) — LLM benchmarks, human skills, ESCO, agent orchestration
191
+ - [How It Works](https://kostadindev.github.io/skillinfer/how-it-works/kalman-update/) — Kalman update, covariance estimation, computational cost
192
+ - [API Reference](https://kostadindev.github.io/skillinfer/api/population/) — Population, Profile, Datasets, Visualization
193
+
194
+ ## License
195
+
196
+ MIT
@@ -0,0 +1,15 @@
1
+ # TODOS — skillinfer
2
+
3
+ ## v0.2.0 Roadmap
4
+
5
+ - [ ] **Active Evaluation Scheduler** (`next_eval()`): Multi-entity, budget-constrained evaluation scheduling. Given N agents and a budget of M benchmark runs, decide which (agent, benchmark) cell to fill next, maximizing information gain via the Kalman posterior covariance.
6
+ - [ ] **Thompson sampling** in `rank_agents()`: `metric="thompson"` draws from the posterior for exploration. Requires handling near-singular covariance via `scipy.stats.multivariate_normal(allow_singular=True)` or Cholesky with jitter.
7
+ - [ ] **Process noise**: Time-varying capabilities for agents that improve/degrade over time. Add Q matrix to the Kalman update for non-stationary features.
8
+ - [ ] **GP baseline comparison**: Benchmark against Gaussian Processes at a small feature count (e.g. K=6) to honestly evaluate the accuracy/speed tradeoff vs GPs.
9
+
10
+ ## Nice-to-haves
11
+
12
+ - [ ] Cache PCA in `Taxonomy.__str__()` to avoid recomputing on every print
13
+ - [ ] Fuzzy match suggestions in error messages ("did you mean 'MATH Lvl 5'?")
14
+ - [ ] `Taxonomy.new_states(names)` helper for batch state creation
15
+ - [ ] Probit/ordinal link for binary/ordinal data (O*NET, student assessments)
@@ -0,0 +1,168 @@
1
+ # Datasets
2
+
3
+ Built-in datasets that return a `Population` ready for profiling. No downloads, no preprocessing — just import and go.
4
+
5
+ ```python
6
+ import skillinfer
7
+
8
+ pop = skillinfer.datasets.onet() # or skillinfer.datasets.esco()
9
+ profile = pop.profile()
10
+ profile.observe("Skill:Programming", 0.92)
11
+ print(profile.predict())
12
+ ```
13
+
14
+ Both datasets are shipped as compressed Parquet files inside the package (~440 KB total).
15
+
16
+ ---
17
+
18
+ ## `onet()`
19
+
20
+ **O\*NET 30.2** — the U.S. Department of Labor's occupational information network.
21
+
22
+ O\*NET is the most comprehensive public database of occupational skill requirements. It describes what workers in each occupation need to know and be able to do, based on surveys of incumbent workers and occupational analysts.
23
+
24
+ ```python
25
+ pop = skillinfer.datasets.onet()
26
+ print(pop)
27
+ # Population(894 entities x 120 skills, shrinkage=0.0054)
28
+ ```
29
+
30
+ ### What's in it
31
+
32
+ | | |
33
+ |---|---|
34
+ | **Entities** | 894 occupations (e.g., "Software Developers", "Registered Nurses", "Chief Executives") |
35
+ | **Features** | 120 total: 35 skills, 33 knowledge areas, 52 abilities |
36
+ | **Scale** | Continuous importance ratings, normalised to [0, 1] |
37
+ | **Source** | [O\*NET 30.2](https://www.onetcenter.org/database.html), U.S. Department of Labor / ETA |
38
+ | **License** | CC BY 4.0 |
39
+
40
+ ### Feature categories
41
+
42
+ Features are prefixed by category:
43
+
44
+ - **`Skill:`** — learned capabilities (e.g., `Skill:Programming`, `Skill:Critical Thinking`, `Skill:Writing`)
45
+ - **`Knowledge:`** — domain knowledge (e.g., `Knowledge:Mathematics`, `Knowledge:Computers and Electronics`)
46
+ - **`Ability:`** — enduring attributes (e.g., `Ability:Deductive Reasoning`, `Ability:Static Strength`, `Ability:Manual Dexterity`)
47
+
48
+ ### How it was preprocessed
49
+
50
+ The raw [O\*NET 30.2 database](https://www.onetcenter.org/dl_files/database/db_30_2_text.zip) contains multiple scales per feature (importance, level, relevance). We extract:
51
+
52
+ 1. **[Skills.txt](https://www.onetcenter.org/dictionary/30.2/text/Skills.html)**, **[Knowledge.txt](https://www.onetcenter.org/dictionary/30.2/text/Knowledge.html)**, **[Abilities.txt](https://www.onetcenter.org/dictionary/30.2/text/Abilities.html)** from the O\*NET 30.2 database
53
+ 2. Filter to the **Importance** scale (`Scale ID = "IM"`), which rates each feature on a 1–5 scale
54
+ 3. Drop rows marked as suppressed (`Recommend Suppress = "Y"`)
55
+ 4. Pivot to an **occupation × feature** matrix (894 × 120)
56
+ 5. Normalise each column to **[0, 1]** using min-max scaling
57
+ 6. Replace O\*NET SOC codes with human-readable occupation titles
58
+
59
+ ### Population statistics
60
+
61
+ | Statistic | Value |
62
+ |-----------|-------|
63
+ | Mean feature value | 0.385 |
64
+ | Feature std | 0.228 |
65
+ | Density (non-zero entries) | 92.2% |
66
+ | Ledoit-Wolf shrinkage | 0.0054 |
67
+ | Condition number | 1,884 |
68
+ | Effective dimensions (90% var) | ~15 |
69
+ | Mean \|correlation\| | 0.336 |
70
+ | Correlation sparsity (<0.1) | 14.8% |
71
+
72
+ The high mean correlation (0.336) and low sparsity (14.8%) mean most features are correlated — the population has rich transfer structure. Observing a few skills tells you a lot about the rest.
73
+
74
+ ### Example
75
+
76
+ ```python
77
+ pop = skillinfer.datasets.onet()
78
+
79
+ profile = pop.profile()
80
+ profile.observe("Skill:Programming", 0.92)
81
+ profile.observe("Skill:Critical Thinking", 0.85)
82
+ print(profile.predict())
83
+
84
+ # Use a specific occupation as prior
85
+ profile = pop.profile(prior_entity="Software Developers")
86
+ ```
87
+
88
+ **Parameters**
89
+
90
+ | Parameter | Type | Default | Description |
91
+ |-----------|------|---------|-------------|
92
+ | `normalize` | `bool` | `False` | Re-normalise columns to [0, 1]. Data is already normalised, so this is rarely needed. |
93
+
94
+ ---
95
+
96
+ ## `esco()`
97
+
98
+ **ESCO v1.2.1** — the European Commission's taxonomy of Skills, Competences, Qualifications and Occupations.
99
+
100
+ ESCO is an independently curated taxonomy maintained by EU expert panels. Where O\*NET uses continuous importance ratings from U.S. surveys, ESCO uses binary skill assignments curated by European domain experts. This makes it a strong cross-validation target — if `skillinfer` works on both, the method generalises beyond any single taxonomy.
101
+
102
+ ```python
103
+ pop = skillinfer.datasets.esco()
104
+ print(pop)
105
+ # Population(2999 entities x 134 skills, shrinkage=0.0211)
106
+ ```
107
+
108
+ ### What's in it
109
+
110
+ | | |
111
+ |---|---|
112
+ | **Entities** | 2,999 occupations (e.g., "technical director", "registered nurse", "software developer") |
113
+ | **Features** | 134 Level-2 skill groups |
114
+ | **Scale** | Binary (1 = occupation requires at least one essential skill in that group, 0 = does not) |
115
+ | **Source** | [ESCO v1.2.1](https://esco.ec.europa.eu/), European Commission |
116
+
117
+ ### How it was preprocessed
118
+
119
+ The [ESCO classification](https://esco.ec.europa.eu/en/use-esco/download) assigns ~13,000 individual skills to occupations. We aggregate to a manageable matrix:
120
+
121
+ 1. Load **[occupationSkillRelations_en.csv](https://esco.ec.europa.eu/en/use-esco/download)** — maps occupations to individual skills with relation types (essential/optional)
122
+ 2. Filter to **essential** relations only
123
+ 3. Walk each skill up the **[ESCO skill hierarchy](https://esco.ec.europa.eu/en/about-esco/what-does-esco-cover/skills-and-competences)** (via `broaderRelationsSkillPillar_en.csv` and `skillsHierarchy_en.csv`) to its **Level-2 skill group** ancestor
124
+ 4. Build a binary **occupation × skill-group** matrix: 1 if the occupation has at least one essential skill in that group, 0 otherwise
125
+ 5. Replace skill group URIs with human-readable **preferred labels** from the hierarchy
126
+ 6. Drop occupations with fewer than 5 skill groups (too sparse to be informative)
127
+ 7. Replace occupation URIs with human-readable titles from **occupations_en.csv**
128
+
129
+ ### Population statistics
130
+
131
+ | Statistic | Value |
132
+ |-----------|-------|
133
+ | Density (fraction of 1s) | 10.1% |
134
+ | Ledoit-Wolf shrinkage | 0.0211 |
135
+ | Condition number | 468 |
136
+ | Effective dimensions (90% var) | ~15 |
137
+ | Mean \|correlation\| | 0.055 |
138
+ | Correlation sparsity (<0.1) | 84.6% |
139
+
140
+ ESCO is much sparser than O\*NET — only 10% of entries are 1, and 84.6% of feature correlations are near zero. The covariance structure is concentrated in a few meaningful clusters (e.g., healthcare skills co-occur, IT skills co-occur), with most skill groups being independent. Transfer is still valuable but more targeted.
141
+
142
+ ### Key differences from O\*NET
143
+
144
+ | | O\*NET | ESCO |
145
+ |---|--------|------|
146
+ | **Source** | U.S. Department of Labor surveys | EU expert panel curation |
147
+ | **Scale** | Continuous [0, 1] | Binary {0, 1} |
148
+ | **Features** | 120 (skills + knowledge + abilities) | 134 (skill groups only) |
149
+ | **Entities** | 894 occupations | 2,999 occupations |
150
+ | **Correlation structure** | Dense (mean \|r\| = 0.34) | Sparse (mean \|r\| = 0.06) |
151
+ | **Best for** | Rich skill profiling, continuous predictions | Cross-validation, binary classification tasks |
152
+
153
+ ### Example
154
+
155
+ ```python
156
+ pop = skillinfer.datasets.esco()
157
+
158
+ profile = pop.profile()
159
+ profile.observe("education", 1.0)
160
+ profile.observe("teaching and training", 1.0)
161
+ print(profile.predict())
162
+ ```
163
+
164
+ **Parameters**
165
+
166
+ | Parameter | Type | Default | Description |
167
+ |-----------|------|---------|-------------|
168
+ | `normalize` | `bool` | `False` | Normalise columns to [0, 1]. Data is binary, so this is rarely needed. |