orbit-robotics 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. orbit_robotics-0.2.0/LICENSE +21 -0
  2. orbit_robotics-0.2.0/PKG-INFO +164 -0
  3. orbit_robotics-0.2.0/README.md +122 -0
  4. orbit_robotics-0.2.0/pyproject.toml +86 -0
  5. orbit_robotics-0.2.0/setup.cfg +4 -0
  6. orbit_robotics-0.2.0/src/orbit/__init__.py +3 -0
  7. orbit_robotics-0.2.0/src/orbit/analyzer/__init__.py +0 -0
  8. orbit_robotics-0.2.0/src/orbit/analyzer/community_comparison.py +220 -0
  9. orbit_robotics-0.2.0/src/orbit/analyzer/coverage.py +229 -0
  10. orbit_robotics-0.2.0/src/orbit/analyzer/data/ground_truth_condensed.json +740 -0
  11. orbit_robotics-0.2.0/src/orbit/analyzer/dataset_loader.py +334 -0
  12. orbit_robotics-0.2.0/src/orbit/analyzer/embeddings.py +510 -0
  13. orbit_robotics-0.2.0/src/orbit/analyzer/llm_recommendations.py +232 -0
  14. orbit_robotics-0.2.0/src/orbit/analyzer/phase_detector.py +335 -0
  15. orbit_robotics-0.2.0/src/orbit/analyzer/policy_fit.py +361 -0
  16. orbit_robotics-0.2.0/src/orbit/analyzer/quality.py +506 -0
  17. orbit_robotics-0.2.0/src/orbit/analyzer/recommendations.py +198 -0
  18. orbit_robotics-0.2.0/src/orbit/analyzer/signal_diagnostics.py +928 -0
  19. orbit_robotics-0.2.0/src/orbit/analyzer/success_predictor.py +548 -0
  20. orbit_robotics-0.2.0/src/orbit/analyzer/task_context.py +360 -0
  21. orbit_robotics-0.2.0/src/orbit/analyzer/task_inference.py +323 -0
  22. orbit_robotics-0.2.0/src/orbit/analyzer/vlm_predictor.py +599 -0
  23. orbit_robotics-0.2.0/src/orbit/analyzer/vlm_task_analyzer.py +473 -0
  24. orbit_robotics-0.2.0/src/orbit/cli.py +1368 -0
  25. orbit_robotics-0.2.0/src/orbit/models/__init__.py +0 -0
  26. orbit_robotics-0.2.0/src/orbit/planner/__init__.py +0 -0
  27. orbit_robotics-0.2.0/src/orbit/planner/playbook.py +241 -0
  28. orbit_robotics-0.2.0/src/orbit/planner/robot_configs/aloha.json +9 -0
  29. orbit_robotics-0.2.0/src/orbit/planner/robot_configs/koch.json +9 -0
  30. orbit_robotics-0.2.0/src/orbit/planner/robot_configs/so100.json +9 -0
  31. orbit_robotics-0.2.0/src/orbit/planner/robot_configs/so101.json +9 -0
  32. orbit_robotics-0.2.0/src/orbit/planner/templates/generic.json +31 -0
  33. orbit_robotics-0.2.0/src/orbit/planner/templates/insertion.json +31 -0
  34. orbit_robotics-0.2.0/src/orbit/planner/templates/pick_and_place.json +31 -0
  35. orbit_robotics-0.2.0/src/orbit/planner/templates/pouring.json +31 -0
  36. orbit_robotics-0.2.0/src/orbit/planner/templates/stacking.json +31 -0
  37. orbit_robotics-0.2.0/src/orbit/planner/templates/wiping.json +31 -0
  38. orbit_robotics-0.2.0/src/orbit/tracker/__init__.py +0 -0
  39. orbit_robotics-0.2.0/src/orbit/tracker/progress.py +296 -0
  40. orbit_robotics-0.2.0/src/orbit/utils/__init__.py +0 -0
  41. orbit_robotics-0.2.0/src/orbit/utils/display.py +44 -0
  42. orbit_robotics-0.2.0/src/orbit_robotics.egg-info/PKG-INFO +164 -0
  43. orbit_robotics-0.2.0/src/orbit_robotics.egg-info/SOURCES.txt +61 -0
  44. orbit_robotics-0.2.0/src/orbit_robotics.egg-info/dependency_links.txt +1 -0
  45. orbit_robotics-0.2.0/src/orbit_robotics.egg-info/entry_points.txt +2 -0
  46. orbit_robotics-0.2.0/src/orbit_robotics.egg-info/requires.txt +24 -0
  47. orbit_robotics-0.2.0/src/orbit_robotics.egg-info/top_level.txt +1 -0
  48. orbit_robotics-0.2.0/tests/test_cli.py +317 -0
  49. orbit_robotics-0.2.0/tests/test_coverage.py +90 -0
  50. orbit_robotics-0.2.0/tests/test_dataset_loader.py +242 -0
  51. orbit_robotics-0.2.0/tests/test_embeddings.py +369 -0
  52. orbit_robotics-0.2.0/tests/test_full_pipeline.py +172 -0
  53. orbit_robotics-0.2.0/tests/test_integration.py +70 -0
  54. orbit_robotics-0.2.0/tests/test_planner.py +218 -0
  55. orbit_robotics-0.2.0/tests/test_policy_fit.py +111 -0
  56. orbit_robotics-0.2.0/tests/test_quality.py +217 -0
  57. orbit_robotics-0.2.0/tests/test_quality_metrics.py +264 -0
  58. orbit_robotics-0.2.0/tests/test_recommendations.py +185 -0
  59. orbit_robotics-0.2.0/tests/test_signal_diagnostics.py +562 -0
  60. orbit_robotics-0.2.0/tests/test_success_predictor.py +142 -0
  61. orbit_robotics-0.2.0/tests/test_task_context.py +151 -0
  62. orbit_robotics-0.2.0/tests/test_tracker.py +272 -0
  63. orbit_robotics-0.2.0/tests/test_vlm_predictor.py +453 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Rahil Lasne
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,164 @@
1
+ Metadata-Version: 2.4
2
+ Name: orbit-robotics
3
+ Version: 0.2.0
4
+ Summary: Predict whether your robot learning data will actually train successfully
5
+ Author: Rahil Lasne
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/Rahillasne/orbit-robotics
8
+ Project-URL: Repository, https://github.com/Rahillasne/orbit-robotics
9
+ Project-URL: Issues, https://github.com/Rahillasne/orbit-robotics/issues
10
+ Keywords: robotics,machine-learning,data-quality,imitation-learning,lerobot
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: click>=8.0
22
+ Requires-Dist: rich>=13.0
23
+ Requires-Dist: numpy>=1.24
24
+ Requires-Dist: pandas>=2.0
25
+ Requires-Dist: scipy>=1.10
26
+ Requires-Dist: huggingface-hub>=0.20
27
+ Provides-Extra: vision
28
+ Requires-Dist: torch>=2.0; extra == "vision"
29
+ Requires-Dist: transformers>=4.36; extra == "vision"
30
+ Requires-Dist: scikit-learn>=1.3; extra == "vision"
31
+ Requires-Dist: opencv-python>=4.8; extra == "vision"
32
+ Requires-Dist: decord>=0.6; extra == "vision"
33
+ Requires-Dist: Pillow>=10.0; extra == "vision"
34
+ Provides-Extra: vlm
35
+ Requires-Dist: google-generativeai>=0.5; extra == "vlm"
36
+ Provides-Extra: dev
37
+ Requires-Dist: pytest>=7.0; extra == "dev"
38
+ Requires-Dist: ruff>=0.1; extra == "dev"
39
+ Provides-Extra: all
40
+ Requires-Dist: orbit-robotics[vision,vlm]; extra == "all"
41
+ Dynamic: license-file
42
+
43
+ # ORBIT
44
+
45
+ **Predict whether your robot learning data will actually train successfully.**
46
+
47
+ ORBIT analyzes your robot demonstration dataset and predicts your policy's success rate — before you spend hours training. It connects to any [LeRobot](https://huggingface.co/lerobot) dataset on HuggingFace Hub.
48
+
49
+ ## Quick Start
50
+
51
+ ```bash
52
+ pip install orbit-robotics
53
+ orbit analyze lerobot/aloha_sim_transfer_cube_human
54
+ ```
55
+
56
+ ## What You Get
57
+
58
+ - **Quality score** with component breakdown (position diversity, action diversity, consistency, temporal coverage)
59
+ - **Dead joint detection** — catches stuck servos that waste training compute
60
+ - **Gripper analysis** — continuous and discrete gripper detection via bimodal clustering
61
+ - **Directional bias detection** — distinguishes goal-directed motion from data collection problems
62
+ - **Calibrated success rate prediction** with confidence interval, benchmarked against 82 published results
63
+ - **Policy fit analysis** — ACT, Diffusion Policy, SmolVLA, DP3, BC, BC-RNN
64
+ - **Community comparison** against 82 benchmarked datasets from published papers
65
+ - **Actionable recommendations** with specific joint/episode numbers
66
+
67
+ ## Commands
68
+
69
+ ```bash
70
+ orbit analyze <dataset> # full analysis
71
+ orbit analyze <dataset> --json # machine-readable output
72
+ orbit analyze <dataset> --policy act # specific policy fit
73
+ orbit analyze <dataset> --skip-embeddings --skip-ai-assessment # fast mode
74
+ orbit benchmark # browse 82 benchmark entries
75
+ orbit benchmark --task pick_and_place --min-success 0.7
76
+ orbit benchmark aloha --top 5
77
+ orbit plan "pick up cups" --robot so100 --policy act
78
+ ```
79
+
80
+ ## Policy Support
81
+
82
+ | Policy | Flag | Notes |
83
+ |--------|------|-------|
84
+ | ACT | `--policy act` | Action Chunking Transformer — needs consistent, high-res demos |
85
+ | Diffusion Policy | `--policy diffusion_policy` | Handles multimodal data well |
86
+ | SmolVLA | `--policy smolvla` | Vision-Language-Action — language-conditioned |
87
+ | DP3 | `--policy dp3` | 3D Diffusion Policy |
88
+ | BC | `--policy bc` | Behavioral Cloning baseline |
89
+ | BC-RNN | `--policy bc_rnn` | Recurrent Behavioral Cloning |
90
+
91
+ Use `--policy auto` (default) to let ORBIT recommend the best policy for your data.
92
+
93
+ ## Robot Support
94
+
95
+ | Robot | Type | Arms |
96
+ |-------|------|------|
97
+ | SO-100 | Desktop arm | 1 |
98
+ | SO-101 | Desktop arm | 1 |
99
+ | Koch v1.1 | Desktop arm | 1 |
100
+ | ALOHA | Bimanual | 2 |
101
+ | xArm | Industrial | 1 |
102
+ | Custom | Any | `--robot custom` |
103
+
104
+ ## Advanced Usage
105
+
106
+ ### VLM-enhanced analysis
107
+
108
+ ```bash
109
+ pip install orbit-robotics[vlm]
110
+ export GOOGLE_API_KEY=your_key
111
+ orbit analyze lerobot/aloha_sim_transfer_cube_human
112
+ ```
113
+
114
+ Gemini Flash analyzes your observation frames to identify task type, failure modes, and difficulty — improving prediction accuracy.
115
+
116
+ ### Embedding analysis
117
+
118
+ ```bash
119
+ pip install orbit-robotics[vision]
120
+ orbit analyze lerobot/pusht
121
+ ```
122
+
123
+ SigLIP embeddings measure visual diversity across episodes and detect outliers.
124
+
125
+ ### JSON output
126
+
127
+ ```bash
128
+ orbit analyze lerobot/pusht --json
129
+ ```
130
+
131
+ ### Policy comparison
132
+
133
+ ```bash
134
+ orbit analyze lerobot/pusht --policy act
135
+ orbit analyze lerobot/pusht --policy diffusion_policy
136
+ ```
137
+
138
+ ## How It Works
139
+
140
+ ORBIT fetches dataset metadata and episode samples from HuggingFace Hub without downloading the full dataset. It runs signal diagnostics on every joint dimension to detect dead joints, clipping, and directional bias. Task complexity is estimated from action dimensionality, temporal structure, and coordination patterns. Policy fit scores how well your data matches the requirements of your chosen policy (episode count, consistency, action dimensions). All factors feed into a calibrated predictor benchmarked against 82 ground truth results from published papers (ACT, Diffusion Policy, BC variants across Push-T, ALOHA, RoboMimic, and more).
141
+
142
+ ## Install Options
143
+
144
+ ```bash
145
+ pip install orbit-robotics # Core analysis (no GPU needed)
146
+ pip install orbit-robotics[vision] # + SigLIP embedding analysis
147
+ pip install orbit-robotics[vlm] # + Gemini VLM task analysis
148
+ pip install orbit-robotics[all] # Everything
149
+ ```
150
+
151
+ ## Citation
152
+
153
+ ```bibtex
154
+ @software{orbit2026,
155
+ title = {ORBIT: Predict Robot Policy Success from Training Data},
156
+ author = {Lasne, Rahil},
157
+ year = {2026},
158
+ url = {https://github.com/Rahillasne/orbit-robotics}
159
+ }
160
+ ```
161
+
162
+ ## License
163
+
164
+ MIT — see [LICENSE](LICENSE) for details.
@@ -0,0 +1,122 @@
1
+ # ORBIT
2
+
3
+ **Predict whether your robot learning data will actually train successfully.**
4
+
5
+ ORBIT analyzes your robot demonstration dataset and predicts your policy's success rate — before you spend hours training. It connects to any [LeRobot](https://huggingface.co/lerobot) dataset on HuggingFace Hub.
6
+
7
+ ## Quick Start
8
+
9
+ ```bash
10
+ pip install orbit-robotics
11
+ orbit analyze lerobot/aloha_sim_transfer_cube_human
12
+ ```
13
+
14
+ ## What You Get
15
+
16
+ - **Quality score** with component breakdown (position diversity, action diversity, consistency, temporal coverage)
17
+ - **Dead joint detection** — catches stuck servos that waste training compute
18
+ - **Gripper analysis** — continuous and discrete gripper detection via bimodal clustering
19
+ - **Directional bias detection** — distinguishes goal-directed motion from data collection problems
20
+ - **Calibrated success rate prediction** with confidence interval, benchmarked against 82 published results
21
+ - **Policy fit analysis** — ACT, Diffusion Policy, SmolVLA, DP3, BC, BC-RNN
22
+ - **Community comparison** against 82 benchmarked datasets from published papers
23
+ - **Actionable recommendations** with specific joint/episode numbers
24
+
25
+ ## Commands
26
+
27
+ ```bash
28
+ orbit analyze <dataset> # full analysis
29
+ orbit analyze <dataset> --json # machine-readable output
30
+ orbit analyze <dataset> --policy act # specific policy fit
31
+ orbit analyze <dataset> --skip-embeddings --skip-ai-assessment # fast mode
32
+ orbit benchmark # browse 82 benchmark entries
33
+ orbit benchmark --task pick_and_place --min-success 0.7
34
+ orbit benchmark aloha --top 5
35
+ orbit plan "pick up cups" --robot so100 --policy act
36
+ ```
37
+
38
+ ## Policy Support
39
+
40
+ | Policy | Flag | Notes |
41
+ |--------|------|-------|
42
+ | ACT | `--policy act` | Action Chunking Transformer — needs consistent, high-res demos |
43
+ | Diffusion Policy | `--policy diffusion_policy` | Handles multimodal data well |
44
+ | SmolVLA | `--policy smolvla` | Vision-Language-Action — language-conditioned |
45
+ | DP3 | `--policy dp3` | 3D Diffusion Policy |
46
+ | BC | `--policy bc` | Behavioral Cloning baseline |
47
+ | BC-RNN | `--policy bc_rnn` | Recurrent Behavioral Cloning |
48
+
49
+ Use `--policy auto` (default) to let ORBIT recommend the best policy for your data.
50
+
51
+ ## Robot Support
52
+
53
+ | Robot | Type | Arms |
54
+ |-------|------|------|
55
+ | SO-100 | Desktop arm | 1 |
56
+ | SO-101 | Desktop arm | 1 |
57
+ | Koch v1.1 | Desktop arm | 1 |
58
+ | ALOHA | Bimanual | 2 |
59
+ | xArm | Industrial | 1 |
60
+ | Custom | Any | `--robot custom` |
61
+
62
+ ## Advanced Usage
63
+
64
+ ### VLM-enhanced analysis
65
+
66
+ ```bash
67
+ pip install orbit-robotics[vlm]
68
+ export GOOGLE_API_KEY=your_key
69
+ orbit analyze lerobot/aloha_sim_transfer_cube_human
70
+ ```
71
+
72
+ Gemini Flash analyzes your observation frames to identify task type, failure modes, and difficulty — improving prediction accuracy.
73
+
74
+ ### Embedding analysis
75
+
76
+ ```bash
77
+ pip install orbit-robotics[vision]
78
+ orbit analyze lerobot/pusht
79
+ ```
80
+
81
+ SigLIP embeddings measure visual diversity across episodes and detect outliers.
82
+
83
+ ### JSON output
84
+
85
+ ```bash
86
+ orbit analyze lerobot/pusht --json
87
+ ```
88
+
89
+ ### Policy comparison
90
+
91
+ ```bash
92
+ orbit analyze lerobot/pusht --policy act
93
+ orbit analyze lerobot/pusht --policy diffusion_policy
94
+ ```
95
+
96
+ ## How It Works
97
+
98
+ ORBIT fetches dataset metadata and episode samples from HuggingFace Hub without downloading the full dataset. It runs signal diagnostics on every joint dimension to detect dead joints, clipping, and directional bias. Task complexity is estimated from action dimensionality, temporal structure, and coordination patterns. Policy fit scores how well your data matches the requirements of your chosen policy (episode count, consistency, action dimensions). All factors feed into a calibrated predictor benchmarked against 82 ground truth results from published papers (ACT, Diffusion Policy, BC variants across Push-T, ALOHA, RoboMimic, and more).
99
+
100
+ ## Install Options
101
+
102
+ ```bash
103
+ pip install orbit-robotics # Core analysis (no GPU needed)
104
+ pip install orbit-robotics[vision] # + SigLIP embedding analysis
105
+ pip install orbit-robotics[vlm] # + Gemini VLM task analysis
106
+ pip install orbit-robotics[all] # Everything
107
+ ```
108
+
109
+ ## Citation
110
+
111
+ ```bibtex
112
+ @software{orbit2026,
113
+ title = {ORBIT: Predict Robot Policy Success from Training Data},
114
+ author = {Lasne, Rahil},
115
+ year = {2026},
116
+ url = {https://github.com/Rahillasne/orbit-robotics}
117
+ }
118
+ ```
119
+
120
+ ## License
121
+
122
+ MIT — see [LICENSE](LICENSE) for details.
@@ -0,0 +1,86 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "orbit-robotics"
7
+ version = "0.2.0"
8
+ description = "Predict whether your robot learning data will actually train successfully"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ {name = "Rahil Lasne"},
14
+ ]
15
+ keywords = ["robotics", "machine-learning", "data-quality", "imitation-learning", "lerobot"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Science/Research",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
24
+ ]
25
+ dependencies = [
26
+ "click>=8.0",
27
+ "rich>=13.0",
28
+ "numpy>=1.24",
29
+ "pandas>=2.0",
30
+ "scipy>=1.10",
31
+ "huggingface-hub>=0.20",
32
+ ]
33
+
34
+ [project.optional-dependencies]
35
+ vision = [
36
+ "torch>=2.0",
37
+ "transformers>=4.36",
38
+ "scikit-learn>=1.3",
39
+ "opencv-python>=4.8",
40
+ "decord>=0.6",
41
+ "Pillow>=10.0",
42
+ ]
43
+ vlm = [
44
+ "google-generativeai>=0.5",
45
+ ]
46
+ dev = [
47
+ "pytest>=7.0",
48
+ "ruff>=0.1",
49
+ ]
50
+ all = [
51
+ "orbit-robotics[vision,vlm]",
52
+ ]
53
+
54
+ [project.urls]
55
+ Homepage = "https://github.com/Rahillasne/orbit-robotics"
56
+ Repository = "https://github.com/Rahillasne/orbit-robotics"
57
+ Issues = "https://github.com/Rahillasne/orbit-robotics/issues"
58
+
59
+ [project.scripts]
60
+ orbit = "orbit.cli:main"
61
+
62
+ [tool.setuptools.packages.find]
63
+ where = ["src"]
64
+
65
+ [tool.setuptools.package-data]
66
+ orbit = ["planner/templates/*.json", "planner/robot_configs/*.json", "analyzer/data/*.json"]
67
+
68
+ [tool.ruff]
69
+ target-version = "py310"
70
+ src = ["src"]
71
+ line-length = 100
72
+
73
+ [tool.ruff.lint]
74
+ select = ["E", "F", "I", "W", "UP"]
75
+
76
+ [tool.pytest.ini_options]
77
+ testpaths = ["tests"]
78
+ markers = [
79
+ "integration: marks tests as integration tests (require torch/transformers)",
80
+ ]
81
+
82
+ [tool.mypy]
83
+ python_version = "3.10"
84
+ warn_return_any = true
85
+ warn_unused_configs = true
86
+ disallow_untyped_defs = false
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ """orbit-robotics: Data strategy copilot for robot policy training."""
2
+
3
+ __version__ = "0.1.0"
File without changes
@@ -0,0 +1,220 @@
1
+ """Community comparison — compare a dataset against the benchmark database.
2
+
3
+ Finds similar entries by task_type and robot_type, then produces a structured
4
+ comparison showing how the user's dataset stacks up against successful peers.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import logging
11
+ from dataclasses import dataclass, field
12
+ from pathlib import Path
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ GROUND_TRUTH_PATH = Path(__file__).parent / "data" / "ground_truth_condensed.json"
17
+
18
+
19
@dataclass
class PeerEntry:
    """A single benchmark peer for display.

    One row of the ground-truth benchmark database, trimmed to the fields
    surfaced in the community-comparison output.
    """

    id: str  # benchmark entry identifier
    dataset: str  # source dataset name
    num_episodes: int  # episode count reported for this peer (0 if unknown)
    success_rate: float  # published success rate as a fraction (e.g. 0.70 = 70%)
    policy: str  # policy type used in the published result (e.g. "act")
    task_type: str  # task category (e.g. "pick_and_place")
29
+
30
+
31
@dataclass
class CommunityComparison:
    """Result of comparing a dataset against the benchmark database.

    Produced by :func:`compute_community_comparison`; consumed for display
    and serialized via :func:`comparison_to_dict`.
    """

    similar_successful: list[PeerEntry]  # peers with >70% success
    similar_all: list[PeerEntry]  # all matched peers (for stats)
    your_episodes: int  # episode count of the analyzed dataset
    peer_avg_episodes: float  # mean episode count across matched peers
    your_episode_percentile: str  # "bottom 25%", "below average", "above average", "top 25%", or "unknown"
    your_coverage: float | None  # overall coverage-like metric, if computed
    peer_avg_coverage: float | None  # always None today — benchmark DB doesn't store coverage
    actionable_tip: str  # one-liner recommendation
43
+
44
+
45
def compute_community_comparison(
    dataset_name: str,
    task_type: str | None,
    robot_type: str | None,
    num_episodes: int,
    coverage_score: float | None = None,
    policy_type: str | None = None,
) -> CommunityComparison | None:
    """Find similar benchmark entries and build a comparison.

    Args:
        dataset_name: HuggingFace dataset id (e.g. "lerobot/pusht").
        task_type: Detected task category, if known.
        robot_type: Robot platform name, if known.
        num_episodes: Episode count of the user's dataset.
        coverage_score: Optional coverage-like metric, passed through.
        policy_type: Currently unused; kept for interface stability.

    Returns:
        A CommunityComparison, or None if the benchmark database can't be
        loaded or no similar entries are found.
    """
    try:
        with open(GROUND_TRUTH_PATH) as f:
            ground_truth: list[dict] = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        logger.warning("Failed to load benchmark database: %s", e)
        return None

    if not ground_truth:
        return None

    task_lower = (task_type or "").lower()
    robot_lower = (robot_type or "").lower()
    dataset_lower = dataset_name.lower()

    # Dataset-name keywords are loop-invariant — compute them once, not per entry.
    name_parts = [
        p for p in dataset_lower.replace("/", " ").replace("_", " ").split()
        if len(p) > 2 and p not in ("sim", "the", "and", "lerobot")
    ]

    # Score each benchmark entry for similarity to the user's dataset.
    scored: list[tuple[float, dict]] = []
    for entry in ground_truth:
        score = 0.0
        entry_task = entry.get("task_type", "").lower()
        entry_summary = entry.get("features_summary", "").lower()
        entry_id = entry.get("id", "").lower()

        # Task type match (strongest signal)
        if task_lower and task_lower == entry_task:
            score += 5.0
        elif task_lower and task_lower in entry_task:
            score += 3.0

        # Robot type / bimanual matching via summary
        if robot_lower:
            if robot_lower in entry_summary:
                score += 3.0
            elif "aloha" in robot_lower and "bimanual" in entry_summary:
                score += 2.0
        elif "aloha" in dataset_lower and "aloha" in entry_id:
            score += 3.0

        # Dataset name keyword overlap
        for part in name_parts:
            if part in entry_id or part in entry_summary:
                score += 1.0

        if score > 0:
            scored.append((score, entry))

    if not scored:
        return None

    # Sort by descending similarity, keep the top 5 peers.
    # (`scored` is non-empty here, so no extra emptiness check is needed.)
    scored.sort(key=lambda x: -x[0])
    top_entries = [e for _, e in scored[:5]]

    # Build peer entries. Use .get() for every field (the original indexed
    # "id"/"dataset" directly) so a malformed benchmark row degrades to
    # placeholder values instead of raising KeyError.
    all_peers = [
        PeerEntry(
            id=e.get("id", "unknown"),
            dataset=e.get("dataset", "unknown"),
            num_episodes=e.get("num_episodes", 0),
            success_rate=e.get("success_rate", 0.0),
            policy=e.get("policy", "unknown"),
            task_type=e.get("task_type", "unknown"),
        )
        for e in top_entries
    ]

    successful_peers = [p for p in all_peers if p.success_rate >= 0.70]

    # Compute stats from all peers (entries with unknown counts excluded).
    peer_episodes = [p.num_episodes for p in all_peers if p.num_episodes > 0]
    peer_avg_ep = sum(peer_episodes) / len(peer_episodes) if peer_episodes else 0

    # Episode percentile: fraction of peers with MORE episodes than ours.
    if peer_episodes:
        below = sum(1 for ep in peer_episodes if num_episodes < ep)
        ratio = below / len(peer_episodes)
        if ratio >= 0.75:
            percentile = "bottom 25%"
        elif ratio >= 0.50:
            percentile = "below average"
        elif ratio >= 0.25:
            percentile = "above average"
        else:
            percentile = "top 25%"
    else:
        percentile = "unknown"

    # One actionable recommendation derived from the comparison.
    tip = _generate_tip(num_episodes, peer_avg_ep, successful_peers, coverage_score)

    return CommunityComparison(
        similar_successful=successful_peers,
        similar_all=all_peers,
        your_episodes=num_episodes,
        peer_avg_episodes=peer_avg_ep,
        your_episode_percentile=percentile,
        your_coverage=coverage_score,
        peer_avg_coverage=None,  # benchmark doesn't store coverage
        actionable_tip=tip,
    )
163
+
164
+
165
+ def _generate_tip(
166
+ num_episodes: int,
167
+ peer_avg_ep: float,
168
+ successful_peers: list[PeerEntry],
169
+ coverage_score: float | None,
170
+ ) -> str:
171
+ """Generate one actionable recommendation from the comparison."""
172
+ if not successful_peers:
173
+ return "No similar datasets with >70% success found — you're pioneering this task type."
174
+
175
+ min_success_eps = min(p.num_episodes for p in successful_peers)
176
+ avg_success_eps = sum(p.num_episodes for p in successful_peers) / len(successful_peers)
177
+
178
+ if num_episodes < min_success_eps:
179
+ gap = int(min_success_eps - num_episodes)
180
+ return f"Collect ~{gap} more episodes to match the smallest successful peer ({min_success_eps} eps)."
181
+
182
+ if num_episodes < avg_success_eps * 0.8:
183
+ gap = int(avg_success_eps - num_episodes)
184
+ return f"Collect ~{gap} more episodes to match successful peers (avg {int(avg_success_eps)} eps)."
185
+
186
+ if coverage_score is not None and coverage_score < 0.70:
187
+ return "Episode count matches peers — focus on diversity (varied start positions, speeds, approaches)."
188
+
189
+ return "Your dataset size matches successful peers — focus on data quality and policy tuning."
190
+
191
+
192
def comparison_to_dict(comp: CommunityComparison) -> dict:
    """Convert a CommunityComparison to a JSON-serializable dict.

    Peer rows in "similar_all" carry an extra "task_type" key that the
    "similar_successful" rows omit.
    """

    def _peer_row(p, include_task: bool) -> dict:
        # Shared serialization for one PeerEntry.
        row = {
            "id": p.id,
            "dataset": p.dataset,
            "num_episodes": p.num_episodes,
            "success_rate": p.success_rate,
            "policy": p.policy,
        }
        if include_task:
            row["task_type"] = p.task_type
        return row

    return {
        "similar_successful": [_peer_row(p, False) for p in comp.similar_successful],
        "similar_all": [_peer_row(p, True) for p in comp.similar_all],
        "your_episodes": comp.your_episodes,
        "peer_avg_episodes": round(comp.peer_avg_episodes, 1),
        "your_episode_percentile": comp.your_episode_percentile,
        "actionable_tip": comp.actionable_tip,
    }