benchhub-client 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. benchhub_client-0.1.0/LICENSE +21 -0
  2. benchhub_client-0.1.0/PKG-INFO +128 -0
  3. benchhub_client-0.1.0/README.md +113 -0
  4. benchhub_client-0.1.0/benchhub/__init__.py +50 -0
  5. benchhub_client-0.1.0/benchhub/client.py +692 -0
  6. benchhub_client-0.1.0/benchhub/hf_croissant.py +424 -0
  7. benchhub_client-0.1.0/benchhub/hf_materialize.py +454 -0
  8. benchhub_client-0.1.0/benchhub/hf_search.py +372 -0
  9. benchhub_client-0.1.0/benchhub/lb_materialize.py +320 -0
  10. benchhub_client-0.1.0/benchhub/manifest.py +763 -0
  11. benchhub_client-0.1.0/benchhub/preview.py +299 -0
  12. benchhub_client-0.1.0/benchhub/types.py +681 -0
  13. benchhub_client-0.1.0/benchhub_client.egg-info/PKG-INFO +128 -0
  14. benchhub_client-0.1.0/benchhub_client.egg-info/SOURCES.txt +92 -0
  15. benchhub_client-0.1.0/benchhub_client.egg-info/dependency_links.txt +1 -0
  16. benchhub_client-0.1.0/benchhub_client.egg-info/requires.txt +3 -0
  17. benchhub_client-0.1.0/benchhub_client.egg-info/top_level.txt +1 -0
  18. benchhub_client-0.1.0/pyproject.toml +24 -0
  19. benchhub_client-0.1.0/setup.cfg +4 -0
  20. benchhub_client-0.1.0/tests/test_admin_management.py +129 -0
  21. benchhub_client-0.1.0/tests/test_admin_overrides.py +101 -0
  22. benchhub_client-0.1.0/tests/test_admin_user_storage.py +110 -0
  23. benchhub_client-0.1.0/tests/test_api_tokens_legal.py +174 -0
  24. benchhub_client-0.1.0/tests/test_apply_tag_filters.py +168 -0
  25. benchhub_client-0.1.0/tests/test_auth.py +359 -0
  26. benchhub_client-0.1.0/tests/test_bench_cache.py +310 -0
  27. benchhub_client-0.1.0/tests/test_celery_batch.py +110 -0
  28. benchhub_client-0.1.0/tests/test_celery_process_submission.py +435 -0
  29. benchhub_client-0.1.0/tests/test_celery_reaggregate.py +246 -0
  30. benchhub_client-0.1.0/tests/test_client.py +200 -0
  31. benchhub_client-0.1.0/tests/test_dataset_creator.py +299 -0
  32. benchhub_client-0.1.0/tests/test_dataset_field_schema.py +267 -0
  33. benchhub_client-0.1.0/tests/test_dataset_pred_field_editor.py +145 -0
  34. benchhub_client-0.1.0/tests/test_dataset_pred_role.py +225 -0
  35. benchhub_client-0.1.0/tests/test_dataset_schema_edit.py +144 -0
  36. benchhub_client-0.1.0/tests/test_dataset_settings_page.py +61 -0
  37. benchhub_client-0.1.0/tests/test_dataset_view_filter.py +180 -0
  38. benchhub_client-0.1.0/tests/test_discoverability.py +207 -0
  39. benchhub_client-0.1.0/tests/test_download_sample.py +134 -0
  40. benchhub_client-0.1.0/tests/test_explorable.py +135 -0
  41. benchhub_client-0.1.0/tests/test_extracted_eviction.py +244 -0
  42. benchhub_client-0.1.0/tests/test_field_type_edit_and_hf_source.py +134 -0
  43. benchhub_client-0.1.0/tests/test_globals_ownership.py +269 -0
  44. benchhub_client-0.1.0/tests/test_hash_pin_enforcement.py +182 -0
  45. benchhub_client-0.1.0/tests/test_helpers.py +163 -0
  46. benchhub_client-0.1.0/tests/test_hf_category.py +142 -0
  47. benchhub_client-0.1.0/tests/test_hf_croissant.py +262 -0
  48. benchhub_client-0.1.0/tests/test_hf_croissant_routes.py +508 -0
  49. benchhub_client-0.1.0/tests/test_hf_info_fallback.py +219 -0
  50. benchhub_client-0.1.0/tests/test_hf_materialize.py +490 -0
  51. benchhub_client-0.1.0/tests/test_hf_search.py +355 -0
  52. benchhub_client-0.1.0/tests/test_hf_source_provenance.py +233 -0
  53. benchhub_client-0.1.0/tests/test_home.py +125 -0
  54. benchhub_client-0.1.0/tests/test_label_list_and_topk.py +228 -0
  55. benchhub_client-0.1.0/tests/test_label_vocab_render.py +95 -0
  56. benchhub_client-0.1.0/tests/test_landing.py +227 -0
  57. benchhub_client-0.1.0/tests/test_lb_create_metric_picker.py +166 -0
  58. benchhub_client-0.1.0/tests/test_lb_edit_picker.py +457 -0
  59. benchhub_client-0.1.0/tests/test_lb_field_roles.py +238 -0
  60. benchhub_client-0.1.0/tests/test_lb_metric_type_assert.py +325 -0
  61. benchhub_client-0.1.0/tests/test_lb_name_suggestion.py +77 -0
  62. benchhub_client-0.1.0/tests/test_lb_submission_artifacts.py +162 -0
  63. benchhub_client-0.1.0/tests/test_manifest.py +255 -0
  64. benchhub_client-0.1.0/tests/test_metric_context_arrays.py +251 -0
  65. benchhub_client-0.1.0/tests/test_metric_engine.py +214 -0
  66. benchhub_client-0.1.0/tests/test_paired_datasets.py +234 -0
  67. benchhub_client-0.1.0/tests/test_phase_b_end_to_end.py +218 -0
  68. benchhub_client-0.1.0/tests/test_prune_incomplete_datasets.py +80 -0
  69. benchhub_client-0.1.0/tests/test_quota_admin_exempt.py +62 -0
  70. benchhub_client-0.1.0/tests/test_required_pred_fields.py +508 -0
  71. benchhub_client-0.1.0/tests/test_routes_comparison.py +170 -0
  72. benchhub_client-0.1.0/tests/test_routes_leaderboard.py +401 -0
  73. benchhub_client-0.1.0/tests/test_routes_metric.py +280 -0
  74. benchhub_client-0.1.0/tests/test_routes_misc_apis.py +226 -0
  75. benchhub_client-0.1.0/tests/test_routes_submission.py +293 -0
  76. benchhub_client-0.1.0/tests/test_routes_visualization.py +230 -0
  77. benchhub_client-0.1.0/tests/test_sandbox_docker_integration.py +210 -0
  78. benchhub_client-0.1.0/tests/test_sandbox_harness.py +198 -0
  79. benchhub_client-0.1.0/tests/test_sandbox_http_path.py +236 -0
  80. benchhub_client-0.1.0/tests/test_sandbox_server.py +151 -0
  81. benchhub_client-0.1.0/tests/test_sandbox_wiring.py +205 -0
  82. benchhub_client-0.1.0/tests/test_sandbox_wrapper.py +207 -0
  83. benchhub_client-0.1.0/tests/test_shape_match.py +438 -0
  84. benchhub_client-0.1.0/tests/test_sharing.py +155 -0
  85. benchhub_client-0.1.0/tests/test_smoke.py +50 -0
  86. benchhub_client-0.1.0/tests/test_stats_panel_visibility.py +69 -0
  87. benchhub_client-0.1.0/tests/test_submission_viz_assets.py +215 -0
  88. benchhub_client-0.1.0/tests/test_tags.py +80 -0
  89. benchhub_client-0.1.0/tests/test_text_column_visible.py +78 -0
  90. benchhub_client-0.1.0/tests/test_typed_metric_context.py +262 -0
  91. benchhub_client-0.1.0/tests/test_typed_submit.py +253 -0
  92. benchhub_client-0.1.0/tests/test_types.py +288 -0
  93. benchhub_client-0.1.0/tests/test_user_avatar.py +114 -0
  94. benchhub_client-0.1.0/tests/test_visualize.py +332 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Yakir Matari and BenchHub contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,128 @@
1
+ Metadata-Version: 2.4
2
+ Name: benchhub-client
3
+ Version: 0.1.0
4
+ Summary: Python client + typed contract for the BenchHub benchmarking platform
5
+ Author: BenchHub
6
+ License: MIT
7
+ Project-URL: Homepage, https://runbenchhub.com
8
+ Requires-Python: >=3.10
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: numpy
12
+ Requires-Dist: Pillow
13
+ Requires-Dist: requests
14
+ Dynamic: license-file
15
+
16
+ # BenchHub
17
+
18
+ BenchHub is an open-source benchmarking platform: pick a dataset, define
19
+ metrics in Python, upload predictions, and see how your model ranks. Live
20
+ at **https://runbenchhub.com**.
21
+
22
+ Originally built as a private dTOF SPAD pipeline benchmarking tool, then
23
+ generalized into a public, multi-tenant web app.
24
+
25
+ ## Features
26
+
27
+ - **OAuth sign-in (GitHub)** — no passwords; one-click account creation.
28
+ - **Datasets and leaderboards** are global — no project namespace.
29
+ - **Per-row visibility** (`public` / `unlisted` / `private`) on datasets,
30
+ leaderboards, and metric/visualization library entries.
31
+ - **HuggingFace import**: pull a structured HF dataset repo as a one-click
32
+ alternative to a ZIP upload (see `scripts/seed_nyu_v2_curated.py` for
33
+ an example workflow).
34
+ - **User-defined metrics in Python** — bring your own scoring code; the
35
+ metric engine resolves dependencies and runs them per-sample or
36
+ aggregated. Sandbox-isolated when `BENCHHUB_SANDBOX_METRICS=1`.
37
+ - **Asynchronous processing** with Celery (Redis broker).
38
+ - **Per-user quotas**: 50 MB storage, 5 datasets, 50 submissions / 24h
39
+ by default. Free-tier safe to expose to the open internet.
40
+ - **API tokens** for programmatic uploads (`/settings/api_tokens`).
41
+ - **Account deletion** (GDPR right-to-be-forgotten) with cascading cleanup.
42
+ - **Public landing page** at `/`, `/leaderboards` for browsing the catalog,
43
+ `/u/<id>` for public profile pages.
44
+
45
+ ## Prerequisites
46
+
47
+ - Python 3.10+
48
+ - Redis (broker + result backend, default port 6379)
49
+
50
+ ## Installation
51
+
52
+ ```bash
53
+ git clone <repository-url>
54
+ cd BenchHub
55
+ python -m venv venv && source venv/bin/activate
56
+ pip install -r requirements.txt
57
+ ```
58
+
59
+ ## Running
60
+
61
+ Three terminals:
62
+
63
+ ```bash
64
+ # 1. Redis
65
+ redis-server
66
+
67
+ # 2. Celery worker
68
+ celery -A app.celery worker --loglevel=info
69
+
70
+ # 3. Flask app
71
+ python app.py
72
+ ```
73
+
74
+ Then open `http://localhost:6060`.
75
+
76
+ Data lives outside the repo at `~/.dtofbenchmarking/` (database + uploads).
77
+ Override with `BENCHHUB_DATA_DIR=/some/path`.
78
+
79
+ ## Tests
80
+
81
+ ```bash
82
+ pytest
83
+ ```
84
+
85
+ 429 tests, ~3-4 seconds. Coverage gate is configured in `pytest.ini`.
86
+
87
+ ## Dataset / submission ZIP convention
88
+
89
+ Folders are auto-detected by prefix:
90
+
91
+ | Prefix | Type | Files |
92
+ | --------------- | --------- | -------------------------------------- |
93
+ | `metric_` | metric | `<sample>.txt` containing a float |
94
+ | `hist_` / `raw_histogram` / `hist` | histogram | `<sample>.npz` (`bins`, `counts`) |
95
+ | `raw_` | depth/map | `<sample>_<W>x<H>.npz` |
96
+ | (anything else) | image / scalar / json / text | by file extension |
97
+
98
+ `git_info.json` (or `git.info`) at the ZIP root attaches commit metadata
99
+ to the resulting dataset/submission row.
100
+
101
+ ## DLP-safe code uploads
102
+
103
+ Some networks block `.py` uploads. The metric editor encodes user code
104
+ as `BASE64:<...>` client-side; the server decodes. Standalone helpers:
105
+
106
+ - `scripts/obfuscator.html` — portable browser tool
107
+ - `scripts/obfuscator_gui.py` — Tkinter GUI
108
+
109
+ ## Deployment
110
+
111
+ The production app is self-hosted on a home Ubuntu 24.04 box (RTX 5090,
112
+ 128 GB RAM, 8 TB) reachable at https://runbenchhub.com. gunicorn + celery
113
+ + redis run directly under systemd; nginx + certbot terminate TLS; the
114
+ domain is on Cloudflare in DNS-only mode (no proxy) with `ddclient`
115
+ keeping the A record pointed at the home WAN IP.
116
+
117
+ **Operational runbook: [`docs/SELFHOST_RUNBOOK.md`](docs/SELFHOST_RUNBOOK.md)**
118
+ — code-push procedure, `.env` keys, log tailing, DDNS, TLS renewal,
119
+ rollback, and the breakages we've already hit.
120
+
121
+ Fly.io is deprecated: the app was destroyed after the cutover to the home
122
+ box. The Fly artifacts (`fly.toml`, `Dockerfile`, `DEPLOY.md`, …) are
123
+ archived under [`archive/fly/`](archive/fly/) for the case where a future
124
+ Fly redeploy needs to be reconstructed.
125
+
126
+ ## License
127
+
128
+ Released under the MIT License — see the `LICENSE` file.
@@ -0,0 +1,113 @@
1
+ # BenchHub
2
+
3
+ BenchHub is an open-source benchmarking platform: pick a dataset, define
4
+ metrics in Python, upload predictions, and see how your model ranks. Live
5
+ at **https://runbenchhub.com**.
6
+
7
+ Originally built as a private dTOF SPAD pipeline benchmarking tool, then
8
+ generalized into a public, multi-tenant web app.
9
+
10
+ ## Features
11
+
12
+ - **OAuth sign-in (GitHub)** — no passwords; one-click account creation.
13
+ - **Datasets and leaderboards** are global — no project namespace.
14
+ - **Per-row visibility** (`public` / `unlisted` / `private`) on datasets,
15
+ leaderboards, and metric/visualization library entries.
16
+ - **HuggingFace import**: pull a structured HF dataset repo as a one-click
17
+ alternative to a ZIP upload (see `scripts/seed_nyu_v2_curated.py` for
18
+ an example workflow).
19
+ - **User-defined metrics in Python** — bring your own scoring code; the
20
+ metric engine resolves dependencies and runs them per-sample or
21
+ aggregated. Sandbox-isolated when `BENCHHUB_SANDBOX_METRICS=1`.
22
+ - **Asynchronous processing** with Celery (Redis broker).
23
+ - **Per-user quotas**: 50 MB storage, 5 datasets, 50 submissions / 24h
24
+ by default. Free-tier safe to expose to the open internet.
25
+ - **API tokens** for programmatic uploads (`/settings/api_tokens`).
26
+ - **Account deletion** (GDPR right-to-be-forgotten) with cascading cleanup.
27
+ - **Public landing page** at `/`, `/leaderboards` for browsing the catalog,
28
+ `/u/<id>` for public profile pages.
29
+
30
+ ## Prerequisites
31
+
32
+ - Python 3.10+
33
+ - Redis (broker + result backend, default port 6379)
34
+
35
+ ## Installation
36
+
37
+ ```bash
38
+ git clone <repository-url>
39
+ cd BenchHub
40
+ python -m venv venv && source venv/bin/activate
41
+ pip install -r requirements.txt
42
+ ```
43
+
44
+ ## Running
45
+
46
+ Three terminals:
47
+
48
+ ```bash
49
+ # 1. Redis
50
+ redis-server
51
+
52
+ # 2. Celery worker
53
+ celery -A app.celery worker --loglevel=info
54
+
55
+ # 3. Flask app
56
+ python app.py
57
+ ```
58
+
59
+ Then open `http://localhost:6060`.
60
+
61
+ Data lives outside the repo at `~/.dtofbenchmarking/` (database + uploads).
62
+ Override with `BENCHHUB_DATA_DIR=/some/path`.
63
+
64
+ ## Tests
65
+
66
+ ```bash
67
+ pytest
68
+ ```
69
+
70
+ 429 tests, ~3-4 seconds. Coverage gate is configured in `pytest.ini`.
71
+
72
+ ## Dataset / submission ZIP convention
73
+
74
+ Folders are auto-detected by prefix:
75
+
76
+ | Prefix | Type | Files |
77
+ | --------------- | --------- | -------------------------------------- |
78
+ | `metric_` | metric | `<sample>.txt` containing a float |
79
+ | `hist_` / `raw_histogram` / `hist` | histogram | `<sample>.npz` (`bins`, `counts`) |
80
+ | `raw_` | depth/map | `<sample>_<W>x<H>.npz` |
81
+ | (anything else) | image / scalar / json / text | by file extension |
82
+
83
+ `git_info.json` (or `git.info`) at the ZIP root attaches commit metadata
84
+ to the resulting dataset/submission row.
85
+
86
+ ## DLP-safe code uploads
87
+
88
+ Some networks block `.py` uploads. The metric editor encodes user code
89
+ as `BASE64:<...>` client-side; the server decodes. Standalone helpers:
90
+
91
+ - `scripts/obfuscator.html` — portable browser tool
92
+ - `scripts/obfuscator_gui.py` — Tkinter GUI
93
+
94
+ ## Deployment
95
+
96
+ The production app is self-hosted on a home Ubuntu 24.04 box (RTX 5090,
97
+ 128 GB RAM, 8 TB) reachable at https://runbenchhub.com. gunicorn + celery
98
+ + redis run directly under systemd; nginx + certbot terminate TLS; the
99
+ domain is on Cloudflare in DNS-only mode (no proxy) with `ddclient`
100
+ keeping the A record pointed at the home WAN IP.
101
+
102
+ **Operational runbook: [`docs/SELFHOST_RUNBOOK.md`](docs/SELFHOST_RUNBOOK.md)**
103
+ — code-push procedure, `.env` keys, log tailing, DDNS, TLS renewal,
104
+ rollback, and the breakages we've already hit.
105
+
106
+ Fly.io is deprecated: the app was destroyed after the cutover to the home
107
+ box. The Fly artifacts (`fly.toml`, `Dockerfile`, `DEPLOY.md`, …) are
108
+ archived under [`archive/fly/`](archive/fly/) for the case where a future
109
+ Fly redeploy needs to be reconstructed.
110
+
111
+ ## License
112
+
113
+ Released under the MIT License — see the `LICENSE` file.
@@ -0,0 +1,50 @@
1
+ """BenchHub — shared types + client API.
2
+
3
+ This package is imported by the server (`app.py`, `metric_engine.py`) AND
4
+ shipped to submitters as `benchhub-client`. Single source of truth for
5
+ the strict-typed contract between predictions, GT, and metrics.
6
+ """
7
+
8
+ from benchhub.types import (
9
+ DataType,
10
+ DTYPES,
11
+ Image,
12
+ Mask,
13
+ Depth,
14
+ Audio,
15
+ Text,
16
+ BBoxes,
17
+ Label,
18
+ LabelList,
19
+ Scalar,
20
+ Json,
21
+ get_type,
22
+ )
23
+ from benchhub.client import (
24
+ BenchHubAPIError,
25
+ BHDatasetCreator,
26
+ Client,
27
+ FlaskTestClientTransport,
28
+ SubmissionBuilder,
29
+ )
30
+
31
+ __all__ = [
32
+ "DataType",
33
+ "DTYPES",
34
+ "Image",
35
+ "Mask",
36
+ "Depth",
37
+ "Audio",
38
+ "Text",
39
+ "BBoxes",
40
+ "Label",
41
+ "LabelList",
42
+ "Scalar",
43
+ "Json",
44
+ "get_type",
45
+ "Client",
46
+ "SubmissionBuilder",
47
+ "BHDatasetCreator",
48
+ "FlaskTestClientTransport",
49
+ "BenchHubAPIError",
50
+ ]