hf-discover 1.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. hf_discover-1.3.2/.github/dependabot.yml +11 -0
  2. hf_discover-1.3.2/.github/workflows/ci.yml +48 -0
  3. hf_discover-1.3.2/.github/workflows/release.yml +139 -0
  4. hf_discover-1.3.2/.gitignore +91 -0
  5. hf_discover-1.3.2/AGENTS.md +12 -0
  6. hf_discover-1.3.2/LICENSE +21 -0
  7. hf_discover-1.3.2/PKG-INFO +308 -0
  8. hf_discover-1.3.2/README.md +293 -0
  9. hf_discover-1.3.2/deploy/huggingface-space/.dockerignore +4 -0
  10. hf_discover-1.3.2/deploy/huggingface-space/Dockerfile +18 -0
  11. hf_discover-1.3.2/deploy/huggingface-space/README.md +75 -0
  12. hf_discover-1.3.2/deploy/huggingface-space/start-discover.sh +216 -0
  13. hf_discover-1.3.2/docs/file-tools-eval-report.html +10952 -0
  14. hf_discover-1.3.2/docs/repository-overview.html +197 -0
  15. hf_discover-1.3.2/docs/skills-search-design-deployment-options.html +259 -0
  16. hf_discover-1.3.2/hf-discover.toml +12 -0
  17. hf_discover-1.3.2/plan/skills-search.md +641 -0
  18. hf_discover-1.3.2/pyproject.toml +93 -0
  19. hf_discover-1.3.2/scripts/bump-version.py +85 -0
  20. hf_discover-1.3.2/scripts/check-release.sh +98 -0
  21. hf_discover-1.3.2/scripts/configure-space-runtime.py +143 -0
  22. hf_discover-1.3.2/scripts/smoke-local-registries.sh +83 -0
  23. hf_discover-1.3.2/scripts/update-ai-catalog-spec.sh +101 -0
  24. hf_discover-1.3.2/scripts/vendor-meilisearch.py +144 -0
  25. hf_discover-1.3.2/spec/ai-catalog/SOURCE.md +18 -0
  26. hf_discover-1.3.2/spec/ai-catalog/ai-catalog.md +2211 -0
  27. hf_discover-1.3.2/spec/ai-catalog/examples/ai-catalog.json +34 -0
  28. hf_discover-1.3.2/spec/ai-catalog/respec-config.json +33 -0
  29. hf_discover-1.3.2/spec/ard.md +780 -0
  30. hf_discover-1.3.2/spec/hf-search.md +189 -0
  31. hf_discover-1.3.2/src/discover/__init__.py +1 -0
  32. hf_discover-1.3.2/src/discover/challenge.py +451 -0
  33. hf_discover-1.3.2/src/discover/cli.py +624 -0
  34. hf_discover-1.3.2/src/discover/filters.py +65 -0
  35. hf_discover-1.3.2/src/discover/hf_search.py +111 -0
  36. hf_discover-1.3.2/src/discover/hf_skills.py +199 -0
  37. hf_discover-1.3.2/src/discover/hf_spaces.py +451 -0
  38. hf_discover-1.3.2/src/discover/models.py +98 -0
  39. hf_discover-1.3.2/src/discover/server.py +754 -0
  40. hf_discover-1.3.2/tests/test_challenge.py +126 -0
  41. hf_discover-1.3.2/tests/test_cli.py +172 -0
  42. hf_discover-1.3.2/tests/test_hf_spaces.py +1145 -0
  43. hf_discover-1.3.2/tests/test_models.py +48 -0
  44. hf_discover-1.3.2/typesafe.md +25 -0
  45. hf_discover-1.3.2/uv.lock +760 -0
@@ -0,0 +1,11 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "github-actions"  # the only ecosystem configured in this file
4
+ directory: "/"
5
+ schedule:
6
+ interval: "weekly"
7
+ cooldown:
8
+ default-days: 7  # NOTE(review): presumably holds back a new release until it is 7 days old - confirm against the Dependabot options reference
9
+ groups:
10
+ actions:  # group name
11
+ patterns: ["*"]  # wildcard: every dependency name matches this group
@@ -0,0 +1,48 @@
1
+ name: CI  # quality gate: formatting, lint, type check, and tests in a single job
2
+
3
+ on:
4
+ push:
5
+ branches: ["main"]  # push trigger is limited to main
6
+ pull_request:  # no branch filter is given for pull requests
7
+ workflow_dispatch:  # also runnable manually
8
+
9
+ concurrency:
10
+ group: ${{ github.workflow }}-${{ github.ref }}  # one group per workflow name and ref
11
+ cancel-in-progress: true  # a newer run in the same group cancels the one still running
12
+
13
+ env:
14
+ FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true  # NOTE(review): presumably opts JavaScript actions into the Node 24 runtime - confirm against runner docs
15
+
16
+ jobs:
17
+ quality:
18
+ name: Ruff, ty, and pytest
19
+ runs-on: ubuntu-latest
20
+
21
+ steps:
22
+ - name: Check out repository
23
+ uses: actions/checkout@v6
24
+
25
+ - name: Install uv
26
+ uses: astral-sh/setup-uv@v8.1.0
27
+ with:
28
+ enable-cache: true
29
+
30
+ - name: Set up Python
31
+ uses: actions/setup-python@v6
32
+ with:
33
+ python-version: "3.14"  # quoted so YAML keeps the version a string
34
+
35
+ - name: Install dependencies
36
+ run: uv sync --locked  # NOTE(review): --locked presumably fails on a stale uv.lock instead of rewriting it - confirm against uv docs
37
+
38
+ - name: Check formatting
39
+ run: uv run ruff format --check .  # --check reports differences without rewriting files
40
+
41
+ - name: Lint
42
+ run: uv run ruff check .
43
+
44
+ - name: Type check
45
+ run: uv run ty check
46
+
47
+ - name: Test
48
+ run: uv run pytest
@@ -0,0 +1,139 @@
1
+ name: Release  # builds, publishes, and tags a release; triggers are declared below
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"  # any pushed tag whose name starts with "v"
7
+ workflow_dispatch:
8
+ inputs:
9
+ bump:
10
+ description: "Version bump to commit before publishing"
11
+ required: true
12
+ default: patch
13
+ type: choice
14
+ options:
15
+ - none  # skips the "Bump release version" step, whose condition is inputs.bump != 'none'
16
+ - patch
17
+ - minor
18
+ - major
19
+ confirm:  # compared against the literal "release" by the "Confirm manual release" step
20
+ description: "Type 'release' to publish from main"
21
+ required: true
22
+ type: string
23
+
24
+ concurrency:
25
+ group: ${{ github.workflow }}-${{ github.ref }}  # one group per workflow name and ref
26
+ cancel-in-progress: false  # queue, never cancel: a running release may already have pushed the bump commit or published to PyPI
27
+
28
+ permissions:
29
+ contents: write  # this workflow pushes a commit to main and creates or updates a GitHub release
30
+ id-token: write  # NOTE(review): presumably the OIDC token behind `uv publish --trusted-publishing` - confirm
31
+
32
+ env:
33
+ FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true  # NOTE(review): presumably opts JavaScript actions into the Node 24 runtime - confirm against runner docs
34
+
35
+ jobs:
36
+ build:
37
+ name: Build release artifacts
38
+ runs-on: ubuntu-latest
39
+ environment: pypi  # the README names `pypi` as the environment configured for PyPI trusted publishing
40
+
41
+ steps:
42
+ - name: Confirm manual release  # guard: manual runs must start from main with confirm=release
43
+ if: github.event_name == 'workflow_dispatch'
44
+ env:
45
+ CONFIRM: ${{ inputs.confirm }}  # free-text input travels through the environment, never templated into the script text
46
+ run: |
47
+ if [ "${GITHUB_REF}" != "refs/heads/main" ]; then
48
+ echo "Manual releases must be run from main, got ${GITHUB_REF}." >&2; exit 1
49
+ fi
50
+ if [ "${CONFIRM}" != "release" ]; then
51
+ echo "Manual releases require confirm=release." >&2; exit 1
52
+ fi
53
+
54
+ - name: Check out repository
55
+ uses: actions/checkout@v6
56
+ with:
57
+ fetch-depth: 0  # NOTE(review): presumably fetches full history instead of a shallow clone - confirm against actions/checkout docs
58
+
59
+ - name: Install uv
60
+ uses: astral-sh/setup-uv@v8.1.0
61
+ with:
62
+ enable-cache: true
63
+
64
+ - name: Set up Python
65
+ uses: actions/setup-python@v6
66
+ with:
67
+ python-version: "3.14"  # quoted so YAML keeps the version a string
68
+
69
+ - name: Bump release version
70
+ if: github.event_name == 'workflow_dispatch' && inputs.bump != 'none'  # manual runs only; bump=none skips the commit
71
+ env:
72
+ BUMP: ${{ inputs.bump }}  # the input reaches the script through the environment
73
+ run: |
74
+ python scripts/bump-version.py --bump "$BUMP"
75
+ uv lock  # re-lock so uv.lock is committed together with pyproject.toml below
76
+
77
+ version="$(python - <<'PY'
78
+ import tomllib
79
+
80
+ with open("pyproject.toml", "rb") as pyproject:
81
+ print(tomllib.load(pyproject)["project"]["version"])
82
+ PY
83
+ )"
84
+
85
+ git config user.name "github-actions[bot]"
86
+ git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
87
+ git add pyproject.toml uv.lock
88
+ git commit -m "chore: release ${version}"
89
+ git push origin HEAD:main  # the bump commit goes straight to main
90
+
91
+ - name: Read project version
91
+ id: project  # referenced later as steps.project.outputs.version
92
+ run: |
93
+ version="$(python - <<'PY'
94
+ import tomllib
95
+
96
+ with open("pyproject.toml", "rb") as pyproject:
97
+ print(tomllib.load(pyproject)["project"]["version"])
98
+ PY
99
+ )"
100
+ echo "version=${version}" >> "${GITHUB_OUTPUT}"  # exposes the value as the step output "version"
102
+
103
+ - name: Build and smoke-test release artifacts
103
+ run: ./scripts/check-release.sh  # script body is not visible here; the README describes it as the release check
104
+
105
+ - name: Publish to PyPI
106
+ if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch'  # both configured triggers (tag push, manual dispatch) satisfy this
107
+ run: uv publish --trusted-publishing automatic dist/*  # NOTE(review): dist/ is presumably produced by check-release.sh - confirm
109
+
110
+ - name: Restart Hugging Face Space
110
+ if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch'
111
+ env:
112
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
113
+ run: |
114
+ if [ -z "${HF_TOKEN}" ]; then
115
+ echo "HF_TOKEN secret is not configured; skipping Space restart."
116
+ exit 0  # a missing secret is a soft skip, not a failed release
117
+ fi
118
+ uv run python - <<'PY'
119
+ from huggingface_hub import HfApi
120
+
121
+ HfApi().restart_space("evalstate/hf-discover")  # NOTE(review): presumably authenticates via HF_TOKEN from the environment - confirm against huggingface_hub docs
122
+ PY
124
+
125
+ - name: Create GitHub release
125
+ if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch'
126
+ env:
127
+ GH_TOKEN: ${{ github.token }}
128
+ VERSION: ${{ steps.project.outputs.version }}  # output of the "Read project version" step
129
+ run: |
130
+ tag="v${VERSION}"  # tag name is derived from pyproject.toml's version, not from the pushed ref
131
+ if gh release view "${tag}" >/dev/null 2>&1; then
132
+ gh release upload "${tag}" dist/* --clobber  # release already exists: upload the artifacts to it
133
+ else
134
+ gh release create "${tag}" dist/* \
135
+ --title "${tag}" \
136
+ --generate-notes \
137
+ --target "$(git rev-parse HEAD)"  # targets the currently checked-out commit
138
+ fi
@@ -0,0 +1,91 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ out/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ wheels/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ *.manifest
32
+ *.spec
33
+
34
+ # Installer logs
35
+ pip-log.txt
36
+ pip-delete-this-directory.txt
37
+
38
+ # Unit test / coverage reports
39
+ htmlcov/
40
+ .tox/
41
+ .nox/
42
+ .coverage
43
+ .coverage.*
44
+ .cache
45
+ nosetests.xml
46
+ coverage.xml
47
+ *.cover
48
+ *.py.cover
49
+ .hypothesis/
50
+ .pytest_cache/
51
+ cover/
52
+
53
+ # Environments
54
+ .env
55
+ .envrc
56
+ .venv/
57
+ env/
58
+ venv/
59
+ ENV/
60
+ env.bak/
61
+ venv.bak/
62
+
63
+ # Tool caches
64
+ .ruff_cache/
65
+ .mypy_cache/
66
+ .pyre/
67
+ .pytype/
68
+ .dmypy.json
69
+ dmypy.json
70
+
71
+ # Project-local agent/tooling state
72
+ .agents/
73
+ .codex/
74
+ .fast-agent/
75
+ fastagent.jsonl
76
+ data.ms/
77
+ meilisearch
78
+
79
+ # Private/local spec checkout used for implementation review.
80
+ spec/ard-spec/
81
+
82
+ # uv lockfile is intentionally committed.
83
+ # uv.lock
84
+
85
+ # PyPI configuration file
86
+ .pypirc
87
+
88
+ # Editor/IDE local state
89
+ .idea/
90
+ .vscode/
91
+ tempCodeRunnerFile.py
@@ -0,0 +1,12 @@
1
+ - ARD specification is in spec/ard.md
2
+ - Keep a high-level log of features and capabilities in README.md under the `## Features` heading.
3
+ - Project documentation is a lightweight orientation record, not the full source of truth.
4
+ It should help humans quickly understand the big idea and help future agents know which
5
+ scripts, workflows, specs, and plans to inspect for detailed question-answering. Prefer
6
+ concise summaries with explicit artifact pointers over duplicating shell/script logic.
7
+ - Do NOT test things that the `ty` typechecker automatically enforces.
8
+ - Any HF_TOKEN usage must only remain temporarily in memory during a request scope, and must never be stored or emitted in a plain format.
9
+ - Avoid mocking or `monkeypatch` for testing purposes, preferring to use typechecks, simple logic-focussed unit tests and stubs/simulators. Small integration or e2e tests are preferred over lots of unit tests for tightly coupled scenarios.
10
+ - Avoid testing properties and other data transfer scenarios unless transformations or other behaviour is involved.
11
+ - Prefer a functional style of programming where possible.
12
+ - Feature additions should consider the CLI surface as well as adherence to the ARD specification. Both CLI and HTTP should wrap the same, clean, well factored core logic.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Hugging Face
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,308 @@
1
+ Metadata-Version: 2.4
2
+ Name: hf-discover
3
+ Version: 1.3.2
4
+ Summary: ARD registry adapter for Hugging Face Spaces
5
+ License-File: LICENSE
6
+ Requires-Python: >=3.10.0
7
+ Requires-Dist: fastapi==0.137.1
8
+ Requires-Dist: huggingface-hub==1.19.0
9
+ Requires-Dist: pydantic==2.13.4
10
+ Requires-Dist: rich==15.0.0
11
+ Requires-Dist: starlette==1.3.1
12
+ Requires-Dist: typer==0.25.1
13
+ Requires-Dist: uvicorn==0.49.0
14
+ Description-Content-Type: text/markdown
15
+
16
+ # hf-discover
17
+
18
+ A small ARD registry adapter for Hugging Face Skills and Spaces.
19
+
20
+ It exposes Hugging Face discovery as:
21
+
22
+ - a CLI: `hf-discover search "remove background from image"`
23
+ - version introspection: `hf-discover --version`
24
+ - a hosted ARD registry client: `hf-discover search "remove background from image"`
25
+ - a generic ARD registry client: `hf-discover search --registry-url https://registry.example "remove background from image"`
26
+ - a primary ARD REST API combining indexed Hugging Face Skills and Hugging Face
27
+ Spaces: `POST /search`
28
+ - a targeted nested Hugging Face Spaces registry: `POST /registries/huggingface/spaces/search`
29
+ - generated skill artifacts for Spaces via `GET /skills/huggingface/{owner}/{space}/SKILL.md`
30
+ - generated MCP Registry descriptors for MCP Spaces via
31
+ `GET /mcp/huggingface/{owner}/{space}/server.json`
32
+
33
+ The hosted REST API combines Skills and Spaces in the primary registry so simple clients
34
+ only need to call `POST /search`. The nested Spaces registry remains available for clients
35
+ that want targeted Spaces-only discovery or explicit registry traversal.
36
+
37
+ ## Features
38
+
39
+ ### Space Search and Skill Generation
40
+
41
+ `hf-discover` exposes Hugging Face Spaces semantic search in the primary `/search` endpoint
42
+ and as a targeted nested registry backend at `/registries/huggingface/spaces/search`.
43
+ Search requests use the Hub's agent-oriented semantic search (`agents=true`) and return
44
+ matching Spaces as ARD catalog entries. By default, results can include generated
45
+ `application/ai-skill` artifacts, plus `application/mcp-server-card+json` entries for
46
+ matching Spaces tagged `mcp-server`.
47
+
48
+ Search responses strictly include only Spaces whose runtime stage is `RUNNING`, so returned
49
+ entries are limited to Spaces that are currently ready to serve traffic. The runtime stage
50
+ is also surfaced in result metadata as `runtimeStage`.
51
+
52
+ The generated skill wraps the Space's `agents.md` instructions with the required skill
53
+ frontmatter (`name` and `description`) plus source metadata such as the Space ID, Hub URL,
54
+ app URL, and original `agents.md` URL. This lets clients discover a relevant Space, fetch
55
+ the generated skill, and install or load it using their normal skill flow.
56
+
57
+ For clients that want raw Space descriptors instead of skills, request
58
+ `application/vnd.huggingface.space+json` from either the primary search endpoint or the
59
+ nested Spaces search endpoint.
60
+
61
+ Requests for `application/mcp-server-card+json` add `filter=mcp-server` to the downstream
62
+ Hub search and return MCP server card catalog entries that point at this adapter's
63
+ generated `server.json` descriptor route. The legacy `application/mcp-server+json` filter
64
+ is accepted as a transition alias, but new responses use the `*-card+json` media type
65
+ from the pinned ARD spec. Fetching that descriptor performs a direct Hugging Face Space
66
+ info lookup, verifies the Space is tagged `mcp-server`, and synthesizes an MCP
67
+ Registry-style document whose `remotes[]` contains the Space's Gradio `streamable-http`
68
+ MCP endpoint. When Hub runtime metadata includes a Space domain, that domain is used for
69
+ app and MCP URLs; otherwise the adapter falls back to the standard `.hf.space` slug
70
+ convention.
71
+
72
+ The CLI queries the hosted hf-discover deployment by default and can query any
73
+ ARD-compatible registry by passing `--registry-url`. The value may be either a registry
74
+ base URL or the `/search` endpoint. In this mode the CLI POSTs an ARD
75
+ `SearchRequest` and renders the returned `SearchResponse` using the same JSON/table output
76
+ paths as the Hugging Face Spaces adapter. Pass `--local` to search directly from the
77
+ current process instead.
78
+
79
+ ### Combined Skills and Spaces Registry
80
+
81
+ The primary HTTP `POST /search` endpoint combines the Meilisearch-backed
82
+ `huggingface/skills` index with Hugging Face Spaces search. For omitted media type or
83
+ `application/ai-skill`, it can return both indexed `SKILL.md` artifacts and generated Space
84
+ skills in one ranked response. Section-level Skills index hits are grouped into skill-level
85
+ search results.
86
+
87
+ Indexed Hugging Face Skills are directory-style skills, so their search result `url` points
88
+ at the skill directory, not the contained `SKILL.md` file. Generated Space skills are
89
+ single-file artifacts materialized by this adapter and continue to point at the generated
90
+ `/skills/huggingface/{owner}/{space}/SKILL.md` URL.
91
+
92
+ For `application/vnd.huggingface.space+json` and `application/mcp-server-card+json`,
93
+ primary search routes directly to the Spaces backend because those media types are
94
+ Space-specific.
95
+
96
+ The registry uses the ARD v0.5 search envelope: artifact type constraints are
97
+ expressed as `query.filter.type`, response entries use the catalog `type` field, and
98
+ Hugging Face entries use domain-anchored `urn:ai:hf.co:...` identifiers. Catalog entry
99
+ models enforce the v0.5 strict value-or-reference rule, domain-anchored `urn:ai:<fqdn>:...`
100
+ identifiers, and integer 0-100 relevance scores.
101
+
102
+ Structured filters use the ARD v0.5 field-path semantics for exact matching after
103
+ retrieval: scalar filter values are treated like single-item arrays, values within one
104
+ filter key are ORed, different filter keys are ANDed, arrays on entries match when any
105
+ item matches, nested paths such as `metadata.sourceType` are supported, and `publisher`
106
+ is derived from the entry identifier's publisher domain.
107
+
108
+ The primary server exposes `GET /.well-known/ai-catalog.json` as an ARD discovery
109
+ document. It advertises the primary Hugging Face Discover registry and the nested
110
+ Spaces registry as `application/ai-registry+json` entries using v0.5 `type` fields and
111
+ domain-anchored `urn:ai:hf.co:...` identifiers.
112
+
113
+ By default, advertised registry, generated Space skill, and generated MCP `server.json`
114
+ URLs are derived from the incoming request base URL, because those URLs point at
115
+ materialized artifacts and search routes served by this adapter. Set
116
+ `DISCOVER_PUBLIC_BASE_URL` only when a reverse proxy, staging deployment, or self-hosted
117
+ runtime reports an internal base URL but clients need a different public prefix.
118
+ Space-owned URLs such as `agents.md`, app URLs, and MCP endpoints continue to point at
119
+ Hugging Face Space URLs derived from Hub/runtime metadata.
120
+
121
+ When clients request referrals with top-level `federation` set to `referrals` or `auto`, the
122
+ primary registry can still include a referral to the nested Hugging Face Spaces registry.
123
+ Simple clients can ignore referrals and use the combined results; traversal-capable clients
124
+ can use the referral for a follow-up Spaces-only search.
125
+
126
+ ### Challenge Registry Server
127
+
128
+ `hf-discover challenge serve` runs a deterministic local fixture registry for client
129
+ development. It returns mixed ARD result types, including skills, MCP servers, A2A
130
+ agents, ai-catalog bundles, registry entries, referrals, empty registries, and nested
131
+ registries. Use it to test clients that need to follow registry trees and fetch referenced
132
+ artifacts without relying on Hugging Face or Meilisearch services.
133
+
134
+ `hf-discover challenge search` queries a running challenge registry and defaults to
135
+ requesting referrals, making it a convenient CLI path for agents that need to practice
136
+ ARD traversal. The generic `hf-discover search` command defaults to the hosted
137
+ deployment and also accepts `--registry-url` and `--federation none|referrals|auto`. When
138
+ registry-backed commands are run with `--json`, the CLI prints the registry's raw
139
+ `SearchResponse` body so clients can inspect exact `results`, `referrals`, `type`,
140
+ `url`, `data`, and `pageToken` fields returned by the server.
141
+
142
+ The challenge registry uses the same catalog-entry model and field-path filtering helper
143
+ as the primary server. Both registries expose `POST /explore` and return `501 Not
144
+ Implemented`, matching the ARD v0.5 behavior for registries that do not implement the
145
+ optional Explore facets API.
146
+
147
+ ### Specification References
148
+
149
+ `spec/ard.md` remains the committed local ARD orientation snapshot. When a private pinned
150
+ upstream spec checkout is available locally at `spec/ard-spec/`, use its `spec/ard.md`,
151
+ ADRs, schemas, and conformance CLI as the authoritative artifacts for implementation
152
+ review. That checkout is intentionally gitignored.
153
+
154
+ The AI Catalog draft reference can be refreshed from the upstream `Agent-Card/ai-catalog`
155
+ repository with
156
+ `./scripts/update-ai-catalog-spec.sh`, which copies the latest Markdown and JSON assets
157
+ from its `specification/` folder into `spec/ai-catalog/`.
158
+
159
+ The vendored `spec/ai-catalog/` snapshot currently tracks the pre-merge content from
160
+ `Agent-Card/ai-catalog` PR #37, which updates catalog entries from `mediaType` to `type`.
161
+
162
+ ### Roadmap
163
+
164
+ The next `hf-discover` version is expected to expand the CLI surface for arbitrary ARD
165
+ structured filters. Today the HTTP server accepts `query.filter` and applies exact-match
166
+ field-path filters after retrieval; the CLI exposes only the common media-type path
167
+ through `--kind`. Planned work includes improved server-side handling/pushdown for common
168
+ fields such as tags and Space SDK.
169
+
170
+ The next version is also expected to use `auto` federation.
171
+
172
+ ### Release Automation
173
+
174
+ Releases are built through the same quality gates as CI: locked dependency sync, Ruff
175
+ format/lint checks, `ty` type checking, and pytest. The package supports the same minimum
176
+ Python version as `huggingface_hub` (`>=3.10.0`). The hosted Hugging Face Space deployment
177
+ uses Python 3.14 for runtime performance.
178
+
179
+ Run the release check with:
180
+
181
+ ```bash
182
+ ./scripts/check-release.sh
183
+ ```
184
+
185
+ Optionally assert the expected project version:
186
+
187
+ ```bash
188
+ ./scripts/check-release.sh 0.1.0
189
+ ```
190
+
191
+ Release from `main` after the intended code changes are merged. Run the **Release** GitHub
192
+ Action from `main`, choose `patch`, `minor`, or `major` for the version bump, and enter
193
+ confirmation value `release`. The workflow commits the `pyproject.toml` and `uv.lock`
194
+ version bump directly to `main`, builds artifacts from that bumped commit, publishes them
195
+ to PyPI using trusted publishing, attaches the artifacts to the GitHub Release, and
196
+ restarts the Hugging Face Space when the `HF_TOKEN` secret is configured. Use bump value
197
+ `none` only when retrying a failed release for the version already on `main`.
198
+
199
+ For local preflight or manual version changes, use
200
+ `python scripts/bump-version.py --bump patch|minor|major`, then `uv lock`, then
201
+ `./scripts/check-release.sh`.
202
+
203
+ PyPI trusted publishing must be configured for project `hf-discover` with owner
204
+ `huggingface`, repository `hf-discover`, workflow `release.yml`, and environment `pypi`.
205
+ The GitHub `pypi` environment does not need secrets for trusted publishing, but it must
206
+ exist if the repository requires explicit environment configuration.
207
+
208
+ ### Hugging Face Space Deployment
209
+
210
+ The project includes a reproducible Docker Space definition in `deploy/huggingface-space/`.
211
+ It uses the official uv Python image and runs the latest published `hf-discover` package
212
+ with `uvx --refresh`, so restarting or rebuilding the Space resolves the newest PyPI
213
+ release without committing generated application code to the Space repository. This keeps
214
+ the hosted Space lightweight while letting PyPI releases drive runtime updates.
215
+
216
+ The Space startup wrapper can optionally run a pinned Meilisearch binary from an attached
217
+ Hugging Face bucket and ingest a generated Hugging Face Skills index artifact from another
218
+ attached bucket. When Meilisearch starts successfully, the wrapper exports the configured
219
+ Meilisearch URL and index for the API process so `POST /search` includes loaded Skills
220
+ results alongside Spaces results. Helper scripts in `scripts/` vendor the pinned
221
+ Meilisearch binary, create the configured buckets, attach them as Space volumes, and
222
+ configure runtime variables without running unsupervised installer scripts in the Space.
223
+
224
+ The documentation here is intentionally an orientation record: it states the deployment
225
+ idea and points to the artifacts that contain the operational evidence. For details, read
226
+ `hf-discover.toml`, `scripts/vendor-meilisearch.py`,
227
+ `scripts/configure-space-runtime.py`, and
228
+ `deploy/huggingface-space/start-discover.sh`.
229
+
230
+ ## Usage
231
+
232
+ The examples below use the standalone `hf-discover` command form.
233
+
234
+ `--kind skill` requests AI-skill results. In the combined registry this includes indexed,
235
+ directory-style Hugging Face Skills from Meilisearch and generated single-file Space skill
236
+ wrappers. `--kind space` requests raw Hugging Face Space descriptors. `--kind mcp` requests
237
+ MCP server entries for Spaces tagged `mcp-server`. `--kind all` asks for the default mixed
238
+ view.
239
+
240
+ ```bash
241
+ > hf-discover --version
242
+ > hf-discover search "generate image" --limit 5
243
+ > hf-discover search "generate image" --kind skill --json
244
+ > hf-discover search "generate image" --kind space --json
245
+ > hf-discover search "generate image" --kind mcp --json
246
+ > hf-discover mcp-server-json mcp-tools/FLUX.1-Kontext-Dev
247
+ > hf-discover search --registry-url https://registry.example "generate image" --kind skill --json
248
+ > hf-discover search "generate image" --kind space --local
249
+ > hf-discover serve --port 8080
250
+ > hf-discover challenge serve --port 8090
251
+ > hf-discover challenge search "find tools and registries" --federation referrals --json
252
+ ```
253
+
254
+ ### Recommended `hf` extension usage
255
+
256
+ For Hugging Face CLI users, the recommended install path is as an `hf` extension:
257
+
258
+ ```bash
259
+ > hf extensions install huggingface/hf-discover
260
+ > hf discover --version
261
+ > hf discover search "generate image" --kind space --limit 5
262
+ ```
263
+
264
+ The project still documents examples as `hf-discover ...` because the same CLI is also
265
+ available as a standalone Python console script. When installed as an extension, replace
266
+ `hf-discover` with `hf discover`.
267
+
268
+ ```bash
269
+ > curl -X POST http://localhost:8080/search \
270
+ -H 'content-type: application/json' \
271
+ -d '{"query":{"text":"upload files to a dataset repo","filter":{"type":["application/ai-skill"]}},"pageSize":5}'
272
+ ```
273
+
274
+ Search the targeted nested Spaces registry:
275
+
276
+ ```bash
277
+ > curl -X POST http://localhost:8080/registries/huggingface/spaces/search \
278
+ -H 'content-type: application/json' \
279
+ -d '{"query":{"text":"remove background from image","filter":{"type":["application/ai-skill"]}},"pageSize":5}'
280
+ ```
281
+
282
+ Search the local challenge registry:
283
+
284
+ ```bash
285
+ > curl -X POST http://localhost:8090/search \
286
+ -H 'content-type: application/json' \
287
+ -d '{"query":{"text":"find tools and registries"},"federation":"referrals","pageSize":10}'
288
+ ```
289
+
290
+ Fetch a generated skill:
291
+
292
+ ```bash
293
+ > curl http://localhost:8080/skills/huggingface/mcp-tools/FLUX.1-Kontext-Dev/SKILL.md
294
+ ```
295
+
296
+ To get generic Hugging Face Space descriptors instead of skill wrappers, request:
297
+
298
+ ```json
299
+ {"query":{"text":"remove background from image","filter":{"type":["application/vnd.huggingface.space+json"]}},"pageSize":5}
300
+ ```
301
+
302
+ ### HF_TOKEN handling
303
+
304
+ Primary and nested Spaces registry search requests can forward a request-scoped Hugging
305
+ Face token for the downstream Spaces search call. The server checks
306
+ `X-HF-Authorization: Bearer ...`, then `Authorization: Bearer ...`, then `HF_TOKEN: ...`;
307
+ a header token overrides any token configured when the server starts and is not stored
308
+ beyond the request.