hf-discover 1.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hf_discover-1.3.2/.github/dependabot.yml +11 -0
- hf_discover-1.3.2/.github/workflows/ci.yml +48 -0
- hf_discover-1.3.2/.github/workflows/release.yml +139 -0
- hf_discover-1.3.2/.gitignore +91 -0
- hf_discover-1.3.2/AGENTS.md +12 -0
- hf_discover-1.3.2/LICENSE +21 -0
- hf_discover-1.3.2/PKG-INFO +308 -0
- hf_discover-1.3.2/README.md +293 -0
- hf_discover-1.3.2/deploy/huggingface-space/.dockerignore +4 -0
- hf_discover-1.3.2/deploy/huggingface-space/Dockerfile +18 -0
- hf_discover-1.3.2/deploy/huggingface-space/README.md +75 -0
- hf_discover-1.3.2/deploy/huggingface-space/start-discover.sh +216 -0
- hf_discover-1.3.2/docs/file-tools-eval-report.html +10952 -0
- hf_discover-1.3.2/docs/repository-overview.html +197 -0
- hf_discover-1.3.2/docs/skills-search-design-deployment-options.html +259 -0
- hf_discover-1.3.2/hf-discover.toml +12 -0
- hf_discover-1.3.2/plan/skills-search.md +641 -0
- hf_discover-1.3.2/pyproject.toml +93 -0
- hf_discover-1.3.2/scripts/bump-version.py +85 -0
- hf_discover-1.3.2/scripts/check-release.sh +98 -0
- hf_discover-1.3.2/scripts/configure-space-runtime.py +143 -0
- hf_discover-1.3.2/scripts/smoke-local-registries.sh +83 -0
- hf_discover-1.3.2/scripts/update-ai-catalog-spec.sh +101 -0
- hf_discover-1.3.2/scripts/vendor-meilisearch.py +144 -0
- hf_discover-1.3.2/spec/ai-catalog/SOURCE.md +18 -0
- hf_discover-1.3.2/spec/ai-catalog/ai-catalog.md +2211 -0
- hf_discover-1.3.2/spec/ai-catalog/examples/ai-catalog.json +34 -0
- hf_discover-1.3.2/spec/ai-catalog/respec-config.json +33 -0
- hf_discover-1.3.2/spec/ard.md +780 -0
- hf_discover-1.3.2/spec/hf-search.md +189 -0
- hf_discover-1.3.2/src/discover/__init__.py +1 -0
- hf_discover-1.3.2/src/discover/challenge.py +451 -0
- hf_discover-1.3.2/src/discover/cli.py +624 -0
- hf_discover-1.3.2/src/discover/filters.py +65 -0
- hf_discover-1.3.2/src/discover/hf_search.py +111 -0
- hf_discover-1.3.2/src/discover/hf_skills.py +199 -0
- hf_discover-1.3.2/src/discover/hf_spaces.py +451 -0
- hf_discover-1.3.2/src/discover/models.py +98 -0
- hf_discover-1.3.2/src/discover/server.py +754 -0
- hf_discover-1.3.2/tests/test_challenge.py +126 -0
- hf_discover-1.3.2/tests/test_cli.py +172 -0
- hf_discover-1.3.2/tests/test_hf_spaces.py +1145 -0
- hf_discover-1.3.2/tests/test_models.py +48 -0
- hf_discover-1.3.2/typesafe.md +25 -0
- hf_discover-1.3.2/uv.lock +760 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Continuous integration: formatting, lint, type-check, and test gates.
name: CI

# Runs on pushes to main, on every pull request, and on manual dispatch.
on:
  push:
    branches: ["main"]
  pull_request:
  workflow_dispatch:

# A newer run for the same workflow and ref cancels the one still in flight.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Least privilege: every step below only needs to read the repository.
permissions:
  contents: read

env:
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

jobs:
  quality:
    name: Ruff, ty, and pytest
    runs-on: ubuntu-latest

    steps:
      - name: Check out repository
        uses: actions/checkout@v6

      - name: Install uv
        uses: astral-sh/setup-uv@v8.1.0
        with:
          enable-cache: true

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          # Quoted so the version stays a string rather than a float.
          python-version: "3.14"

      # --locked: use uv.lock as committed instead of re-resolving it.
      - name: Install dependencies
        run: uv sync --locked

      - name: Check formatting
        run: uv run ruff format --check .

      - name: Lint
        run: uv run ruff check .

      - name: Type check
        run: uv run ty check

      - name: Test
        run: uv run pytest
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# Release pipeline: triggered by a pushed v* tag or by a confirmed manual
# dispatch.
name: Release

on:
  push:
    tags:
      - "v*"
  workflow_dispatch:
    inputs:
      bump:
        description: "Version bump to commit before publishing"
        required: true
        default: patch
        type: choice
        options:
          - none
          - patch
          - minor
          - major
      confirm:
        description: "Type 'release' to publish from main"
        required: true
        type: string

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  # Do not cancel a release that is already running: the job pushes a bump
  # commit and publishes artifacts, so an interrupted run could leave main,
  # PyPI, and the GitHub release out of sync. A later run for the same ref
  # waits instead.
  cancel-in-progress: false

permissions:
  # Needed to push the version-bump commit and create the GitHub release.
  contents: write
  # Needed to mint the OIDC token used for PyPI trusted publishing.
  id-token: write

env:
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
|
34
|
+
|
|
35
|
+
jobs:
|
|
36
|
+
build:
|
|
37
|
+
name: Build release artifacts
|
|
38
|
+
runs-on: ubuntu-latest
|
|
39
|
+
environment: pypi
|
|
40
|
+
|
|
41
|
+
steps:
|
|
42
|
+
# Guard for manual dispatches: only allowed from main and only with the
# explicit confirmation string.
- name: Confirm manual release
  if: github.event_name == 'workflow_dispatch'
  env:
    # The free-text input is passed through the environment rather than
    # interpolated into the script, so its content can never be parsed as
    # shell syntax.
    CONFIRM: ${{ inputs.confirm }}
  run: |
    if [ "${GITHUB_REF}" != "refs/heads/main" ]; then
      echo "Manual releases must be run from main, got ${GITHUB_REF}." >&2
      exit 1
    fi
    if [ "${CONFIRM}" != "release" ]; then
      echo "Manual releases require confirm=release." >&2
      exit 1
    fi
|
|
53
|
+
|
|
54
|
+
# Toolchain setup for the release job.

# fetch-depth 0 fetches the complete history instead of a shallow clone.
- name: Check out repository
  uses: actions/checkout@v6
  with:
    fetch-depth: 0

# uv is installed with its cache enabled.
- name: Install uv
  uses: astral-sh/setup-uv@v8.1.0
  with:
    enable-cache: true

# The version is quoted so it is read as a string, not a float.
- name: Set up Python
  uses: actions/setup-python@v6
  with:
    python-version: '3.14'
|
|
68
|
+
|
|
69
|
+
# Manual dispatches with a bump other than "none" commit the new version to
# main before anything is built.
- name: Bump release version
  if: github.event_name == 'workflow_dispatch' && inputs.bump != 'none'
  env:
    BUMP: ${{ inputs.bump }}
  run: |
    # Apply the requested bump, then refresh the lockfile.
    python scripts/bump-version.py --bump "$BUMP"
    uv lock

    # Read the bumped version back from pyproject.toml for the commit message.
    version="$(python -c '
    import tomllib
    with open("pyproject.toml", "rb") as fh:
        print(tomllib.load(fh)["project"]["version"])
    ')"

    # Commit as the GitHub Actions bot and push the result to main.
    git config user.name "github-actions[bot]"
    git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
    git add pyproject.toml uv.lock
    git commit -m "chore: release ${version}"
    git push origin HEAD:main
|
|
90
|
+
|
|
91
|
+
# Publishes the pyproject.toml version as the step output `version`
# (referenced later as steps.project.outputs.version).
- name: Read project version
  id: project
  run: |
    version="$(python -c '
    import tomllib
    with open("pyproject.toml", "rb") as fh:
        print(tomllib.load(fh)["project"]["version"])
    ')"
    printf 'version=%s\n' "${version}" >> "${GITHUB_OUTPUT}"
|
|
102
|
+
|
|
103
|
+
# Runs the project's release check script before anything is published.
- name: Build and smoke-test release artifacts
  run: ./scripts/check-release.sh

# Uploads everything under dist/ to PyPI via trusted publishing; no API token
# is passed to this step.
- name: Publish to PyPI
  if: github.event_name == 'workflow_dispatch' || startsWith(github.ref, 'refs/tags/')
  run: uv publish --trusted-publishing automatic dist/*
|
|
109
|
+
|
|
110
|
+
# Restarts the hosted Space. The step is optional: when the HF_TOKEN secret is
# missing it reports that and succeeds.
- name: Restart Hugging Face Space
  if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch'
  env:
    HF_TOKEN: ${{ secrets.HF_TOKEN }}
  run: |
    [ -n "${HF_TOKEN}" ] || {
      echo "HF_TOKEN secret is not configured; skipping Space restart."
      exit 0
    }
    uv run python -c 'from huggingface_hub import HfApi; HfApi().restart_space("evalstate/hf-discover")'
|
|
124
|
+
|
|
125
|
+
# Publishes dist/* under the tag v<version>, creating the GitHub release when
# it does not exist yet and overwriting its assets when it does.
- name: Create GitHub release
  if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch'
  env:
    GH_TOKEN: ${{ github.token }}
    VERSION: ${{ steps.project.outputs.version }}
  run: |
    tag="v${VERSION}"
    if ! gh release view "${tag}" >/dev/null 2>&1; then
      # No release for this tag yet: create it at the checked-out commit.
      gh release create "${tag}" dist/* \
        --title "${tag}" \
        --generate-notes \
        --target "$(git rev-parse HEAD)"
    else
      # The release already exists: replace its uploaded assets.
      gh release upload "${tag}" dist/* --clobber
    fi
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[codz]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
out/
|
|
16
|
+
eggs/
|
|
17
|
+
.eggs/
|
|
18
|
+
lib/
|
|
19
|
+
lib64/
|
|
20
|
+
parts/
|
|
21
|
+
sdist/
|
|
22
|
+
var/
|
|
23
|
+
wheels/
|
|
24
|
+
share/python-wheels/
|
|
25
|
+
*.egg-info/
|
|
26
|
+
.installed.cfg
|
|
27
|
+
*.egg
|
|
28
|
+
MANIFEST
|
|
29
|
+
|
|
30
|
+
# PyInstaller
|
|
31
|
+
*.manifest
|
|
32
|
+
*.spec
|
|
33
|
+
|
|
34
|
+
# Installer logs
|
|
35
|
+
pip-log.txt
|
|
36
|
+
pip-delete-this-directory.txt
|
|
37
|
+
|
|
38
|
+
# Unit test / coverage reports
|
|
39
|
+
htmlcov/
|
|
40
|
+
.tox/
|
|
41
|
+
.nox/
|
|
42
|
+
.coverage
|
|
43
|
+
.coverage.*
|
|
44
|
+
.cache
|
|
45
|
+
nosetests.xml
|
|
46
|
+
coverage.xml
|
|
47
|
+
*.cover
|
|
48
|
+
*.py.cover
|
|
49
|
+
.hypothesis/
|
|
50
|
+
.pytest_cache/
|
|
51
|
+
cover/
|
|
52
|
+
|
|
53
|
+
# Environments
|
|
54
|
+
.env
|
|
55
|
+
.envrc
|
|
56
|
+
.venv/
|
|
57
|
+
env/
|
|
58
|
+
venv/
|
|
59
|
+
ENV/
|
|
60
|
+
env.bak/
|
|
61
|
+
venv.bak/
|
|
62
|
+
|
|
63
|
+
# Tool caches
|
|
64
|
+
.ruff_cache/
|
|
65
|
+
.mypy_cache/
|
|
66
|
+
.pyre/
|
|
67
|
+
.pytype/
|
|
68
|
+
.dmypy.json
|
|
69
|
+
dmypy.json
|
|
70
|
+
|
|
71
|
+
# Project-local agent/tooling state
|
|
72
|
+
.agents/
|
|
73
|
+
.codex/
|
|
74
|
+
.fast-agent/
|
|
75
|
+
fastagent.jsonl
|
|
76
|
+
data.ms/
|
|
77
|
+
meilisearch
|
|
78
|
+
|
|
79
|
+
# Private/local spec checkout used for implementation review.
|
|
80
|
+
spec/ard-spec/
|
|
81
|
+
|
|
82
|
+
# uv lockfile is intentionally committed.
|
|
83
|
+
# uv.lock
|
|
84
|
+
|
|
85
|
+
# PyPI configuration file
|
|
86
|
+
.pypirc
|
|
87
|
+
|
|
88
|
+
# Editor/IDE local state
|
|
89
|
+
.idea/
|
|
90
|
+
.vscode/
|
|
91
|
+
tempCodeRunnerFile.py
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
- ARD specification is in spec/ard.md
|
|
2
|
+
- Keep a high-level log of features and capabilities in README.md under the `## Features` heading.
|
|
3
|
+
- Project documentation is a lightweight orientation record, not the full source of truth.
|
|
4
|
+
It should help humans quickly understand the big idea and help future agents know which
|
|
5
|
+
scripts, workflows, specs, and plans to inspect for detailed question-answering. Prefer
|
|
6
|
+
concise summaries with explicit artifact pointers over duplicating shell/script logic.
|
|
7
|
+
- Do NOT test things that the `ty` typechecker automatically enforces.
|
|
8
|
+
- Any HF_TOKEN usage must only remain temporarily in memory during a request scope, and must never be stored or emitted in a plain format.
|
|
9
|
+
- Avoid mocking or `monkeypatch` for testing purposes, preferring to use typechecks, simple logic-focussed unit tests and stubs/simulators. Small integration or e2e tests are preferred over lots of unit tests for tightly coupled scenarios.
|
|
10
|
+
- Avoid testing properties and other data transfer scenarios unless transformations or other behaviour is involved.
|
|
11
|
+
- Prefer a functional style of programming where possible.
|
|
12
|
+
- Feature additions should consider the CLI surface as well as adherence to the ARD specification. Both CLI and HTTP should wrap the same, clean, well factored core logic.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Hugging Face
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hf-discover
|
|
3
|
+
Version: 1.3.2
|
|
4
|
+
Summary: ARD registry adapter for Hugging Face Spaces
|
|
5
|
+
License-File: LICENSE
|
|
6
|
+
Requires-Python: >=3.10.0
|
|
7
|
+
Requires-Dist: fastapi==0.137.1
|
|
8
|
+
Requires-Dist: huggingface-hub==1.19.0
|
|
9
|
+
Requires-Dist: pydantic==2.13.4
|
|
10
|
+
Requires-Dist: rich==15.0.0
|
|
11
|
+
Requires-Dist: starlette==1.3.1
|
|
12
|
+
Requires-Dist: typer==0.25.1
|
|
13
|
+
Requires-Dist: uvicorn==0.49.0
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+
# hf-discover
|
|
17
|
+
|
|
18
|
+
A small ARD registry adapter for Hugging Face Skills and Spaces.
|
|
19
|
+
|
|
20
|
+
It exposes Hugging Face discovery as:
|
|
21
|
+
|
|
22
|
+
- a CLI: `hf-discover search "remove background from image"`
|
|
23
|
+
- version introspection: `hf-discover --version`
|
|
24
|
+
- a hosted ARD registry client: `hf-discover search "remove background from image"`
|
|
25
|
+
- a generic ARD registry client: `hf-discover search --registry-url https://registry.example "remove background from image"`
|
|
26
|
+
- a primary ARD REST API combining indexed Hugging Face Skills and Hugging Face
|
|
27
|
+
Spaces: `POST /search`
|
|
28
|
+
- a targeted nested Hugging Face Spaces registry: `POST /registries/huggingface/spaces/search`
|
|
29
|
+
- generated skill artifacts for Spaces via `GET /skills/huggingface/{owner}/{space}/SKILL.md`
|
|
30
|
+
- generated MCP Registry descriptors for MCP Spaces via
|
|
31
|
+
`GET /mcp/huggingface/{owner}/{space}/server.json`
|
|
32
|
+
|
|
33
|
+
The hosted REST API combines Skills and Spaces in the primary registry so simple clients
|
|
34
|
+
only need to call `POST /search`. The nested Spaces registry remains available for clients
|
|
35
|
+
that want targeted Spaces-only discovery or explicit registry traversal.
|
|
36
|
+
|
|
37
|
+
## Features
|
|
38
|
+
|
|
39
|
+
### Space Search and Skill Generation
|
|
40
|
+
|
|
41
|
+
`hf-discover` exposes Hugging Face Spaces semantic search in the primary `/search` endpoint
|
|
42
|
+
and as a targeted nested registry backend at `/registries/huggingface/spaces/search`.
|
|
43
|
+
Search requests use the Hub's agent-oriented semantic search (`agents=true`) and return
|
|
44
|
+
matching Spaces as ARD catalog entries. By default, results can include generated
|
|
45
|
+
`application/ai-skill` artifacts, plus `application/mcp-server-card+json` entries for
|
|
46
|
+
matching Spaces tagged `mcp-server`.
|
|
47
|
+
|
|
48
|
+
Search responses strictly include only Spaces whose runtime stage is `RUNNING`, so returned
|
|
49
|
+
entries are limited to Spaces that are currently ready to serve traffic. The runtime stage
|
|
50
|
+
is also surfaced in result metadata as `runtimeStage`.
|
|
51
|
+
|
|
52
|
+
The generated skill wraps the Space's `agents.md` instructions with the required skill
|
|
53
|
+
frontmatter (`name` and `description`) plus source metadata such as the Space ID, Hub URL,
|
|
54
|
+
app URL, and original `agents.md` URL. This lets clients discover a relevant Space, fetch
|
|
55
|
+
the generated skill, and install or load it using their normal skill flow.
|
|
56
|
+
|
|
57
|
+
For clients that want raw Space descriptors instead of skills, request
|
|
58
|
+
`application/vnd.huggingface.space+json` from either the primary search endpoint or the
|
|
59
|
+
nested Spaces search endpoint.
|
|
60
|
+
|
|
61
|
+
Requests for `application/mcp-server-card+json` add `filter=mcp-server` to the downstream
|
|
62
|
+
Hub search and return MCP server card catalog entries that point at this adapter's
|
|
63
|
+
generated `server.json` descriptor route. The legacy `application/mcp-server+json` filter
|
|
64
|
+
is accepted as a transition alias, but new responses use the `*-card+json` media type
|
|
65
|
+
from the pinned ARD spec. Fetching that descriptor performs a direct Hugging Face Space
|
|
66
|
+
info lookup, verifies the Space is tagged `mcp-server`, and synthesizes an MCP
|
|
67
|
+
Registry-style document whose `remotes[]` contains the Space's Gradio `streamable-http`
|
|
68
|
+
MCP endpoint. When Hub runtime metadata includes a Space domain, that domain is used for
|
|
69
|
+
app and MCP URLs; otherwise the adapter falls back to the standard `.hf.space` slug
|
|
70
|
+
convention.
|
|
71
|
+
|
|
72
|
+
The CLI queries the hosted hf-discover deployment by default and can query any
|
|
73
|
+
ARD-compatible registry by passing `--registry-url`. The value may be either a registry
|
|
74
|
+
base URL or the `/search` endpoint. In this mode the CLI POSTs an ARD
|
|
75
|
+
`SearchRequest` and renders the returned `SearchResponse` using the same JSON/table output
|
|
76
|
+
paths as the Hugging Face Spaces adapter. Pass `--local` to search directly from the
|
|
77
|
+
current process instead.
|
|
78
|
+
|
|
79
|
+
### Combined Skills and Spaces Registry
|
|
80
|
+
|
|
81
|
+
The primary HTTP `POST /search` endpoint combines the Meilisearch-backed
|
|
82
|
+
`huggingface/skills` index with Hugging Face Spaces search. For omitted media type or
|
|
83
|
+
`application/ai-skill`, it can return both indexed `SKILL.md` artifacts and generated Space
|
|
84
|
+
skills in one ranked response. Section-level Skills index hits are grouped into skill-level
|
|
85
|
+
search results.
|
|
86
|
+
|
|
87
|
+
Indexed Hugging Face Skills are directory-style skills, so their search result `url` points
|
|
88
|
+
at the skill directory, not the contained `SKILL.md` file. Generated Space skills are
|
|
89
|
+
single-file artifacts materialized by this adapter and continue to point at the generated
|
|
90
|
+
`/skills/huggingface/{owner}/{space}/SKILL.md` URL.
|
|
91
|
+
|
|
92
|
+
For `application/vnd.huggingface.space+json` and `application/mcp-server-card+json`,
|
|
93
|
+
primary search routes directly to the Spaces backend because those media types are
|
|
94
|
+
Space-specific.
|
|
95
|
+
|
|
96
|
+
The registry uses the ARD v0.5 search envelope: artifact type constraints are
|
|
97
|
+
expressed as `query.filter.type`, response entries use the catalog `type` field, and
|
|
98
|
+
Hugging Face entries use domain-anchored `urn:ai:hf.co:...` identifiers. Catalog entry
|
|
99
|
+
models enforce the v0.5 strict value-or-reference rule, domain-anchored `urn:ai:<fqdn>:...`
|
|
100
|
+
identifiers, and integer 0-100 relevance scores.
|
|
101
|
+
|
|
102
|
+
Structured filters use the ARD v0.5 field-path semantics for exact matching after
|
|
103
|
+
retrieval: scalar filter values are treated like single-item arrays, values within one
|
|
104
|
+
filter key are ORed, different filter keys are ANDed, arrays on entries match when any
|
|
105
|
+
item matches, nested paths such as `metadata.sourceType` are supported, and `publisher`
|
|
106
|
+
is derived from the entry identifier's publisher domain.
|
|
107
|
+
|
|
108
|
+
The primary server exposes `GET /.well-known/ai-catalog.json` as an ARD discovery
|
|
109
|
+
document. It advertises the primary Hugging Face Discover registry and the nested
|
|
110
|
+
Spaces registry as `application/ai-registry+json` entries using v0.5 `type` fields and
|
|
111
|
+
domain-anchored `urn:ai:hf.co:...` identifiers.
|
|
112
|
+
|
|
113
|
+
By default, advertised registry, generated Space skill, and generated MCP `server.json`
|
|
114
|
+
URLs are derived from the incoming request base URL, because those URLs point at
|
|
115
|
+
materialized artifacts and search routes served by this adapter. Set
|
|
116
|
+
`DISCOVER_PUBLIC_BASE_URL` only when a reverse proxy, staging deployment, or self-hosted
|
|
117
|
+
runtime reports an internal base URL but clients need a different public prefix.
|
|
118
|
+
Space-owned URLs such as `agents.md`, app URLs, and MCP endpoints continue to point at
|
|
119
|
+
Hugging Face Space URLs derived from Hub/runtime metadata.
|
|
120
|
+
|
|
121
|
+
When clients request referrals with top-level `federation` set to `referrals` or `auto`, the
|
|
122
|
+
primary registry can still include a referral to the nested Hugging Face Spaces registry.
|
|
123
|
+
Simple clients can ignore referrals and use the combined results; traversal-capable clients
|
|
124
|
+
can use the referral for a follow-up Spaces-only search.
|
|
125
|
+
|
|
126
|
+
### Challenge Registry Server
|
|
127
|
+
|
|
128
|
+
`hf-discover challenge serve` runs a deterministic local fixture registry for client
|
|
129
|
+
development. It returns mixed ARD result types, including skills, MCP servers, A2A
|
|
130
|
+
agents, ai-catalog bundles, registry entries, referrals, empty registries, and nested
|
|
131
|
+
registries. Use it to test clients that need to follow registry trees and fetch referenced
|
|
132
|
+
artifacts without relying on Hugging Face or Meilisearch services.
|
|
133
|
+
|
|
134
|
+
`hf-discover challenge search` queries a running challenge registry and defaults to
|
|
135
|
+
requesting referrals, making it a convenient CLI path for agents that need to practice
|
|
136
|
+
ARD traversal. The generic `hf-discover search` command defaults to the hosted
|
|
137
|
+
deployment and also accepts `--registry-url` and `--federation none|referrals|auto`. When
|
|
138
|
+
registry-backed commands are run with `--json`, the CLI prints the registry's raw
|
|
139
|
+
`SearchResponse` body so clients can inspect exact `results`, `referrals`, `type`,
|
|
140
|
+
`url`, `data`, and `pageToken` fields returned by the server.
|
|
141
|
+
|
|
142
|
+
The challenge registry uses the same catalog-entry model and field-path filtering helper
|
|
143
|
+
as the primary server. Both registries expose `POST /explore` and return `501 Not
|
|
144
|
+
Implemented`, matching the ARD v0.5 behavior for registries that do not implement the
|
|
145
|
+
optional Explore facets API.
|
|
146
|
+
|
|
147
|
+
### Specification References
|
|
148
|
+
|
|
149
|
+
`spec/ard.md` remains the committed local ARD orientation snapshot. When a private pinned
|
|
150
|
+
upstream spec checkout is available locally at `spec/ard-spec/`, use its `spec/ard.md`,
|
|
151
|
+
ADRs, schemas, and conformance CLI as the authoritative artifacts for implementation
|
|
152
|
+
review. That checkout is intentionally gitignored.
|
|
153
|
+
|
|
154
|
+
The AI Catalog draft reference can be refreshed from the upstream `Agent-Card/ai-catalog`
|
|
155
|
+
repository with
|
|
156
|
+
`./scripts/update-ai-catalog-spec.sh`, which copies the latest Markdown and JSON assets
|
|
157
|
+
from its `specification/` folder into `spec/ai-catalog/`.
|
|
158
|
+
|
|
159
|
+
The vendored `spec/ai-catalog/` snapshot currently tracks the pre-merge content from
|
|
160
|
+
`Agent-Card/ai-catalog` PR #37, which updates catalog entries from `mediaType` to `type`.
|
|
161
|
+
|
|
162
|
+
### Roadmap
|
|
163
|
+
|
|
164
|
+
The next `hf-discover` version is expected to expand the CLI surface for arbitrary ARD
|
|
165
|
+
structured filters. Today the HTTP server accepts `query.filter` and applies exact-match
|
|
166
|
+
field-path filters after retrieval; the CLI exposes only the common media-type path
|
|
167
|
+
through `--kind`. Planned work includes improved server-side handling/pushdown for common
|
|
168
|
+
fields such as tags and Space SDK.
|
|
169
|
+
|
|
170
|
+
The next version is also expected to use `auto` federation.
|
|
171
|
+
|
|
172
|
+
### Release Automation
|
|
173
|
+
|
|
174
|
+
Releases are built through the same quality gates as CI: locked dependency sync, Ruff
|
|
175
|
+
format/lint checks, `ty` type checking, and pytest. The package supports the same minimum
|
|
176
|
+
Python version as `huggingface_hub` (`>=3.10.0`). The hosted Hugging Face Space deployment
|
|
177
|
+
uses Python 3.14 for runtime performance.
|
|
178
|
+
|
|
179
|
+
Run the release check with:
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
./scripts/check-release.sh
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
Optionally assert the expected project version:
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
./scripts/check-release.sh 0.1.0
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
Release from `main` after the intended code changes are merged. Run the **Release** GitHub
|
|
192
|
+
Action from `main`, choose `patch`, `minor`, or `major` for the version bump, and enter
|
|
193
|
+
confirmation value `release`. The workflow commits the `pyproject.toml` and `uv.lock`
|
|
194
|
+
version bump directly to `main`, builds artifacts from that bumped commit, publishes them
|
|
195
|
+
to PyPI using trusted publishing, attaches the artifacts to the GitHub Release, and
|
|
196
|
+
restarts the Hugging Face Space when the `HF_TOKEN` secret is configured. Use bump value
|
|
197
|
+
`none` only when retrying a failed release for the version already on `main`.
|
|
198
|
+
|
|
199
|
+
For local preflight or manual version changes, use
|
|
200
|
+
`python scripts/bump-version.py --bump patch|minor|major`, then `uv lock`, then
|
|
201
|
+
`./scripts/check-release.sh`.
|
|
202
|
+
|
|
203
|
+
PyPI trusted publishing must be configured for project `hf-discover` with owner
|
|
204
|
+
`huggingface`, repository `hf-discover`, workflow `release.yml`, and environment `pypi`.
|
|
205
|
+
The GitHub `pypi` environment does not need secrets for trusted publishing, but it must
|
|
206
|
+
exist if the repository requires explicit environment configuration.
|
|
207
|
+
|
|
208
|
+
### Hugging Face Space Deployment
|
|
209
|
+
|
|
210
|
+
The project includes a reproducible Docker Space definition in `deploy/huggingface-space/`.
|
|
211
|
+
It uses the official uv Python image and runs the latest published `hf-discover` package
|
|
212
|
+
with `uvx --refresh`, so restarting or rebuilding the Space resolves the newest PyPI
|
|
213
|
+
release without committing generated application code to the Space repository. This keeps
|
|
214
|
+
the hosted Space lightweight while letting PyPI releases drive runtime updates.
|
|
215
|
+
|
|
216
|
+
The Space startup wrapper can optionally run a pinned Meilisearch binary from an attached
|
|
217
|
+
Hugging Face bucket and ingest a generated Hugging Face Skills index artifact from another
|
|
218
|
+
attached bucket. When Meilisearch starts successfully, the wrapper exports the configured
|
|
219
|
+
Meilisearch URL and index for the API process so `POST /search` includes loaded Skills
|
|
220
|
+
results alongside Spaces results. Helper scripts in `scripts/` vendor the pinned
|
|
221
|
+
Meilisearch binary, create the configured buckets, attach them as Space volumes, and
|
|
222
|
+
configure runtime variables without running unsupervised installer scripts in the Space.
|
|
223
|
+
|
|
224
|
+
The documentation here is intentionally an orientation record: it states the deployment
|
|
225
|
+
idea and points to the artifacts that contain the operational evidence. For details, read
|
|
226
|
+
`hf-discover.toml`, `scripts/vendor-meilisearch.py`,
|
|
227
|
+
`scripts/configure-space-runtime.py`, and
|
|
228
|
+
`deploy/huggingface-space/start-discover.sh`.
|
|
229
|
+
|
|
230
|
+
## Usage
|
|
231
|
+
|
|
232
|
+
The examples below use the standalone `hf-discover` command form.
|
|
233
|
+
|
|
234
|
+
`--kind skill` requests AI-skill results. In the combined registry this includes indexed,
|
|
235
|
+
directory-style Hugging Face Skills from Meilisearch and generated single-file Space skill
|
|
236
|
+
wrappers. `--kind space` requests raw Hugging Face Space descriptors. `--kind mcp` requests
|
|
237
|
+
MCP server entries for Spaces tagged `mcp-server`. `--kind all` asks for the default mixed
|
|
238
|
+
view.
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
> hf-discover --version
|
|
242
|
+
> hf-discover search "generate image" --limit 5
|
|
243
|
+
> hf-discover search "generate image" --kind skill --json
|
|
244
|
+
> hf-discover search "generate image" --kind space --json
|
|
245
|
+
> hf-discover search "generate image" --kind mcp --json
|
|
246
|
+
> hf-discover mcp-server-json mcp-tools/FLUX.1-Kontext-Dev
|
|
247
|
+
> hf-discover search --registry-url https://registry.example "generate image" --kind skill --json
|
|
248
|
+
> hf-discover search "generate image" --kind space --local
|
|
249
|
+
> hf-discover serve --port 8080
|
|
250
|
+
> hf-discover challenge serve --port 8090
|
|
251
|
+
> hf-discover challenge search "find tools and registries" --federation referrals --json
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
### Recommended `hf` extension usage
|
|
255
|
+
|
|
256
|
+
For Hugging Face CLI users, the recommended install path is as an `hf` extension:
|
|
257
|
+
|
|
258
|
+
```bash
|
|
259
|
+
> hf extensions install huggingface/hf-discover
|
|
260
|
+
> hf discover --version
|
|
261
|
+
> hf discover search "generate image" --kind space --limit 5
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
The project still documents examples as `hf-discover ...` because the same CLI is also
|
|
265
|
+
available as a standalone Python console script. When installed as an extension, replace
|
|
266
|
+
`hf-discover` with `hf discover`.
|
|
267
|
+
|
|
268
|
+
```bash
|
|
269
|
+
> curl -X POST http://localhost:8080/search \
|
|
270
|
+
-H 'content-type: application/json' \
|
|
271
|
+
-d '{"query":{"text":"upload files to a dataset repo","filter":{"type":["application/ai-skill"]}},"pageSize":5}'
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
Search the targeted nested Spaces registry:
|
|
275
|
+
|
|
276
|
+
```bash
|
|
277
|
+
> curl -X POST http://localhost:8080/registries/huggingface/spaces/search \
|
|
278
|
+
-H 'content-type: application/json' \
|
|
279
|
+
-d '{"query":{"text":"remove background from image","filter":{"type":["application/ai-skill"]}},"pageSize":5}'
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
Search the local challenge registry:
|
|
283
|
+
|
|
284
|
+
```bash
|
|
285
|
+
> curl -X POST http://localhost:8090/search \
|
|
286
|
+
-H 'content-type: application/json' \
|
|
287
|
+
-d '{"query":{"text":"find tools and registries"},"federation":"referrals","pageSize":10}'
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
Fetch a generated skill:
|
|
291
|
+
|
|
292
|
+
```bash
|
|
293
|
+
> curl http://localhost:8080/skills/huggingface/mcp-tools/FLUX.1-Kontext-Dev/SKILL.md
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
To get generic Hugging Face Space descriptors instead of skill wrappers, request:
|
|
297
|
+
|
|
298
|
+
```json
|
|
299
|
+
{"query":{"text":"remove background from image","filter":{"type":["application/vnd.huggingface.space+json"]}},"pageSize":5}
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
### HF_TOKEN handling
|
|
303
|
+
|
|
304
|
+
Primary and nested Spaces registry search requests can forward a request-scoped Hugging
|
|
305
|
+
Face token for the downstream Spaces search call. The server checks
|
|
306
|
+
`X-HF-Authorization: Bearer ...`, then `Authorization: Bearer ...`, then `HF_TOKEN: ...`;
|
|
307
|
+
a header token overrides any token configured when the server starts and is not stored
|
|
308
|
+
beyond the request.
|