quarry-kb 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quarry_kb-0.1.0/.github/workflows/ci.yml +84 -0
- quarry_kb-0.1.0/.gitignore +16 -0
- quarry_kb-0.1.0/CHANGELOG.md +29 -0
- quarry_kb-0.1.0/CONTRIBUTING.md +67 -0
- quarry_kb-0.1.0/ISA.md +515 -0
- quarry_kb-0.1.0/LICENSE +21 -0
- quarry_kb-0.1.0/PKG-INFO +120 -0
- quarry_kb-0.1.0/README.md +87 -0
- quarry_kb-0.1.0/SPEC.md +241 -0
- quarry_kb-0.1.0/examples/quarry.toml +45 -0
- quarry_kb-0.1.0/pyproject.toml +71 -0
- quarry_kb-0.1.0/src/quarry/__init__.py +13 -0
- quarry_kb-0.1.0/src/quarry/__main__.py +6 -0
- quarry_kb-0.1.0/src/quarry/adapters/__init__.py +10 -0
- quarry_kb-0.1.0/src/quarry/adapters/base.py +34 -0
- quarry_kb-0.1.0/src/quarry/adapters/registry.py +76 -0
- quarry_kb-0.1.0/src/quarry/adapters/web.py +67 -0
- quarry_kb-0.1.0/src/quarry/adapters/youtube.py +80 -0
- quarry_kb-0.1.0/src/quarry/cli.py +218 -0
- quarry_kb-0.1.0/src/quarry/config.py +351 -0
- quarry_kb-0.1.0/src/quarry/discovery.py +256 -0
- quarry_kb-0.1.0/src/quarry/errors.py +22 -0
- quarry_kb-0.1.0/src/quarry/finish.py +56 -0
- quarry_kb-0.1.0/src/quarry/frontmatter.py +26 -0
- quarry_kb-0.1.0/src/quarry/git.py +48 -0
- quarry_kb-0.1.0/src/quarry/ingest.py +84 -0
- quarry_kb-0.1.0/src/quarry/lint.py +234 -0
- quarry_kb-0.1.0/src/quarry/manifest.py +79 -0
- quarry_kb-0.1.0/src/quarry/store.py +131 -0
- quarry_kb-0.1.0/tests/conftest.py +32 -0
- quarry_kb-0.1.0/tests/fixtures/lint_report.golden.txt +33 -0
- quarry_kb-0.1.0/tests/fixtures/lint_store/quarry.toml +2 -0
- quarry_kb-0.1.0/tests/fixtures/lint_store/raw/exists.md +1 -0
- quarry_kb-0.1.0/tests/fixtures/lint_store/wiki/ai/alpha.md +7 -0
- quarry_kb-0.1.0/tests/fixtures/lint_store/wiki/ai/beta.md +7 -0
- quarry_kb-0.1.0/tests/fixtures/lint_store/wiki/index.md +1 -0
- quarry_kb-0.1.0/tests/fixtures/lint_store/wiki/misc/gamma.md +5 -0
- quarry_kb-0.1.0/tests/test_adapters.py +270 -0
- quarry_kb-0.1.0/tests/test_cli.py +106 -0
- quarry_kb-0.1.0/tests/test_config.py +162 -0
- quarry_kb-0.1.0/tests/test_config_matrix.py +73 -0
- quarry_kb-0.1.0/tests/test_discovery.py +222 -0
- quarry_kb-0.1.0/tests/test_doctor.py +20 -0
- quarry_kb-0.1.0/tests/test_finish.py +122 -0
- quarry_kb-0.1.0/tests/test_frontmatter.py +41 -0
- quarry_kb-0.1.0/tests/test_git.py +50 -0
- quarry_kb-0.1.0/tests/test_ingest.py +131 -0
- quarry_kb-0.1.0/tests/test_init.py +69 -0
- quarry_kb-0.1.0/tests/test_invariants.py +72 -0
- quarry_kb-0.1.0/tests/test_lint.py +111 -0
- quarry_kb-0.1.0/tests/test_manifest.py +62 -0
- quarry_kb-0.1.0/tests/test_packaging.py +50 -0
- quarry_kb-0.1.0/tests/test_store.py +116 -0
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [master, main]
|
|
6
|
+
tags: ["v*", "test-v*"] # release / testpypi jobs gate on the tag ref
|
|
7
|
+
pull_request:
|
|
8
|
+
workflow_dispatch:
|
|
9
|
+
schedule:
|
|
10
|
+
- cron: "0 6 * * 1" # weekly (Mondays 06:00 UTC) — exercises the gated integration job
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
test:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
strategy:
|
|
16
|
+
fail-fast: false
|
|
17
|
+
matrix:
|
|
18
|
+
python-version: ["3.11", "3.12", "3.13"]
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
- name: Install uv
|
|
22
|
+
uses: astral-sh/setup-uv@v5
|
|
23
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
24
|
+
run: uv python install ${{ matrix.python-version }}
|
|
25
|
+
- name: Lint
|
|
26
|
+
run: uv run --python ${{ matrix.python-version }} --extra dev ruff check .
|
|
27
|
+
- name: Test (hermetic, no network) + coverage gate
|
|
28
|
+
run: |
|
|
29
|
+
uv run --python ${{ matrix.python-version }} --extra dev --extra all \
|
|
30
|
+
python -m coverage run -m pytest -q
|
|
31
|
+
uv run --python ${{ matrix.python-version }} --extra dev --extra all \
|
|
32
|
+
python -m coverage report --fail-under=90
|
|
33
|
+
- name: Coverage XML
|
|
34
|
+
run: uv run --python ${{ matrix.python-version }} --extra dev python -m coverage xml
|
|
35
|
+
- name: Upload coverage
|
|
36
|
+
uses: actions/upload-artifact@v4
|
|
37
|
+
with:
|
|
38
|
+
name: coverage-${{ matrix.python-version }}
|
|
39
|
+
path: coverage.xml
|
|
40
|
+
|
|
41
|
+
# Live network tests for the adapters — manual or weekly-scheduled runs only, never in the default gate.
|
|
42
|
+
integration:
|
|
43
|
+
if: github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
|
|
44
|
+
runs-on: ubuntu-latest
|
|
45
|
+
steps:
|
|
46
|
+
- uses: actions/checkout@v4
|
|
47
|
+
- uses: astral-sh/setup-uv@v5
|
|
48
|
+
- run: uv python install 3.12
|
|
49
|
+
- name: Integration tests
|
|
50
|
+
run: uv run --python 3.12 --extra dev --extra all python -m pytest -m integration -q
|
|
51
|
+
|
|
52
|
+
# Dry-run publish to TestPyPI via trusted publishing. Fires on a `test-v*` tag
|
|
53
|
+
# (e.g. test-v0.1.0) — a throwaway rehearsal of the real release flow.
|
|
54
|
+
testpypi:
|
|
55
|
+
if: startsWith(github.ref, 'refs/tags/test-v')
|
|
56
|
+
needs: test
|
|
57
|
+
runs-on: ubuntu-latest
|
|
58
|
+
permissions:
|
|
59
|
+
id-token: write # trusted publishing
|
|
60
|
+
steps:
|
|
61
|
+
- uses: actions/checkout@v4
|
|
62
|
+
- uses: astral-sh/setup-uv@v5
|
|
63
|
+
- name: Build
|
|
64
|
+
run: uv build
|
|
65
|
+
- name: Publish to TestPyPI
|
|
66
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
67
|
+
with:
|
|
68
|
+
repository-url: https://test.pypi.org/legacy/
|
|
69
|
+
|
|
70
|
+
# Tagged-release publish to PyPI via trusted publishing. DEFERRED: wired but only
|
|
71
|
+
# fires on a version tag, which is not pushed until publishing is enabled.
|
|
72
|
+
release:
|
|
73
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
|
74
|
+
needs: test
|
|
75
|
+
runs-on: ubuntu-latest
|
|
76
|
+
permissions:
|
|
77
|
+
id-token: write # trusted publishing
|
|
78
|
+
steps:
|
|
79
|
+
- uses: actions/checkout@v4
|
|
80
|
+
- uses: astral-sh/setup-uv@v5
|
|
81
|
+
- name: Build
|
|
82
|
+
run: uv build
|
|
83
|
+
- name: Publish to PyPI
|
|
84
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# transient — build reference only, delete after extraction
|
|
2
|
+
EXTRACTION-SOURCE-kb.py
|
|
3
|
+
.quarry/
|
|
4
|
+
__pycache__/
|
|
5
|
+
*.pyc
|
|
6
|
+
|
|
7
|
+
# Python tooling
|
|
8
|
+
.venv/
|
|
9
|
+
.coverage
|
|
10
|
+
htmlcov/
|
|
11
|
+
.pytest_cache/
|
|
12
|
+
.ruff_cache/
|
|
13
|
+
*.egg-info/
|
|
14
|
+
dist/
|
|
15
|
+
build/
|
|
16
|
+
uv.lock
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to Quarry are documented here. The format follows
|
|
4
|
+
[Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and Quarry adheres to
|
|
5
|
+
[Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
6
|
+
|
|
7
|
+
## [Unreleased]
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
- Initial implementation, generalised from the private `bin/kb` harness.
|
|
11
|
+
- Configuration layer (`quarry.toml`) with `quarry init` scaffolding a fully-commented
|
|
12
|
+
default; walk-up discovery, `[tool.quarry]` fallback, validation (unknown-key warnings,
|
|
13
|
+
type and enum errors), and a `Config` dataclass threaded through every command.
|
|
14
|
+
- Store path templating: `raw_layout` and `slug` from config across the full token set.
|
|
15
|
+
- Compile-manifest seam (`manifest.py`) — write/load/hash round-trip.
|
|
16
|
+
- Adapters: contract + registry with entry-point plugin discovery and an `enabled`
|
|
17
|
+
allowlist; built-in `youtube` and `web` adapters (extras-gated, hermetically tested).
|
|
18
|
+
- `ingest` (resolve → fetch → raw → manifest with an `on_duplicate` dedup pre-check) and
|
|
19
|
+
`finish` (provenance verify → lint → commit, push only when opted in).
|
|
20
|
+
- Config-driven structural-health `lint` returning a structured `LintResult`, with a
|
|
21
|
+
golden-output test.
|
|
22
|
+
- Optional `discovery` backend (`qmd`) powering dedup, `related`, and `densify`; degrades
|
|
23
|
+
gracefully when the backend is `none` or the tool is absent.
|
|
24
|
+
- CLI commands: `doctor`, `init`, `adapters`, `ingest`, `finish`, `lint`, `related`,
|
|
25
|
+
`densify`.
|
|
26
|
+
- Extensive hermetic test suite (no network, no API keys) with a ≥90% coverage gate,
|
|
27
|
+
`ruff`, and a Python 3.11–3.13 CI matrix.
|
|
28
|
+
|
|
29
|
+
[Unreleased]: https://github.com/asachs/quarry-kb/commits/master
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# Contributing to Quarry
|
|
2
|
+
|
|
3
|
+
Thanks for your interest. Quarry is small, deterministic, and heavily tested by design —
|
|
4
|
+
contributions are expected to keep it that way.
|
|
5
|
+
|
|
6
|
+
## Development setup
|
|
7
|
+
|
|
8
|
+
Quarry uses [`uv`](https://docs.astral.sh/uv/) for environments and Python management.
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
uv run --extra dev --extra all python -m pytest # run the suite
|
|
12
|
+
uv run --extra dev ruff check . # lint
|
|
13
|
+
uv run --extra dev --extra all python -m coverage run -m pytest \
|
|
14
|
+
&& uv run --extra dev python -m coverage report --fail-under=90
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Quality gates (enforced in CI): **all tests pass**, **`ruff` clean**, and **coverage ≥ 90%**
|
|
18
|
+
across Python 3.11–3.13.
|
|
19
|
+
|
|
20
|
+
## The adapter contract
|
|
21
|
+
|
|
22
|
+
A new source type is a small, self-contained adapter — no fork required.
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
from quarry.adapters.base import Adapter, FetchResult
|
|
26
|
+
|
|
27
|
+
class PdfAdapter(Adapter):
|
|
28
|
+
name = "pdf"
|
|
29
|
+
|
|
30
|
+
def matches(self, url: str) -> bool:
|
|
31
|
+
return url.lower().endswith(".pdf")
|
|
32
|
+
|
|
33
|
+
def fetch(self, url: str) -> FetchResult:
|
|
34
|
+
# Put network/IO in small overridable methods so tests stay hermetic.
|
|
35
|
+
return FetchResult(content="...", metadata={
|
|
36
|
+
"title": "...", "url": url, "date": "2026-01-01", "source_id": "...",
|
|
37
|
+
})
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Register it via an entry point in your package's `pyproject.toml`:
|
|
41
|
+
|
|
42
|
+
```toml
|
|
43
|
+
[project.entry-points."quarry.adapters"]
|
|
44
|
+
pdf = "your_package.pdf:PdfAdapter"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Then enable it in the consuming wiki's `quarry.toml`:
|
|
48
|
+
|
|
49
|
+
```toml
|
|
50
|
+
[adapters]
|
|
51
|
+
enabled = ["youtube", "web", "pdf"]
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Requirements for an adapter PR
|
|
55
|
+
|
|
56
|
+
1. **Hermetic test.** Network calls live behind overridable methods; tests use a recorded
|
|
57
|
+
fixture/cassette and must pass with no network and no API keys.
|
|
58
|
+
2. **One `@pytest.mark.integration` live test** is allowed (excluded from the default run).
|
|
59
|
+
3. **Clean errors.** A missing optional dependency raises a `QuarryError` with an install
|
|
60
|
+
hint — never a bare `ImportError` traceback.
|
|
61
|
+
|
|
62
|
+
## Principles to respect
|
|
63
|
+
|
|
64
|
+
- **Quarry never calls an LLM.** The article-writing step happens between `ingest` and
|
|
65
|
+
`finish`, performed by a human or agent — not by Quarry.
|
|
66
|
+
- **No hardcoded conventions.** Everything configurable flows through the `Config` dataclass.
|
|
67
|
+
- **Fail loud, fail tested.** Every mechanical step has a test that catches its break.
|