quarry-kb 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. quarry_kb-0.1.0/.github/workflows/ci.yml +84 -0
  2. quarry_kb-0.1.0/.gitignore +16 -0
  3. quarry_kb-0.1.0/CHANGELOG.md +29 -0
  4. quarry_kb-0.1.0/CONTRIBUTING.md +67 -0
  5. quarry_kb-0.1.0/ISA.md +515 -0
  6. quarry_kb-0.1.0/LICENSE +21 -0
  7. quarry_kb-0.1.0/PKG-INFO +120 -0
  8. quarry_kb-0.1.0/README.md +87 -0
  9. quarry_kb-0.1.0/SPEC.md +241 -0
  10. quarry_kb-0.1.0/examples/quarry.toml +45 -0
  11. quarry_kb-0.1.0/pyproject.toml +71 -0
  12. quarry_kb-0.1.0/src/quarry/__init__.py +13 -0
  13. quarry_kb-0.1.0/src/quarry/__main__.py +6 -0
  14. quarry_kb-0.1.0/src/quarry/adapters/__init__.py +10 -0
  15. quarry_kb-0.1.0/src/quarry/adapters/base.py +34 -0
  16. quarry_kb-0.1.0/src/quarry/adapters/registry.py +76 -0
  17. quarry_kb-0.1.0/src/quarry/adapters/web.py +67 -0
  18. quarry_kb-0.1.0/src/quarry/adapters/youtube.py +80 -0
  19. quarry_kb-0.1.0/src/quarry/cli.py +218 -0
  20. quarry_kb-0.1.0/src/quarry/config.py +351 -0
  21. quarry_kb-0.1.0/src/quarry/discovery.py +256 -0
  22. quarry_kb-0.1.0/src/quarry/errors.py +22 -0
  23. quarry_kb-0.1.0/src/quarry/finish.py +56 -0
  24. quarry_kb-0.1.0/src/quarry/frontmatter.py +26 -0
  25. quarry_kb-0.1.0/src/quarry/git.py +48 -0
  26. quarry_kb-0.1.0/src/quarry/ingest.py +84 -0
  27. quarry_kb-0.1.0/src/quarry/lint.py +234 -0
  28. quarry_kb-0.1.0/src/quarry/manifest.py +79 -0
  29. quarry_kb-0.1.0/src/quarry/store.py +131 -0
  30. quarry_kb-0.1.0/tests/conftest.py +32 -0
  31. quarry_kb-0.1.0/tests/fixtures/lint_report.golden.txt +33 -0
  32. quarry_kb-0.1.0/tests/fixtures/lint_store/quarry.toml +2 -0
  33. quarry_kb-0.1.0/tests/fixtures/lint_store/raw/exists.md +1 -0
  34. quarry_kb-0.1.0/tests/fixtures/lint_store/wiki/ai/alpha.md +7 -0
  35. quarry_kb-0.1.0/tests/fixtures/lint_store/wiki/ai/beta.md +7 -0
  36. quarry_kb-0.1.0/tests/fixtures/lint_store/wiki/index.md +1 -0
  37. quarry_kb-0.1.0/tests/fixtures/lint_store/wiki/misc/gamma.md +5 -0
  38. quarry_kb-0.1.0/tests/test_adapters.py +270 -0
  39. quarry_kb-0.1.0/tests/test_cli.py +106 -0
  40. quarry_kb-0.1.0/tests/test_config.py +162 -0
  41. quarry_kb-0.1.0/tests/test_config_matrix.py +73 -0
  42. quarry_kb-0.1.0/tests/test_discovery.py +222 -0
  43. quarry_kb-0.1.0/tests/test_doctor.py +20 -0
  44. quarry_kb-0.1.0/tests/test_finish.py +122 -0
  45. quarry_kb-0.1.0/tests/test_frontmatter.py +41 -0
  46. quarry_kb-0.1.0/tests/test_git.py +50 -0
  47. quarry_kb-0.1.0/tests/test_ingest.py +131 -0
  48. quarry_kb-0.1.0/tests/test_init.py +69 -0
  49. quarry_kb-0.1.0/tests/test_invariants.py +72 -0
  50. quarry_kb-0.1.0/tests/test_lint.py +111 -0
  51. quarry_kb-0.1.0/tests/test_manifest.py +62 -0
  52. quarry_kb-0.1.0/tests/test_packaging.py +50 -0
  53. quarry_kb-0.1.0/tests/test_store.py +116 -0
@@ -0,0 +1,84 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [master, main]
6
+ tags: ["v*", "test-v*"] # release / testpypi jobs gate on the tag ref
7
+ pull_request:
8
+ workflow_dispatch: # manual runs — one of the two events that unlock the integration job
9
+ schedule:
10
+ - cron: "0 6 * * 1" # weekly, Mondays at 06:00 UTC — exercises the gated integration job
11
+
12
+ jobs:
13
+ test: # default gate: ruff lint, hermetic test suite, and a 90% coverage floor per Python version
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ fail-fast: false # let the remaining Python versions finish when one of them fails
17
+ matrix:
18
+ python-version: ["3.11", "3.12", "3.13"]
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+ - name: Install uv
22
+ uses: astral-sh/setup-uv@v5
23
+ - name: Set up Python ${{ matrix.python-version }}
24
+ run: uv python install ${{ matrix.python-version }}
25
+ - name: Lint
26
+ run: uv run --python ${{ matrix.python-version }} --extra dev ruff check .
27
+ - name: Test (hermetic, no network) + coverage gate
28
+ run: |
29
+ uv run --python ${{ matrix.python-version }} --extra dev --extra all \
30
+ python -m coverage run -m pytest -q
31
+ uv run --python ${{ matrix.python-version }} --extra dev --extra all \
32
+ python -m coverage report --fail-under=90
33
+ - name: Coverage XML
34
+ run: uv run --python ${{ matrix.python-version }} --extra dev python -m coverage xml
35
+ - name: Upload coverage
36
+ uses: actions/upload-artifact@v4
37
+ with:
38
+ name: coverage-${{ matrix.python-version }}
39
+ path: coverage.xml
40
+
41
+ # Live network tests for the adapters — manual or weekly-scheduled runs only, never in the default gate.
42
+ integration:
43
+ if: github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
44
+ runs-on: ubuntu-latest
45
+ steps:
46
+ - uses: actions/checkout@v4
47
+ - uses: astral-sh/setup-uv@v5
48
+ - run: uv python install 3.12
49
+ - name: Integration tests
50
+ run: uv run --python 3.12 --extra dev --extra all python -m pytest -m integration -q
51
+
52
+ # Dry-run publish to TestPyPI via trusted publishing. Fires on a `test-v*` tag
53
+ # (e.g. test-v0.1.0) once the `test` job has passed — a throwaway rehearsal of the real release flow.
54
+ testpypi:
55
+ if: startsWith(github.ref, 'refs/tags/test-v')
56
+ needs: test
57
+ runs-on: ubuntu-latest
58
+ permissions:
59
+ id-token: write # lets the job request the OIDC token that trusted publishing uses
60
+ steps:
61
+ - uses: actions/checkout@v4
62
+ - uses: astral-sh/setup-uv@v5
63
+ - name: Build
64
+ run: uv build
65
+ - name: Publish to TestPyPI
66
+ uses: pypa/gh-action-pypi-publish@release/v1
67
+ with:
68
+ repository-url: https://test.pypi.org/legacy/
69
+
70
+ # Tagged-release publish to PyPI via trusted publishing. DEFERRED: wired up, but it only
71
+ # fires on a `v*` tag (after the `test` job passes), and no such tag is pushed until publishing is enabled.
72
+ release:
73
+ if: startsWith(github.ref, 'refs/tags/v')
74
+ needs: test
75
+ runs-on: ubuntu-latest
76
+ permissions:
77
+ id-token: write # lets the job request the OIDC token that trusted publishing uses
78
+ steps:
79
+ - uses: actions/checkout@v4
80
+ - uses: astral-sh/setup-uv@v5
81
+ - name: Build
82
+ run: uv build
83
+ - name: Publish to PyPI
84
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,16 @@
1
+ # transient — build reference only, delete after extraction
2
+ EXTRACTION-SOURCE-kb.py
3
+ .quarry/
4
+ __pycache__/
5
+ *.pyc
6
+
7
+ # Python tooling: virtualenv, coverage output, caches, build artifacts, and the uv lockfile
8
+ .venv/
9
+ .coverage
10
+ htmlcov/
11
+ .pytest_cache/
12
+ .ruff_cache/
13
+ *.egg-info/
14
+ dist/
15
+ build/
16
+ uv.lock
@@ -0,0 +1,29 @@
1
+ # Changelog
2
+
3
+ All notable changes to Quarry are documented here. The format follows
4
+ [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and Quarry adheres to
5
+ [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [Unreleased]
8
+
9
+ ### Added
10
+ - Initial implementation, generalised from the private `bin/kb` harness.
11
+ - Configuration layer (`quarry.toml`) with `quarry init` scaffolding a fully-commented
12
+ default; walk-up discovery, `[tool.quarry]` fallback, validation (unknown-key warnings,
13
+ type and enum errors), and a `Config` dataclass threaded through every command.
14
+ - Store path templating: `raw_layout` and `slug` from config across the full token set.
15
+ - Compile-manifest seam (`manifest.py`) — write/load/hash round-trip.
16
+ - Adapters: contract + registry with entry-point plugin discovery and an `enabled`
17
+ allowlist; built-in `youtube` and `web` adapters (extras-gated, hermetically tested).
18
+ - `ingest` (resolve → fetch → raw → manifest with an `on_duplicate` dedup pre-check) and
19
+ `finish` (provenance verify → lint → commit, push only when opted in).
20
+ - Config-driven structural-health `lint` returning a structured `LintResult`, with a
21
+ golden-output test.
22
+ - Optional `discovery` backend (`qmd`) powering dedup, `related`, and `densify`; degrades
23
+ gracefully when the backend is `none` or the tool is absent.
24
+ - CLI commands: `init`, `adapters`, `ingest`, `finish`, `lint`, `related`, `densify`,
25
+ and `doctor`.
26
+ - Extensive hermetic test suite (no network, no API keys) with a ≥90% coverage gate,
27
+ `ruff`, and a Python 3.11–3.13 CI matrix.
28
+
29
+ [Unreleased]: https://github.com/asachs/quarry-kb/commits/master
@@ -0,0 +1,67 @@
1
+ # Contributing to Quarry
2
+
3
+ Thanks for your interest. Quarry is small, deterministic, and heavily tested by design —
4
+ contributions are expected to keep it that way.
5
+
6
+ ## Development setup
7
+
8
+ Quarry uses [`uv`](https://docs.astral.sh/uv/) for environments and Python management.
9
+
10
+ ```bash
11
+ uv run --extra dev --extra all python -m pytest # run the suite
12
+ uv run --extra dev ruff check . # lint
13
+ uv run --extra dev --extra all python -m coverage run -m pytest \
14
+ && uv run --extra dev python -m coverage report --fail-under=90
15
+ ```
16
+
17
+ Quality gates (enforced in CI): **all tests pass**, **`ruff` clean**, **coverage ≥ 90%**,
18
+ across Python 3.11–3.13.
19
+
20
+ ## The adapter contract
21
+
22
+ A new source type is a small, self-contained adapter — no fork required.
23
+
24
+ ```python
25
+ from quarry.adapters.base import Adapter, FetchResult
26
+
27
+ class PdfAdapter(Adapter):
28
+ name = "pdf"
29
+
30
+ def matches(self, url: str) -> bool:
31
+ return url.lower().endswith(".pdf")
32
+
33
+ def fetch(self, url: str) -> FetchResult:
34
+ # Put network/IO in small overridable methods so tests stay hermetic.
35
+ return FetchResult(content="...", metadata={
36
+ "title": "...", "url": url, "date": "2026-01-01", "source_id": "...",
37
+ })
38
+ ```
39
+
40
+ Register it via an entry point in your package's `pyproject.toml`:
41
+
42
+ ```toml
43
+ [project.entry-points."quarry.adapters"]
44
+ pdf = "your_package.pdf:PdfAdapter"
45
+ ```
46
+
47
+ Then enable it in the consuming wiki's `quarry.toml`:
48
+
49
+ ```toml
50
+ [adapters]
51
+ enabled = ["youtube", "web", "pdf"]
52
+ ```
53
+
54
+ ### Requirements for an adapter PR
55
+
56
+ 1. **Hermetic test.** Network calls live behind overridable methods; tests use a recorded
57
+ fixture/cassette and must pass with no network and no API keys.
58
+ 2. **One `@pytest.mark.integration` live test** is allowed (excluded from the default run).
59
+ 3. **Clean errors.** A missing optional dependency raises a `QuarryError` with an install
60
+ hint — never a bare `ImportError` traceback.
61
+
62
+ ## Principles to respect
63
+
64
+ - **Quarry never calls an LLM.** The article-writing step happens between `ingest` and
65
+ `finish`, performed by a human or agent — not by Quarry.
66
+ - **No hardcoded conventions.** Everything configurable flows through the `Config` dataclass.
67
+ - **Fail loud, fail tested.** Every mechanical step has a test that catches its break.