rag-spine 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rag_spine-0.1.1/.claude/settings.json +22 -0
- rag_spine-0.1.1/.claude/skills/ragspine-make/SKILL.md +68 -0
- rag_spine-0.1.1/.githooks/pre-push +9 -0
- rag_spine-0.1.1/.github/workflows/ci.yml +38 -0
- rag_spine-0.1.1/.github/workflows/release.yml +57 -0
- rag_spine-0.1.1/.gitignore +40 -0
- rag_spine-0.1.1/.project-root +0 -0
- rag_spine-0.1.1/CLAUDE.md +94 -0
- rag_spine-0.1.1/LICENSE +201 -0
- rag_spine-0.1.1/Makefile +127 -0
- rag_spine-0.1.1/NOTICE +9 -0
- rag_spine-0.1.1/PKG-INFO +308 -0
- rag_spine-0.1.1/README.md +250 -0
- rag_spine-0.1.1/config/company.example.toml +52 -0
- rag_spine-0.1.1/data/.gitkeep +0 -0
- rag_spine-0.1.1/data/golden/qa_baseline.json +22 -0
- rag_spine-0.1.1/data/golden/qa_golden_set.jsonl +41 -0
- rag_spine-0.1.1/data/golden/query_metric_tool_schema.json +65 -0
- rag_spine-0.1.1/data/golden/retrieval_ab_corpus.jsonl +16 -0
- rag_spine-0.1.1/data/golden/retrieval_ab_real.jsonl +12 -0
- rag_spine-0.1.1/data/golden/retrieval_ab_sample.jsonl +3 -0
- rag_spine-0.1.1/docs/README.md +68 -0
- rag_spine-0.1.1/docs/adr/0001-dual-channel-determinism.md +25 -0
- rag_spine-0.1.1/docs/adr/0002-product-direction.md +76 -0
- rag_spine-0.1.1/docs/adr/0003-audience-oss-library.md +44 -0
- rag_spine-0.1.1/docs/adr/0004-domain-profile-generalization.md +48 -0
- rag_spine-0.1.1/docs/adr/0005-lean-core-experimental-isolation.md +47 -0
- rag_spine-0.1.1/docs/adr/0006-quality-bar-invariants-and-benchmark.md +51 -0
- rag_spine-0.1.1/docs/adr/0007-multilingual-architect-for-five-ship-two.md +46 -0
- rag_spine-0.1.1/docs/adr/0008-prompt-registry-packaging.md +44 -0
- rag_spine-0.1.1/docs/adr/0009-dependency-and-framework-policy.md +53 -0
- rag_spine-0.1.1/docs/adr/0010-intent-parser-security-decoupling.md +57 -0
- rag_spine-0.1.1/docs/adr/0011-python-project-standard-divergences.md +82 -0
- rag_spine-0.1.1/docs/architecture.md +24 -0
- rag_spine-0.1.1/docs/generated/.gitkeep +0 -0
- rag_spine-0.1.1/docs/glossary.md +13 -0
- rag_spine-0.1.1/docs/invariants.md +30 -0
- rag_spine-0.1.1/docs/prd-breadth-via-adapters.md +246 -0
- rag_spine-0.1.1/docs/prd-pipeline-topology-export.md +181 -0
- rag_spine-0.1.1/docs/prd-vector-store-seam.md +202 -0
- rag_spine-0.1.1/pyproject.toml +156 -0
- rag_spine-0.1.1/scripts/ask.py +100 -0
- rag_spine-0.1.1/scripts/build_docs.py +528 -0
- rag_spine-0.1.1/scripts/check_doc_drift.py +143 -0
- rag_spine-0.1.1/scripts/check_docstring_refs.py +202 -0
- rag_spine-0.1.1/scripts/ci.sh +44 -0
- rag_spine-0.1.1/scripts/classify_pdfs.py +135 -0
- rag_spine-0.1.1/scripts/eval_retrieval_ab.py +280 -0
- rag_spine-0.1.1/scripts/ingest.py +139 -0
- rag_spine-0.1.1/scripts/ingest_narrative.py +84 -0
- rag_spine-0.1.1/scripts/lint.sh +24 -0
- rag_spine-0.1.1/scripts/make_fixtures_excel.py +424 -0
- rag_spine-0.1.1/scripts/make_fixtures_pdf.py +477 -0
- rag_spine-0.1.1/scripts/make_fixtures_pptx.py +457 -0
- rag_spine-0.1.1/scripts/make_synthetic_deck.py +177 -0
- rag_spine-0.1.1/scripts/run_demo.py +168 -0
- rag_spine-0.1.1/scripts/run_qa_eval.py +104 -0
- rag_spine-0.1.1/scripts/run_server.py +39 -0
- rag_spine-0.1.1/scripts/run_worker.py +40 -0
- rag_spine-0.1.1/scripts/topology.py +94 -0
- rag_spine-0.1.1/src/ragspine/__init__.py +30 -0
- rag_spine-0.1.1/src/ragspine/agent/CLAUDE.md +87 -0
- rag_spine-0.1.1/src/ragspine/agent/__init__.py +12 -0
- rag_spine-0.1.1/src/ragspine/agent/agent.py +544 -0
- rag_spine-0.1.1/src/ragspine/agent/intent.py +403 -0
- rag_spine-0.1.1/src/ragspine/agent/llm_provider.py +265 -0
- rag_spine-0.1.1/src/ragspine/agent/query_tools.py +252 -0
- rag_spine-0.1.1/src/ragspine/agent/security_gate.py +116 -0
- rag_spine-0.1.1/src/ragspine/common/CLAUDE.md +30 -0
- rag_spine-0.1.1/src/ragspine/common/__init__.py +12 -0
- rag_spine-0.1.1/src/ragspine/common/company_profile.py +370 -0
- rag_spine-0.1.1/src/ragspine/common/core.py +23 -0
- rag_spine-0.1.1/src/ragspine/common/glossary.py +210 -0
- rag_spine-0.1.1/src/ragspine/common/observability.py +32 -0
- rag_spine-0.1.1/src/ragspine/common/sensitivity.py +56 -0
- rag_spine-0.1.1/src/ragspine/eval/CLAUDE.md +37 -0
- rag_spine-0.1.1/src/ragspine/eval/__init__.py +9 -0
- rag_spine-0.1.1/src/ragspine/eval/extraction_eval.py +154 -0
- rag_spine-0.1.1/src/ragspine/eval/qa_eval.py +717 -0
- rag_spine-0.1.1/src/ragspine/extraction/CLAUDE.md +36 -0
- rag_spine-0.1.1/src/ragspine/extraction/__init__.py +12 -0
- rag_spine-0.1.1/src/ragspine/extraction/color/__init__.py +7 -0
- rag_spine-0.1.1/src/ragspine/extraction/color/color_semantics.py +322 -0
- rag_spine-0.1.1/src/ragspine/extraction/extractors/__init__.py +13 -0
- rag_spine-0.1.1/src/ragspine/extraction/extractors/pdf_digital_extractor.py +228 -0
- rag_spine-0.1.1/src/ragspine/extraction/extractors/pdf_scanned_extractor.py +447 -0
- rag_spine-0.1.1/src/ragspine/extraction/extractors/pptx_extractor.py +210 -0
- rag_spine-0.1.1/src/ragspine/extraction/extractors/pptx_styled_extractor.py +357 -0
- rag_spine-0.1.1/src/ragspine/extraction/extractors/xlsx_extractor.py +100 -0
- rag_spine-0.1.1/src/ragspine/extraction/extractors/xlsx_styled_extractor.py +188 -0
- rag_spine-0.1.1/src/ragspine/extraction/ir.py +111 -0
- rag_spine-0.1.1/src/ragspine/extraction/routing/__init__.py +7 -0
- rag_spine-0.1.1/src/ragspine/extraction/routing/pdf_router.py +230 -0
- rag_spine-0.1.1/src/ragspine/extraction/verification/__init__.py +7 -0
- rag_spine-0.1.1/src/ragspine/extraction/verification/dual_channel_verifier.py +216 -0
- rag_spine-0.1.1/src/ragspine/ingestion/CLAUDE.md +29 -0
- rag_spine-0.1.1/src/ragspine/ingestion/__init__.py +9 -0
- rag_spine-0.1.1/src/ragspine/ingestion/narrative/__init__.py +8 -0
- rag_spine-0.1.1/src/ragspine/ingestion/narrative/narrative_extract.py +152 -0
- rag_spine-0.1.1/src/ragspine/ingestion/narrative/narrative_ingest.py +284 -0
- rag_spine-0.1.1/src/ragspine/ingestion/review/__init__.py +7 -0
- rag_spine-0.1.1/src/ragspine/ingestion/review/review_queue.py +291 -0
- rag_spine-0.1.1/src/ragspine/ingestion/structured/__init__.py +9 -0
- rag_spine-0.1.1/src/ragspine/ingestion/structured/ingestion.py +628 -0
- rag_spine-0.1.1/src/ragspine/ingestion/structured/ingestion_manifest.py +317 -0
- rag_spine-0.1.1/src/ragspine/pipeline/CLAUDE.md +67 -0
- rag_spine-0.1.1/src/ragspine/pipeline/__init__.py +29 -0
- rag_spine-0.1.1/src/ragspine/pipeline/graph.py +158 -0
- rag_spine-0.1.1/src/ragspine/pipeline/topology.py +213 -0
- rag_spine-0.1.1/src/ragspine/py.typed +0 -0
- rag_spine-0.1.1/src/ragspine/retrieval/CLAUDE.md +32 -0
- rag_spine-0.1.1/src/ragspine/retrieval/__init__.py +12 -0
- rag_spine-0.1.1/src/ragspine/retrieval/chunking/__init__.py +8 -0
- rag_spine-0.1.1/src/ragspine/retrieval/chunking/chunk_store.py +211 -0
- rag_spine-0.1.1/src/ragspine/retrieval/chunking/chunking.py +202 -0
- rag_spine-0.1.1/src/ragspine/retrieval/lexical/__init__.py +7 -0
- rag_spine-0.1.1/src/ragspine/retrieval/lexical/retrieval.py +422 -0
- rag_spine-0.1.1/src/ragspine/retrieval/link/__init__.py +7 -0
- rag_spine-0.1.1/src/ragspine/retrieval/link/narrative_link.py +145 -0
- rag_spine-0.1.1/src/ragspine/retrieval/rerank/__init__.py +7 -0
- rag_spine-0.1.1/src/ragspine/retrieval/rerank/listwise_rerank.py +135 -0
- rag_spine-0.1.1/src/ragspine/retrieval/vector/__init__.py +9 -0
- rag_spine-0.1.1/src/ragspine/retrieval/vector/embedding_backends.py +314 -0
- rag_spine-0.1.1/src/ragspine/retrieval/vector/store.py +171 -0
- rag_spine-0.1.1/src/ragspine/service/CLAUDE.md +30 -0
- rag_spine-0.1.1/src/ragspine/service/__init__.py +11 -0
- rag_spine-0.1.1/src/ragspine/service/api/__init__.py +11 -0
- rag_spine-0.1.1/src/ragspine/service/api/app.py +38 -0
- rag_spine-0.1.1/src/ragspine/service/api/dependencies.py +29 -0
- rag_spine-0.1.1/src/ragspine/service/api/routes.py +291 -0
- rag_spine-0.1.1/src/ragspine/service/api/schemas.py +78 -0
- rag_spine-0.1.1/src/ragspine/service/config.py +120 -0
- rag_spine-0.1.1/src/ragspine/service/faq/__init__.py +8 -0
- rag_spine-0.1.1/src/ragspine/service/faq/faq_cache.py +170 -0
- rag_spine-0.1.1/src/ragspine/service/tasks/__init__.py +8 -0
- rag_spine-0.1.1/src/ragspine/service/tasks/jobs.py +173 -0
- rag_spine-0.1.1/src/ragspine/service/tasks/task_queue.py +202 -0
- rag_spine-0.1.1/src/ragspine/storage/CLAUDE.md +42 -0
- rag_spine-0.1.1/src/ragspine/storage/__init__.py +7 -0
- rag_spine-0.1.1/src/ragspine/storage/fact_store.py +358 -0
- rag_spine-0.1.1/tests/__init__.py +0 -0
- rag_spine-0.1.1/tests/agent/__init__.py +0 -0
- rag_spine-0.1.1/tests/agent/test_agent_orchestrator.py +263 -0
- rag_spine-0.1.1/tests/agent/test_composite.py +265 -0
- rag_spine-0.1.1/tests/agent/test_external_entity_guard.py +571 -0
- rag_spine-0.1.1/tests/agent/test_intent.py +221 -0
- rag_spine-0.1.1/tests/agent/test_intent_parser.py +138 -0
- rag_spine-0.1.1/tests/agent/test_llm_provider.py +221 -0
- rag_spine-0.1.1/tests/agent/test_query_tools_schema.py +119 -0
- rag_spine-0.1.1/tests/agent/test_security_gate.py +145 -0
- rag_spine-0.1.1/tests/common/__init__.py +0 -0
- rag_spine-0.1.1/tests/common/test_company_generalization.py +335 -0
- rag_spine-0.1.1/tests/common/test_company_profile.py +75 -0
- rag_spine-0.1.1/tests/common/test_domain_profile.py +87 -0
- rag_spine-0.1.1/tests/common/test_domain_profile_vocab_frozen.py +39 -0
- rag_spine-0.1.1/tests/common/test_observability_resilience.py +369 -0
- rag_spine-0.1.1/tests/common/test_sensitivity.py +373 -0
- rag_spine-0.1.1/tests/conformance/__init__.py +0 -0
- rag_spine-0.1.1/tests/conformance/conftest.py +69 -0
- rag_spine-0.1.1/tests/conformance/test_vector_store_contract.py +291 -0
- rag_spine-0.1.1/tests/conformance/test_vector_store_invariants.py +112 -0
- rag_spine-0.1.1/tests/conftest.py +175 -0
- rag_spine-0.1.1/tests/e2e/__init__.py +0 -0
- rag_spine-0.1.1/tests/e2e/test_ask_e2e.py +86 -0
- rag_spine-0.1.1/tests/e2e/test_e2e.py +190 -0
- rag_spine-0.1.1/tests/e2e/test_fixture_regeneration.py +300 -0
- rag_spine-0.1.1/tests/eval/__init__.py +0 -0
- rag_spine-0.1.1/tests/eval/test_extraction_eval.py +354 -0
- rag_spine-0.1.1/tests/eval/test_lab_domain.py +401 -0
- rag_spine-0.1.1/tests/eval/test_qa_eval.py +598 -0
- rag_spine-0.1.1/tests/eval/test_qa_eval_fabrication_frozen.py +21 -0
- rag_spine-0.1.1/tests/extraction/__init__.py +0 -0
- rag_spine-0.1.1/tests/extraction/color/__init__.py +0 -0
- rag_spine-0.1.1/tests/extraction/color/test_color_semantics.py +341 -0
- rag_spine-0.1.1/tests/extraction/extractors/__init__.py +0 -0
- rag_spine-0.1.1/tests/extraction/extractors/test_pdf_digital_extractor.py +260 -0
- rag_spine-0.1.1/tests/extraction/extractors/test_pdf_scanned_extractor.py +455 -0
- rag_spine-0.1.1/tests/extraction/extractors/test_pptx_styled_extractor.py +479 -0
- rag_spine-0.1.1/tests/extraction/extractors/test_scanned_gpu.py +243 -0
- rag_spine-0.1.1/tests/extraction/extractors/test_xlsx_styled_extractor.py +313 -0
- rag_spine-0.1.1/tests/extraction/routing/__init__.py +0 -0
- rag_spine-0.1.1/tests/extraction/routing/test_pdf_router.py +461 -0
- rag_spine-0.1.1/tests/extraction/test_ir.py +264 -0
- rag_spine-0.1.1/tests/extraction/verification/__init__.py +0 -0
- rag_spine-0.1.1/tests/extraction/verification/test_dual_channel_verifier.py +422 -0
- rag_spine-0.1.1/tests/ingestion/__init__.py +0 -0
- rag_spine-0.1.1/tests/ingestion/narrative/__init__.py +0 -0
- rag_spine-0.1.1/tests/ingestion/narrative/test_narrative_extract.py +214 -0
- rag_spine-0.1.1/tests/ingestion/narrative/test_narrative_ingest.py +298 -0
- rag_spine-0.1.1/tests/ingestion/review/__init__.py +0 -0
- rag_spine-0.1.1/tests/ingestion/review/test_review_queue.py +378 -0
- rag_spine-0.1.1/tests/ingestion/structured/__init__.py +0 -0
- rag_spine-0.1.1/tests/ingestion/structured/test_ingestion.py +272 -0
- rag_spine-0.1.1/tests/ingestion/structured/test_manifest.py +348 -0
- rag_spine-0.1.1/tests/ingestion/test_ingest_cli.py +245 -0
- rag_spine-0.1.1/tests/ingestion/test_ingest_dispatch.py +679 -0
- rag_spine-0.1.1/tests/pipeline/__init__.py +0 -0
- rag_spine-0.1.1/tests/pipeline/test_cli.py +102 -0
- rag_spine-0.1.1/tests/pipeline/test_graph.py +192 -0
- rag_spine-0.1.1/tests/pipeline/test_topology.py +331 -0
- rag_spine-0.1.1/tests/retrieval/__init__.py +0 -0
- rag_spine-0.1.1/tests/retrieval/chunking/__init__.py +0 -0
- rag_spine-0.1.1/tests/retrieval/chunking/test_chunk_store.py +193 -0
- rag_spine-0.1.1/tests/retrieval/chunking/test_chunking.py +203 -0
- rag_spine-0.1.1/tests/retrieval/lexical/__init__.py +0 -0
- rag_spine-0.1.1/tests/retrieval/lexical/test_retrieval.py +449 -0
- rag_spine-0.1.1/tests/retrieval/link/__init__.py +0 -0
- rag_spine-0.1.1/tests/retrieval/link/test_narrative_link.py +420 -0
- rag_spine-0.1.1/tests/retrieval/rerank/__init__.py +0 -0
- rag_spine-0.1.1/tests/retrieval/rerank/test_listwise_rerank.py +265 -0
- rag_spine-0.1.1/tests/retrieval/test_retrieval_ab.py +271 -0
- rag_spine-0.1.1/tests/retrieval/vector/__init__.py +0 -0
- rag_spine-0.1.1/tests/retrieval/vector/test_embedding_backends.py +195 -0
- rag_spine-0.1.1/tests/retrieval/vector/test_embedding_deterministic.py +374 -0
- rag_spine-0.1.1/tests/retrieval/vector/test_embedding_device.py +269 -0
- rag_spine-0.1.1/tests/service/__init__.py +0 -0
- rag_spine-0.1.1/tests/service/api/__init__.py +0 -0
- rag_spine-0.1.1/tests/service/api/test_api_ask.py +305 -0
- rag_spine-0.1.1/tests/service/api/test_api_health.py +85 -0
- rag_spine-0.1.1/tests/service/api/test_api_jobs.py +241 -0
- rag_spine-0.1.1/tests/service/faq/__init__.py +0 -0
- rag_spine-0.1.1/tests/service/faq/test_faq_cache.py +244 -0
- rag_spine-0.1.1/tests/service/tasks/__init__.py +0 -0
- rag_spine-0.1.1/tests/service/tasks/test_ingest_jobs.py +191 -0
- rag_spine-0.1.1/tests/service/tasks/test_task_queue.py +134 -0
- rag_spine-0.1.1/tests/service/test_service_config.py +216 -0
- rag_spine-0.1.1/tests/service/test_service_integration.py +73 -0
- rag_spine-0.1.1/tests/storage/__init__.py +0 -0
- rag_spine-0.1.1/tests/storage/test_dim_key.py +190 -0
- rag_spine-0.1.1/tests/storage/test_fact_store_v2.py +342 -0
- rag_spine-0.1.1/tests/storage/test_fact_temporal.py +406 -0
- rag_spine-0.1.1/uv.lock +5782 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json.schemastore.org/claude-code-settings.json",
|
|
3
|
+
"permissions": {
|
|
4
|
+
"allow": [
|
|
5
|
+
"Bash(.venv/bin/python:*)",
|
|
6
|
+
"Bash(.venv/bin/python -m pytest:*)",
|
|
7
|
+
"Bash(python:*)",
|
|
8
|
+
"Bash(python3:*)",
|
|
9
|
+
"Bash(uv venv:*)",
|
|
10
|
+
"Bash(uv pip:*)",
|
|
11
|
+
"Bash(uv run:*)",
|
|
12
|
+
"Bash(ruff:*)",
|
|
13
|
+
"Bash(mypy:*)",
|
|
14
|
+
"Bash(git status:*)",
|
|
15
|
+
"Bash(git diff:*)",
|
|
16
|
+
"Bash(git log:*)",
|
|
17
|
+
"Bash(git show:*)",
|
|
18
|
+
"Bash(git branch:*)"
|
|
19
|
+
],
|
|
20
|
+
"deny": []
|
|
21
|
+
}
|
|
22
|
+
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: ragspine-make
|
|
3
|
+
description: "How to run dev / CI-CD tasks in the RAGSpine repo via the Makefile. Use whenever you need to install deps, run tests, run the local CI gate, lint/format, check doc drift, run the demo, ask a question, run evals, start the server/worker, or regenerate fixtures in this project. Keywords: make, Makefile, run tests, pytest, CI, ci gate, pre-push, lint, ruff, mypy, format, doc drift, demo, ask, eval, qa eval, retrieval ab, serve, FastAPI, worker, RQ, redis, fixtures, install, venv, uv, hooks, build docs, 跑测试, 跑CI, 本地CI, 格式化, 漂移, 评估, 启动服务, 安装依赖, 怎么运行, 命令"
|
|
4
|
+
allowed-tools: ["Bash", "Read"]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# RAGSpine — dev / CI-CD commands (Makefile)
|
|
8
|
+
|
|
9
|
+
The repo wraps common commands in a root `Makefile`. **Always run from the repo root**
|
|
10
|
+
(scripts anchor on `.project-root`). `make help` lists every target. The raw commands
|
|
11
|
+
live in `scripts/`; the Makefile is a thin, discoverable wrapper over them.
|
|
12
|
+
|
|
13
|
+
**Interpreter:** every target defaults to `.venv/bin/python`. Override with
|
|
14
|
+
`make <target> PYTHON=python3.12` (passed through to `scripts/ci.sh` / `lint.sh` too).
|
|
15
|
+
|
|
16
|
+
## First-time setup
|
|
17
|
+
|
|
18
|
+
| Command | What it does | Notes |
|
|
19
|
+
|---|---|---|
|
|
20
|
+
| `make venv` | `uv venv .venv` | Creates the project venv. |
|
|
21
|
+
| `make install` | editable install, `[dev,service]` extras | The usual dev setup. |
|
|
22
|
+
| `make install-all` | editable install, `[dev,service,llm,embed]` | Adds real LLM + embedding backends (heavier). `[pdf]`/`[ocr]` are platform-specific and intentionally excluded. |
|
|
23
|
+
| `make hooks` | `git config core.hooksPath .githooks` | **One-time per clone.** Enables the pre-push CI gate so red code never leaves the machine. Emergency bypass: `git push --no-verify`. |
|
|
24
|
+
|
|
25
|
+
## The quality gate
|
|
26
|
+
|
|
27
|
+
| Command | What it does | Notes |
|
|
28
|
+
|---|---|---|
|
|
29
|
+
| `make ci` | tests (`-m "not gpu"`) + demo smoke | **The gate** — exactly what the pre-push hook runs (`scripts/ci.sh`). This is the single source of truth for "is it green". |
|
|
30
|
+
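| `make test` | `pytest tests/ -q -m "not gpu"` | Expect every test to pass with **1 gpu-skipped**. The pass counts quoted in the docs disagree (**943 passed** here, **1078 passed** in the root `CLAUDE.md`), so rely on a zero-failure run rather than the exact count. |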
|
|
31
|
+
| `make test-all` | full suite incl. `gpu` marker | Needs Ubuntu + NVIDIA GPU + real OCR model; skipped/failing elsewhere. |
|
|
32
|
+
| `make lint` | ruff check + ruff format --check + mypy | **Informational, non-blocking** (always exits 0). The inherited codebase predates linting; adopt fixes incrementally — it is *not* wired into the CI gate yet. |
|
|
33
|
+
| `make fmt` | `ruff check --fix` + `ruff format` | Auto-fix lint + format `ragspine scripts tests`. |
|
|
34
|
+
| `make drift` | `scripts/check_doc_drift.py` | Flags docs whose covered code changed since `verified-against`. Expect **10 tracked, 0 stale**. See `docs/README.md` for the convention. |
|
|
35
|
+
|
|
36
|
+
GitHub Actions is dormant (manual-trigger only) to avoid consuming minutes — the local
|
|
37
|
+
gate is authoritative.
|
|
38
|
+
|
|
39
|
+
## Demo / ask / eval
|
|
40
|
+
|
|
41
|
+
| Command | What it does | Notes |
|
|
42
|
+
|---|---|---|
|
|
43
|
+
| `make demo` | `scripts/run_demo.py` | Offline end-to-end; expect `ALL CHECKS PASSED`. |
|
|
44
|
+
| `make ask Q="…"` | offline question via MockProvider | e.g. `make ask Q="中国内地FY2024的REVENUE是多少"`. Uses `--provider mock --db data/fact_metric.db`. Ask for missing data → honest refusal, never a guess. |
|
|
45
|
+
| `make eval-qa` | `scripts/run_qa_eval.py` | QA harness, **baseline-gated** (fails on regression below the frozen floor; don't launder a regression with `--update-baseline`). |
|
|
46
|
+
| `make eval-retrieval` | `scripts/eval_retrieval_ab.py` | BM25-vs-hybrid Recall@k / MRR. Default gold is synthetic (proves the harness math, not real recall). |
|
|
47
|
+
|
|
48
|
+
## Service
|
|
49
|
+
|
|
50
|
+
| Command | What it does | Notes |
|
|
51
|
+
|---|---|---|
|
|
52
|
+
| `make serve` | `scripts/run_server.py` | FastAPI app. |
|
|
53
|
+
| `make worker` | `scripts/run_worker.py` | RQ worker — **needs Redis running**. Tests use `FakeQueue`, so this is only for real async runs. |
|
|
54
|
+
|
|
55
|
+
## Fixtures / docs / housekeeping
|
|
56
|
+
|
|
57
|
+
| Command | What it does | Notes |
|
|
58
|
+
|---|---|---|
|
|
59
|
+
| `make fixtures` | regenerate synthetic demo data | Deterministic, regenerable (`make_synthetic_deck` + `make_fixtures_{excel,pptx,pdf}`). **Never add real-world data.** |
|
|
60
|
+
| `make docs` | `scripts/build_docs.py` | Static doc site (md→HTML). |
|
|
61
|
+
| `make clean` | remove `__pycache__` + `.pytest_cache` / `.ruff_cache` / `.mypy_cache` | |
|
|
62
|
+
|
|
63
|
+
## Typical loops
|
|
64
|
+
|
|
65
|
+
- **Before pushing:** `make ci` (or just push — the pre-push hook runs it).
|
|
66
|
+
- **TDD inner loop:** `make test` (fast, offline, deterministic).
|
|
67
|
+
- **Touched docs/code contracts:** `make drift`.
|
|
68
|
+
- **Cleaning up style:** `make fmt` then `make lint`.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
#
|
|
3
|
+
# Pre-push CI gate: run the local CI before allowing a push, so red code never leaves
|
|
4
|
+
# your machine. This replaces server-side CI (no GitHub Actions minutes used).
|
|
5
|
+
#
|
|
6
|
+
# Enable once per clone: git config core.hooksPath .githooks
|
|
7
|
+
# Bypass in an emergency: git push --no-verify
|
|
8
|
+
#
|
|
9
|
+
exec "$(git rev-parse --show-toplevel)/scripts/ci.sh"
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# RAGSpine CI.
|
|
2
|
+
#
|
|
3
|
+
# ⚠️ DORMANT BY DESIGN — manual trigger only (`workflow_dispatch`). This workflow does NOT
|
|
4
|
+
# run on push or pull_request, so it consumes ZERO GitHub Actions minutes automatically.
|
|
5
|
+
# The real gate runs LOCALLY: `scripts/ci.sh`, enforced by the `.githooks/pre-push` hook.
|
|
6
|
+
#
|
|
7
|
+
# To enable server-side CI later (when you have Actions quota), uncomment the `push:` /
|
|
8
|
+
# `pull_request:` triggers below. It runs the exact same `scripts/ci.sh` as your local gate.
|
|
9
|
+
name: CI
|
|
10
|
+
|
|
11
|
+
on:
|
|
12
|
+
workflow_dispatch:
|
|
13
|
+
# push:
|
|
14
|
+
# branches: [main]
|
|
15
|
+
# pull_request:
|
|
16
|
+
|
|
17
|
+
jobs:
|
|
18
|
+
ci:
|
|
19
|
+
strategy:
|
|
20
|
+
fail-fast: false
|
|
21
|
+
matrix:
|
|
22
|
+
# Expand to [ubuntu-latest, macos-latest, windows-latest] once enabled, if desired.
|
|
23
|
+
os: [ubuntu-latest]
|
|
24
|
+
python: ["3.10", "3.11", "3.12", "3.13"]
|
|
25
|
+
runs-on: ${{ matrix.os }}
|
|
26
|
+
steps:
|
|
27
|
+
- uses: actions/checkout@v4
|
|
28
|
+
- uses: actions/setup-python@v5
|
|
29
|
+
with:
|
|
30
|
+
python-version: ${{ matrix.python }}
|
|
31
|
+
- name: Install (dev + service + pdf)
|
|
32
|
+
run: |
|
|
33
|
+
python -m pip install -U pip
|
|
34
|
+
pip install -e ".[dev,service,pdf]"
|
|
35
|
+
- name: Run the local CI gate
|
|
36
|
+
run: bash scripts/ci.sh
|
|
37
|
+
env:
|
|
38
|
+
PYTHON: python
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Release → PyPI via Trusted Publishing (OIDC — NO stored token / secret).
|
|
2
|
+
#
|
|
3
|
+
# Trigger: publishing a GitHub Release. The `gate` job runs the exact same checks as
|
|
4
|
+
# `scripts/ci.sh` across the support matrix; only if every leg is green does `publish`
|
|
5
|
+
# build + upload the `rag-spine` distribution.
|
|
6
|
+
#
|
|
7
|
+
# ── One-time PyPI setup (web UI; needs account login) ──────────────────────────────────
|
|
8
|
+
# The `rag-spine` project already exists, so add a trusted publisher on it directly:
|
|
9
|
+
# Manage → rag-spine → Publishing → "Add a new publisher":
|
|
10
|
+
# Owner: VoldemortGin Repository: ragspine Workflow: release.yml Environment: (blank)
|
|
11
|
+
#
|
|
12
|
+
# ── To cut a release ───────────────────────────────────────────────────────────────────
|
|
13
|
+
# Bump `version` in pyproject.toml, commit, then publish a GitHub Release tagged e.g.
|
|
14
|
+
# v0.1.0. The published version is taken from pyproject, not the tag — keep them in sync.
|
|
15
|
+
name: Release
|
|
16
|
+
|
|
17
|
+
on:
|
|
18
|
+
release:
|
|
19
|
+
types: [published]
|
|
20
|
+
|
|
21
|
+
jobs:
|
|
22
|
+
gate:
|
|
23
|
+
strategy:
|
|
24
|
+
fail-fast: true
|
|
25
|
+
matrix:
|
|
26
|
+
python: ["3.10", "3.11", "3.12", "3.13"]
|
|
27
|
+
runs-on: ubuntu-latest
|
|
28
|
+
steps:
|
|
29
|
+
- uses: actions/checkout@v4
|
|
30
|
+
- uses: actions/setup-python@v5
|
|
31
|
+
with:
|
|
32
|
+
python-version: ${{ matrix.python }}
|
|
33
|
+
- name: Install (dev + service + pdf)
|
|
34
|
+
run: |
|
|
35
|
+
python -m pip install -U pip
|
|
36
|
+
pip install -e ".[dev,service,pdf]"
|
|
37
|
+
- name: Gate (scripts/ci.sh)
|
|
38
|
+
run: bash scripts/ci.sh
|
|
39
|
+
env:
|
|
40
|
+
PYTHON: python
|
|
41
|
+
|
|
42
|
+
publish:
|
|
43
|
+
needs: gate
|
|
44
|
+
runs-on: ubuntu-latest
|
|
45
|
+
permissions:
|
|
46
|
+
id-token: write # OIDC — required for Trusted Publishing
|
|
47
|
+
steps:
|
|
48
|
+
- uses: actions/checkout@v4
|
|
49
|
+
- uses: actions/setup-python@v5
|
|
50
|
+
with:
|
|
51
|
+
python-version: "3.13"
|
|
52
|
+
- name: Build sdist + wheel
|
|
53
|
+
run: |
|
|
54
|
+
python -m pip install -U build
|
|
55
|
+
python -m build
|
|
56
|
+
- name: Publish to PyPI (Trusted Publishing / OIDC)
|
|
57
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Virtualenvs & caches
|
|
2
|
+
.venv/
|
|
3
|
+
venv/
|
|
4
|
+
__pycache__/
|
|
5
|
+
.pytest_cache/
|
|
6
|
+
.ruff_cache/
|
|
7
|
+
.mypy_cache/
|
|
8
|
+
*.pyc
|
|
9
|
+
*.egg-info/
|
|
10
|
+
build/
|
|
11
|
+
dist/
|
|
12
|
+
|
|
13
|
+
# data/ holds regenerable artifacts (synthetic fixtures, sqlite DBs) — ignore its
|
|
14
|
+
# contents, but KEEP the directory present and force-track the version-controlled
|
|
15
|
+
# golden evaluation sets. NOTE: this must be `data/*` (ignore contents), not `data/`
|
|
16
|
+
# (ignore the dir) — git won't descend into a wholesale-excluded directory, so a bare
|
|
17
|
+
# `!data/golden/` would have no effect.
|
|
18
|
+
data/*
|
|
19
|
+
!data/golden/
|
|
20
|
+
!data/.gitkeep
|
|
21
|
+
|
|
22
|
+
# Claude Code: share project settings, ignore local/personal overrides
|
|
23
|
+
.claude/settings.local.json
|
|
24
|
+
.claude/*.local.json
|
|
25
|
+
|
|
26
|
+
# Generated docs (API ref, symbol/dependency indexes) — script-produced, regenerable.
|
|
27
|
+
# Ignore contents but keep the directory present, mirroring the data/ rule above.
|
|
28
|
+
docs/generated/*
|
|
29
|
+
!docs/generated/.gitkeep
|
|
30
|
+
|
|
31
|
+
# Built static doc site (make docs → pdoc API reference); regenerable, nginx-served.
|
|
32
|
+
docs/site/
|
|
33
|
+
|
|
34
|
+
# OS / editor
|
|
35
|
+
.DS_Store
|
|
36
|
+
.idea/
|
|
37
|
+
.vscode/
|
|
38
|
+
|
|
39
|
+
# Local tooling artifacts (eval scratch output)
|
|
40
|
+
.jbeval/
|
|
File without changes
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# CLAUDE.md — RAGSpine
|
|
2
|
+
|
|
3
|
+
Working contract for AI coding sessions (and humans) on this repo. Read `README.md` for the
|
|
4
|
+
full pitch; this file is the operational guide.
|
|
5
|
+
|
|
6
|
+
## What this is
|
|
7
|
+
|
|
8
|
+
RAGSpine — a **framework-free backend RAG engine** (no Dify/LangGraph). Deterministic
|
|
9
|
+
dual-channel (structured numeric + narrative RAG) + agent orchestration, with
|
|
10
|
+
**anti-fabrication** and **source provenance** as code-enforced invariants. Every external
|
|
11
|
+
dependency is a `Protocol`, so the core imports zero SDKs and runs offline.
|
|
12
|
+
|
|
13
|
+
## Layout (deep, domain-grouped — find the file by folder first)
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
src/ragspine/{common, extraction, ingestion, storage, retrieval, agent, pipeline, eval, service}
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
See the "Architecture" tree in `README.md` for what lives where and the request flow.
|
|
20
|
+
|
|
21
|
+
## Docs map (find docs by folder, like code)
|
|
22
|
+
|
|
23
|
+
Docs are a retrieval system, not a library — at scale no agent reads them whole.
|
|
24
|
+
Full convention in `docs/README.md`. Short version:
|
|
25
|
+
|
|
26
|
+
- **This file** — the always-on routing table. Keep it small; no content lives here.
|
|
27
|
+
- `src/ragspine/<domain>/CLAUDE.md` — per-domain contract, auto-loaded in that subtree.
|
|
28
|
+
- `src/ragspine/<domain>/docs/*.md` — deep dives, pulled by grep / explicit read.
|
|
29
|
+
- `docs/` — cross-cutting: `architecture.md`, `invariants.md`, `glossary.md`, `adr/`.
|
|
30
|
+
- `docs/generated/` — script-produced (API ref, indexes); git-ignored, never hand-edited.
|
|
31
|
+
|
|
32
|
+
Docs describing code carry `covers:` + `verified-against:` frontmatter;
|
|
33
|
+
`scripts/check_doc_drift.py` flags any whose code changed since last verified.
|
|
34
|
+
|
|
35
|
+
## Run (always from the project root)
|
|
36
|
+
|
|
37
|
+
- **Make:** common commands are wrapped in the repo-root `Makefile` — `make help` lists them
|
|
38
|
+
(`make install` · `make ci` · `make test` · `make demo` · `make lint` · `make fmt` ·
|
|
39
|
+
`make drift` · `make hooks` · `make serve`/`make worker` · `make ask Q="…"`). Override the
|
|
40
|
+
interpreter with `make <t> PYTHON=python3.12`. The raw commands below remain the source of truth.
|
|
41
|
+
- **Setup:** `uv venv .venv` then `VIRTUAL_ENV="$(pwd)/.venv" uv pip install -e ".[dev,service]"`
|
|
42
|
+
(the `VIRTUAL_ENV=` prefix is required so `uv` targets this venv, not a system Python).
|
|
43
|
+
Extras: `[pdf]` `[ocr]` `[llm]` `[embed]`.
|
|
44
|
+
- **Tests:** `.venv/bin/python -m pytest tests/ -q` → expect **1078 passed, 1 gpu-skipped**.
|
|
45
|
+
- **CI (local):** `scripts/ci.sh` is the gate (tests + demo smoke); enable the pre-push hook
|
|
46
|
+
once with `git config core.hooksPath .githooks`. GitHub Actions is dormant (manual-only) to
|
|
47
|
+
avoid consuming quota — see README "Continuous integration (local)".
|
|
48
|
+
- **Demo:** `.venv/bin/python scripts/run_demo.py` → expect `ALL CHECKS PASSED`.
|
|
49
|
+
- **Ask:** `.venv/bin/python scripts/ask.py --provider mock --db data/fact_metric.db "中国内地FY2024的REVENUE是多少"`.
|
|
50
|
+
- **Service:** `scripts/run_server.py` (FastAPI) + `scripts/run_worker.py` (RQ, needs Redis).
|
|
51
|
+
|
|
52
|
+
## Conventions
|
|
53
|
+
|
|
54
|
+
- **TDD** — tests are the spec: write the failing test first, then implement to green.
|
|
55
|
+
Never weaken a test to make it pass; if a frozen/regression test breaks, you broke behavior.
|
|
56
|
+
- Python **3.10+** type hints. Import order: **stdlib > third-party > local**. Stay simple;
|
|
57
|
+
match the surrounding style.
|
|
58
|
+
- **Deep, domain-grouped layout** — organize by domain/feature, never by technical layer
|
|
59
|
+
(a *Screaming Architecture* / *package-by-feature* stance): the folder path should locate a
|
|
60
|
+
file before you read its name. Split a package as soon as it holds a second responsibility;
|
|
61
|
+
prefer pushing code into a deeper domain subtree over letting a flat module become a
|
|
62
|
+
catch-all. Depth tracks responsibility — go as deep as each level earns a distinct concern.
|
|
63
|
+
- **Cross-platform** (`pathlib`, no OS-specific calls). Run scripts/tests from the repo root.
|
|
64
|
+
- **Minimal diff** — change only what the task needs; don't refactor unrelated code or delete
|
|
65
|
+
pre-existing dead code.
|
|
66
|
+
|
|
67
|
+
## Invariants (do not break)
|
|
68
|
+
|
|
69
|
+
- **Anti-fabrication** — when the structured channel returns no `found` fact, the orchestrator
|
|
70
|
+
rewrites the answer to "not found" regardless of model output (`agent/agent.py`).
|
|
71
|
+
- **Provenance** — every fact/answer carries `source_doc_id` + locator. Don't drop lineage.
|
|
72
|
+
- **RESTRICTED isolation** — sensitivity-`RESTRICTED` content is filtered at **two** exits
|
|
73
|
+
(`retrieval/link`, `retrieval/rerank`) before it can reach a prompt.
|
|
74
|
+
- **Privacy-aware traces** — `common/observability` records codes/counts/timings only, never
|
|
75
|
+
answer / fact value / chunk text.
|
|
76
|
+
- **Config-driven, no hardcoded company** — identity / metrics / competitors come from
|
|
77
|
+
`CompanyProfile` (`config/company.example.toml`). Don't hardcode a company anywhere.
|
|
78
|
+
- **Pluggability** — LLM / embeddings / reranker / OCR / queue are `Protocol`s, lazy-imported.
|
|
79
|
+
The core runs offline with `MockProvider`.
|
|
80
|
+
|
|
81
|
+
## Demo data
|
|
82
|
+
|
|
83
|
+
The bundled demo uses a **fictional** company (ACME), synthetic figures, and a fictional
|
|
84
|
+
competitor set, generated by `scripts/make_*.py` (deterministic, regenerable). The
|
|
85
|
+
version-controlled golden evaluation sets live under `data/golden/` (force-tracked via the
|
|
86
|
+
`.gitignore` `!data/golden/` rule). **Never add real-world data.**
|
|
87
|
+
|
|
88
|
+
## Service layer
|
|
89
|
+
|
|
90
|
+
`service/`: `ServiceConfig` (env `RAGSPINE_*`), FastAPI app (app factory + dependency
|
|
91
|
+
injection), RQ task queue (`FakeQueue` for tests, `RQQueue` for prod), ingestion jobs
|
|
92
|
+
(worker-owned stores), and the FAQ short-circuit cache. The FAQ layer must keep its
|
|
93
|
+
conservative exclusions (structured-numeric / competitor / real-time / expired / disabled /
|
|
94
|
+
RESTRICTED never short-circuit) — it sits in front of the anti-fabrication guard.
|
rag_spine-0.1.1/LICENSE
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
Apache License
|
|
2
|
+
Version 2.0, January 2004
|
|
3
|
+
http://www.apache.org/licenses/
|
|
4
|
+
|
|
5
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
6
|
+
|
|
7
|
+
1. Definitions.
|
|
8
|
+
|
|
9
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
|
10
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
|
11
|
+
|
|
12
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
|
13
|
+
the copyright owner that is granting the License.
|
|
14
|
+
|
|
15
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
|
16
|
+
other entities that control, are controlled by, or are under common
|
|
17
|
+
control with that entity. For the purposes of this definition,
|
|
18
|
+
"control" means (i) the power, direct or indirect, to cause the
|
|
19
|
+
direction or management of such entity, whether by contract or
|
|
20
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
|
21
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
|
22
|
+
|
|
23
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
|
24
|
+
exercising permissions granted by this License.
|
|
25
|
+
|
|
26
|
+
"Source" form shall mean the preferred form for making modifications,
|
|
27
|
+
including but not limited to software source code, documentation
|
|
28
|
+
source, and configuration files.
|
|
29
|
+
|
|
30
|
+
"Object" form shall mean any form resulting from mechanical
|
|
31
|
+
transformation or translation of a Source form, including but
|
|
32
|
+
not limited to compiled object code, generated documentation,
|
|
33
|
+
and conversions to other media types.
|
|
34
|
+
|
|
35
|
+
"Work" shall mean the work of authorship, whether in Source or
|
|
36
|
+
Object form, made available under the License, as indicated by a
|
|
37
|
+
copyright notice that is included in or attached to the work
|
|
38
|
+
(an example is provided in the Appendix below).
|
|
39
|
+
|
|
40
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
|
41
|
+
form, that is based on (or derived from) the Work and for which the
|
|
42
|
+
editorial revisions, annotations, elaborations, or other modifications
|
|
43
|
+
represent, as a whole, an original work of authorship. For the purposes
|
|
44
|
+
of this License, Derivative Works shall not include works that remain
|
|
45
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
|
46
|
+
the Work and Derivative Works thereof.
|
|
47
|
+
|
|
48
|
+
"Contribution" shall mean any work of authorship, including
|
|
49
|
+
the original version of the Work and any modifications or additions
|
|
50
|
+
to that Work or Derivative Works thereof, that is intentionally
|
|
51
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
|
52
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
|
53
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
|
54
|
+
means any form of electronic, verbal, or written communication sent
|
|
55
|
+
to the Licensor or its representatives, including but not limited to
|
|
56
|
+
communication on electronic mailing lists, source code control systems,
|
|
57
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
|
58
|
+
Licensor for the purpose of discussing and improving the Work, but
|
|
59
|
+
excluding communication that is conspicuously marked or otherwise
|
|
60
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
|
61
|
+
|
|
62
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
|
63
|
+
on behalf of whom a Contribution has been received by Licensor and
|
|
64
|
+
subsequently incorporated within the Work.
|
|
65
|
+
|
|
66
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
|
67
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
68
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
69
|
+
copyright license to reproduce, prepare Derivative Works of,
|
|
70
|
+
publicly display, publicly perform, sublicense, and distribute the
|
|
71
|
+
Work and such Derivative Works in Source or Object form.
|
|
72
|
+
|
|
73
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
|
74
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
75
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
76
|
+
(except as stated in this section) patent license to make, have made,
|
|
77
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
|
78
|
+
where such license applies only to those patent claims licensable
|
|
79
|
+
by such Contributor that are necessarily infringed by their
|
|
80
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
|
81
|
+
with the Work to which such Contribution(s) was submitted. If You
|
|
82
|
+
institute patent litigation against any entity (including a
|
|
83
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
|
84
|
+
or a Contribution incorporated within the Work constitutes direct
|
|
85
|
+
or contributory patent infringement, then any patent licenses
|
|
86
|
+
granted to You under this License for that Work shall terminate
|
|
87
|
+
as of the date such litigation is filed.
|
|
88
|
+
|
|
89
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
|
90
|
+
Work or Derivative Works thereof in any medium, with or without
|
|
91
|
+
modifications, and in Source or Object form, provided that You
|
|
92
|
+
meet the following conditions:
|
|
93
|
+
|
|
94
|
+
(a) You must give any other recipients of the Work or Derivative
|
|
95
|
+
Works a copy of this License; and
|
|
96
|
+
|
|
97
|
+
(b) You must cause any modified files to carry prominent notices
|
|
98
|
+
stating that You changed the files; and
|
|
99
|
+
|
|
100
|
+
(c) You must retain, in the Source form of any Derivative Works
|
|
101
|
+
that You distribute, all copyright, patent, trademark, and
|
|
102
|
+
attribution notices from the Source form of the Work,
|
|
103
|
+
excluding those notices that do not pertain to any part of
|
|
104
|
+
the Derivative Works; and
|
|
105
|
+
|
|
106
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
|
107
|
+
distribution, then any Derivative Works that You distribute must
|
|
108
|
+
include a readable copy of the attribution notices contained
|
|
109
|
+
within such NOTICE file, excluding those notices that do not
|
|
110
|
+
pertain to any part of the Derivative Works, in at least one
|
|
111
|
+
of the following places: within a NOTICE text file distributed
|
|
112
|
+
as part of the Derivative Works; within the Source form or
|
|
113
|
+
documentation, if provided along with the Derivative Works; or,
|
|
114
|
+
within a display generated by the Derivative Works, if and
|
|
115
|
+
wherever such third-party notices normally appear. The contents
|
|
116
|
+
of the NOTICE file are for informational purposes only and do
|
|
117
|
+
not modify the License. You may add Your own attribution notices
|
|
118
|
+
within Derivative Works that You distribute, alongside or as an
|
|
119
|
+
addendum to the NOTICE text from the Work, provided that such
|
|
120
|
+
additional attribution notices cannot be construed as modifying
|
|
121
|
+
the License.
|
|
122
|
+
|
|
123
|
+
You may add Your own copyright statement to Your modifications and
|
|
124
|
+
may provide additional or different license terms and conditions
|
|
125
|
+
for use, reproduction, or distribution of Your modifications, or
|
|
126
|
+
for any such Derivative Works as a whole, provided Your use,
|
|
127
|
+
reproduction, and distribution of the Work otherwise complies with
|
|
128
|
+
the conditions stated in this License.
|
|
129
|
+
|
|
130
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
|
131
|
+
any Contribution intentionally submitted for inclusion in the Work
|
|
132
|
+
by You to the Licensor shall be under the terms and conditions of
|
|
133
|
+
this License, without any additional terms or conditions.
|
|
134
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
|
135
|
+
the terms of any separate license agreement you may have executed
|
|
136
|
+
with Licensor regarding such Contributions.
|
|
137
|
+
|
|
138
|
+
6. Trademarks. This License does not grant permission to use the trade
|
|
139
|
+
names, trademarks, service marks, or product names of the Licensor,
|
|
140
|
+
except as required for reasonable and customary use in describing the
|
|
141
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
|
142
|
+
|
|
143
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
|
144
|
+
agreed to in writing, Licensor provides the Work (and each
|
|
145
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
|
146
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
147
|
+
implied, including, without limitation, any warranties or conditions
|
|
148
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
|
149
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
|
150
|
+
appropriateness of using or redistributing the Work and assume any
|
|
151
|
+
risks associated with Your exercise of permissions under this License.
|
|
152
|
+
|
|
153
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
|
154
|
+
whether in tort (including negligence), contract, or otherwise,
|
|
155
|
+
unless required by applicable law (such as deliberate and grossly
|
|
156
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
|
157
|
+
liable to You for damages, including any direct, indirect, special,
|
|
158
|
+
incidental, or consequential damages of any character arising as a
|
|
159
|
+
result of this License or out of the use or inability to use the
|
|
160
|
+
Work (including but not limited to damages for loss of goodwill,
|
|
161
|
+
work stoppage, computer failure or malfunction, or any and all
|
|
162
|
+
other commercial damages or losses), even if such Contributor
|
|
163
|
+
has been advised of the possibility of such damages.
|
|
164
|
+
|
|
165
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
|
166
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
|
167
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
|
168
|
+
or other liability obligations and/or rights consistent with this
|
|
169
|
+
License. However, in accepting such obligations, You may act only
|
|
170
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
|
171
|
+
of any other Contributor, and only if You agree to indemnify,
|
|
172
|
+
defend, and hold each Contributor harmless for any liability
|
|
173
|
+
incurred by, or claims asserted against, such Contributor by reason
|
|
174
|
+
of your accepting any such warranty or additional liability.
|
|
175
|
+
|
|
176
|
+
END OF TERMS AND CONDITIONS
|
|
177
|
+
|
|
178
|
+
APPENDIX: How to apply the Apache License to your work.
|
|
179
|
+
|
|
180
|
+
To apply the Apache License to your work, attach the following
|
|
181
|
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
|
182
|
+
replaced with your own identifying information. (Don't include
|
|
183
|
+
the brackets!) The text should be enclosed in the appropriate
|
|
184
|
+
comment syntax for the file format. We also recommend that a
|
|
185
|
+
file or class name and description of purpose be included on the
|
|
186
|
+
same "printed page" as the copyright notice for easier
|
|
187
|
+
identification within third-party archives.
|
|
188
|
+
|
|
189
|
+
Copyright 2026 The RAGSpine Authors
|
|
190
|
+
|
|
191
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
192
|
+
you may not use this file except in compliance with the License.
|
|
193
|
+
You may obtain a copy of the License at
|
|
194
|
+
|
|
195
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
196
|
+
|
|
197
|
+
Unless required by applicable law or agreed to in writing, software
|
|
198
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
199
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
200
|
+
See the License for the specific language governing permissions and
|
|
201
|
+
limitations under the License.
|
rag_spine-0.1.1/Makefile
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# RAGSpine — common dev / CI-CD commands.
|
|
2
|
+
#
|
|
3
|
+
# `make` or `make help` lists every target. Targets honour the project venv by default;
|
|
4
|
+
# override the interpreter with: make test PYTHON=python3.12
|
|
5
|
+
#
|
|
6
|
+
# Always run from the repo root (the scripts anchor on .project-root).
|
|
7
|
+
|
|
8
|
+
.DEFAULT_GOAL := help
|
|
9
|
+
# Default interpreter lives inside $(VENV); `?=` is recursive, so VENV (assigned just below) is resolved at use time.
PYTHON ?= $(VENV)/bin/python
|
|
10
|
+
VENV ?= .venv
|
|
11
|
+
|
|
12
|
+
# ---- setup ---------------------------------------------------------------------------
|
|
13
|
+
|
|
14
|
+
.PHONY: venv
|
|
15
|
+
venv: ## Create the project virtualenv (.venv) with uv
|
|
16
|
+
uv venv $(VENV)
|
|
17
|
+
|
|
18
|
+
.PHONY: install
|
|
19
|
+
install: ## Editable install with dev+service extras (the usual dev setup)
|
|
20
|
+
VIRTUAL_ENV="$(abspath $(VENV))" uv pip install -e ".[dev,service]"
|
|
21
|
+
|
|
22
|
+
.PHONY: install-all
|
|
23
|
+
install-all: ## Editable install with all non-GPU extras (dev,service,llm,embed)
|
|
24
|
+
VIRTUAL_ENV="$(abspath $(VENV))" uv pip install -e ".[dev,service,llm,embed]"
|
|
25
|
+
|
|
26
|
+
.PHONY: hooks
|
|
27
|
+
hooks: ## Enable the pre-push CI gate (one-time, per clone)
|
|
28
|
+
git config core.hooksPath .githooks
|
|
29
|
+
@echo "pre-push CI gate enabled (emergency bypass: git push --no-verify)"
|
|
30
|
+
|
|
31
|
+
# ---- quality gate --------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
.PHONY: ci
|
|
34
|
+
ci: ## Local CI gate: tests + demo smoke (exactly what pre-push runs)
|
|
35
|
+
PYTHON="$(PYTHON)" scripts/ci.sh
|
|
36
|
+
|
|
37
|
+
.PHONY: test
|
|
38
|
+
test: ## Run the test suite (excludes gpu-marked tests)
|
|
39
|
+
$(PYTHON) -m pytest tests/ -q -m "not gpu"
|
|
40
|
+
|
|
41
|
+
.PHONY: test-all
|
|
42
|
+
test-all: ## Run the full suite including gpu-marked tests
|
|
43
|
+
$(PYTHON) -m pytest tests/ -q
|
|
44
|
+
|
|
45
|
+
.PHONY: lint
|
|
46
|
+
lint: ## ruff + mypy (informational, non-blocking)
|
|
47
|
+
PYTHON="$(PYTHON)" scripts/lint.sh
|
|
48
|
+
|
|
49
|
+
.PHONY: fmt
|
|
50
|
+
fmt: ## Auto-fix lint and format with ruff
|
|
51
|
+
$(PYTHON) -m ruff check --fix src/ragspine scripts tests
|
|
52
|
+
$(PYTHON) -m ruff format src/ragspine scripts tests
|
|
53
|
+
|
|
54
|
+
.PHONY: drift
|
|
55
|
+
drift: ## Flag docs whose covered code changed since last verified
|
|
56
|
+
$(PYTHON) scripts/check_doc_drift.py
|
|
57
|
+
|
|
58
|
+
# ---- demo / eval ---------------------------------------------------------------------
|
|
59
|
+
|
|
60
|
+
.PHONY: demo
|
|
61
|
+
demo: ## Run the offline end-to-end demo (expects ALL CHECKS PASSED)
|
|
62
|
+
$(PYTHON) scripts/run_demo.py
|
|
63
|
+
|
|
64
|
+
.PHONY: ask
|
|
65
|
+
ask: ## Ask offline, e.g. make ask Q="中国内地FY2024的REVENUE是多少"
|
|
66
|
+
$(PYTHON) scripts/ask.py --provider mock --db data/fact_metric.db "$(Q)"
|
|
67
|
+
|
|
68
|
+
.PHONY: eval-qa
|
|
69
|
+
eval-qa: ## QA evaluation against the golden set (baseline-gated)
|
|
70
|
+
$(PYTHON) scripts/run_qa_eval.py
|
|
71
|
+
|
|
72
|
+
.PHONY: eval-retrieval
|
|
73
|
+
eval-retrieval: ## BM25-vs-hybrid retrieval A/B harness
|
|
74
|
+
$(PYTHON) scripts/eval_retrieval_ab.py
|
|
75
|
+
|
|
76
|
+
# ---- service -------------------------------------------------------------------------
|
|
77
|
+
|
|
78
|
+
.PHONY: serve
|
|
79
|
+
serve: ## Run the FastAPI server
|
|
80
|
+
$(PYTHON) scripts/run_server.py
|
|
81
|
+
|
|
82
|
+
.PHONY: worker
|
|
83
|
+
worker: ## Run the RQ worker (needs Redis)
|
|
84
|
+
$(PYTHON) scripts/run_worker.py
|
|
85
|
+
|
|
86
|
+
# ---- fixtures / docs -----------------------------------------------------------------
|
|
87
|
+
|
|
88
|
+
.PHONY: fixtures
|
|
89
|
+
fixtures: ## Regenerate the synthetic demo fixtures (deterministic)
|
|
90
|
+
$(PYTHON) scripts/make_synthetic_deck.py
|
|
91
|
+
$(PYTHON) scripts/make_fixtures_excel.py
|
|
92
|
+
$(PYTHON) scripts/make_fixtures_pptx.py
|
|
93
|
+
$(PYTHON) scripts/make_fixtures_pdf.py
|
|
94
|
+
|
|
95
|
+
.PHONY: docs
|
|
96
|
+
docs: ## Build the static API-reference site from docstrings (pdoc → docs/site, nginx-ready)
|
|
97
|
+
$(PYTHON) -m pdoc ragspine -o docs/site
|
|
98
|
+
@echo "API docs → docs/site/ (deploy: point nginx 'root' at $(abspath docs/site))"
|
|
99
|
+
|
|
100
|
+
.PHONY: clean
|
|
101
|
+
clean: ## Remove caches (pyc, pytest/ruff/mypy) and build artifacts
|
|
102
|
+
find . -type d -name __pycache__ -prune -exec rm -rf {} +
|
|
103
|
+
rm -rf .pytest_cache .ruff_cache .mypy_cache dist build *.egg-info
|
|
104
|
+
|
|
105
|
+
# ---- release -------------------------------------------------------------------------
|
|
106
|
+
|
|
107
|
+
.PHONY: build
|
|
108
|
+
build: ## Build wheel + sdist into dist/ and validate with twine check
|
|
109
|
+
rm -rf dist
|
|
110
|
+
$(PYTHON) -m build
|
|
111
|
+
$(PYTHON) -m twine check dist/*
|
|
112
|
+
|
|
113
|
+
.PHONY: publish-test
|
|
114
|
+
publish-test: build ## Build, then upload to TestPyPI (rehearse a real release; needs a TestPyPI token)
|
|
115
|
+
$(PYTHON) -m twine upload --repository testpypi dist/*
|
|
116
|
+
|
|
117
|
+
.PHONY: publish
|
|
118
|
+
publish: build ## Build, then upload to PyPI (real release; needs a PyPI token)
|
|
119
|
+
$(PYTHON) -m twine upload dist/*
|
|
120
|
+
|
|
121
|
+
# ---- meta ----------------------------------------------------------------------------
|
|
122
|
+
|
|
123
|
+
.PHONY: help
|
|
124
|
+
help: ## List available targets
|
|
125
|
+
@grep -hE '^[a-zA-Z_-]+:.*?## ' $(MAKEFILE_LIST) \
|
|
126
|
+
| sort \
|
|
127
|
+
| awk 'BEGIN{FS=":.*?## "}{printf " \033[36m%-16s\033[0m %s\n", $$1, $$2}'
|