rag-spine 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232) hide show
  1. rag_spine-0.1.1/.claude/settings.json +22 -0
  2. rag_spine-0.1.1/.claude/skills/ragspine-make/SKILL.md +68 -0
  3. rag_spine-0.1.1/.githooks/pre-push +9 -0
  4. rag_spine-0.1.1/.github/workflows/ci.yml +38 -0
  5. rag_spine-0.1.1/.github/workflows/release.yml +57 -0
  6. rag_spine-0.1.1/.gitignore +40 -0
  7. rag_spine-0.1.1/.project-root +0 -0
  8. rag_spine-0.1.1/CLAUDE.md +94 -0
  9. rag_spine-0.1.1/LICENSE +201 -0
  10. rag_spine-0.1.1/Makefile +127 -0
  11. rag_spine-0.1.1/NOTICE +9 -0
  12. rag_spine-0.1.1/PKG-INFO +308 -0
  13. rag_spine-0.1.1/README.md +250 -0
  14. rag_spine-0.1.1/config/company.example.toml +52 -0
  15. rag_spine-0.1.1/data/.gitkeep +0 -0
  16. rag_spine-0.1.1/data/golden/qa_baseline.json +22 -0
  17. rag_spine-0.1.1/data/golden/qa_golden_set.jsonl +41 -0
  18. rag_spine-0.1.1/data/golden/query_metric_tool_schema.json +65 -0
  19. rag_spine-0.1.1/data/golden/retrieval_ab_corpus.jsonl +16 -0
  20. rag_spine-0.1.1/data/golden/retrieval_ab_real.jsonl +12 -0
  21. rag_spine-0.1.1/data/golden/retrieval_ab_sample.jsonl +3 -0
  22. rag_spine-0.1.1/docs/README.md +68 -0
  23. rag_spine-0.1.1/docs/adr/0001-dual-channel-determinism.md +25 -0
  24. rag_spine-0.1.1/docs/adr/0002-product-direction.md +76 -0
  25. rag_spine-0.1.1/docs/adr/0003-audience-oss-library.md +44 -0
  26. rag_spine-0.1.1/docs/adr/0004-domain-profile-generalization.md +48 -0
  27. rag_spine-0.1.1/docs/adr/0005-lean-core-experimental-isolation.md +47 -0
  28. rag_spine-0.1.1/docs/adr/0006-quality-bar-invariants-and-benchmark.md +51 -0
  29. rag_spine-0.1.1/docs/adr/0007-multilingual-architect-for-five-ship-two.md +46 -0
  30. rag_spine-0.1.1/docs/adr/0008-prompt-registry-packaging.md +44 -0
  31. rag_spine-0.1.1/docs/adr/0009-dependency-and-framework-policy.md +53 -0
  32. rag_spine-0.1.1/docs/adr/0010-intent-parser-security-decoupling.md +57 -0
  33. rag_spine-0.1.1/docs/adr/0011-python-project-standard-divergences.md +82 -0
  34. rag_spine-0.1.1/docs/architecture.md +24 -0
  35. rag_spine-0.1.1/docs/generated/.gitkeep +0 -0
  36. rag_spine-0.1.1/docs/glossary.md +13 -0
  37. rag_spine-0.1.1/docs/invariants.md +30 -0
  38. rag_spine-0.1.1/docs/prd-breadth-via-adapters.md +246 -0
  39. rag_spine-0.1.1/docs/prd-pipeline-topology-export.md +181 -0
  40. rag_spine-0.1.1/docs/prd-vector-store-seam.md +202 -0
  41. rag_spine-0.1.1/pyproject.toml +156 -0
  42. rag_spine-0.1.1/scripts/ask.py +100 -0
  43. rag_spine-0.1.1/scripts/build_docs.py +528 -0
  44. rag_spine-0.1.1/scripts/check_doc_drift.py +143 -0
  45. rag_spine-0.1.1/scripts/check_docstring_refs.py +202 -0
  46. rag_spine-0.1.1/scripts/ci.sh +44 -0
  47. rag_spine-0.1.1/scripts/classify_pdfs.py +135 -0
  48. rag_spine-0.1.1/scripts/eval_retrieval_ab.py +280 -0
  49. rag_spine-0.1.1/scripts/ingest.py +139 -0
  50. rag_spine-0.1.1/scripts/ingest_narrative.py +84 -0
  51. rag_spine-0.1.1/scripts/lint.sh +24 -0
  52. rag_spine-0.1.1/scripts/make_fixtures_excel.py +424 -0
  53. rag_spine-0.1.1/scripts/make_fixtures_pdf.py +477 -0
  54. rag_spine-0.1.1/scripts/make_fixtures_pptx.py +457 -0
  55. rag_spine-0.1.1/scripts/make_synthetic_deck.py +177 -0
  56. rag_spine-0.1.1/scripts/run_demo.py +168 -0
  57. rag_spine-0.1.1/scripts/run_qa_eval.py +104 -0
  58. rag_spine-0.1.1/scripts/run_server.py +39 -0
  59. rag_spine-0.1.1/scripts/run_worker.py +40 -0
  60. rag_spine-0.1.1/scripts/topology.py +94 -0
  61. rag_spine-0.1.1/src/ragspine/__init__.py +30 -0
  62. rag_spine-0.1.1/src/ragspine/agent/CLAUDE.md +87 -0
  63. rag_spine-0.1.1/src/ragspine/agent/__init__.py +12 -0
  64. rag_spine-0.1.1/src/ragspine/agent/agent.py +544 -0
  65. rag_spine-0.1.1/src/ragspine/agent/intent.py +403 -0
  66. rag_spine-0.1.1/src/ragspine/agent/llm_provider.py +265 -0
  67. rag_spine-0.1.1/src/ragspine/agent/query_tools.py +252 -0
  68. rag_spine-0.1.1/src/ragspine/agent/security_gate.py +116 -0
  69. rag_spine-0.1.1/src/ragspine/common/CLAUDE.md +30 -0
  70. rag_spine-0.1.1/src/ragspine/common/__init__.py +12 -0
  71. rag_spine-0.1.1/src/ragspine/common/company_profile.py +370 -0
  72. rag_spine-0.1.1/src/ragspine/common/core.py +23 -0
  73. rag_spine-0.1.1/src/ragspine/common/glossary.py +210 -0
  74. rag_spine-0.1.1/src/ragspine/common/observability.py +32 -0
  75. rag_spine-0.1.1/src/ragspine/common/sensitivity.py +56 -0
  76. rag_spine-0.1.1/src/ragspine/eval/CLAUDE.md +37 -0
  77. rag_spine-0.1.1/src/ragspine/eval/__init__.py +9 -0
  78. rag_spine-0.1.1/src/ragspine/eval/extraction_eval.py +154 -0
  79. rag_spine-0.1.1/src/ragspine/eval/qa_eval.py +717 -0
  80. rag_spine-0.1.1/src/ragspine/extraction/CLAUDE.md +36 -0
  81. rag_spine-0.1.1/src/ragspine/extraction/__init__.py +12 -0
  82. rag_spine-0.1.1/src/ragspine/extraction/color/__init__.py +7 -0
  83. rag_spine-0.1.1/src/ragspine/extraction/color/color_semantics.py +322 -0
  84. rag_spine-0.1.1/src/ragspine/extraction/extractors/__init__.py +13 -0
  85. rag_spine-0.1.1/src/ragspine/extraction/extractors/pdf_digital_extractor.py +228 -0
  86. rag_spine-0.1.1/src/ragspine/extraction/extractors/pdf_scanned_extractor.py +447 -0
  87. rag_spine-0.1.1/src/ragspine/extraction/extractors/pptx_extractor.py +210 -0
  88. rag_spine-0.1.1/src/ragspine/extraction/extractors/pptx_styled_extractor.py +357 -0
  89. rag_spine-0.1.1/src/ragspine/extraction/extractors/xlsx_extractor.py +100 -0
  90. rag_spine-0.1.1/src/ragspine/extraction/extractors/xlsx_styled_extractor.py +188 -0
  91. rag_spine-0.1.1/src/ragspine/extraction/ir.py +111 -0
  92. rag_spine-0.1.1/src/ragspine/extraction/routing/__init__.py +7 -0
  93. rag_spine-0.1.1/src/ragspine/extraction/routing/pdf_router.py +230 -0
  94. rag_spine-0.1.1/src/ragspine/extraction/verification/__init__.py +7 -0
  95. rag_spine-0.1.1/src/ragspine/extraction/verification/dual_channel_verifier.py +216 -0
  96. rag_spine-0.1.1/src/ragspine/ingestion/CLAUDE.md +29 -0
  97. rag_spine-0.1.1/src/ragspine/ingestion/__init__.py +9 -0
  98. rag_spine-0.1.1/src/ragspine/ingestion/narrative/__init__.py +8 -0
  99. rag_spine-0.1.1/src/ragspine/ingestion/narrative/narrative_extract.py +152 -0
  100. rag_spine-0.1.1/src/ragspine/ingestion/narrative/narrative_ingest.py +284 -0
  101. rag_spine-0.1.1/src/ragspine/ingestion/review/__init__.py +7 -0
  102. rag_spine-0.1.1/src/ragspine/ingestion/review/review_queue.py +291 -0
  103. rag_spine-0.1.1/src/ragspine/ingestion/structured/__init__.py +9 -0
  104. rag_spine-0.1.1/src/ragspine/ingestion/structured/ingestion.py +628 -0
  105. rag_spine-0.1.1/src/ragspine/ingestion/structured/ingestion_manifest.py +317 -0
  106. rag_spine-0.1.1/src/ragspine/pipeline/CLAUDE.md +67 -0
  107. rag_spine-0.1.1/src/ragspine/pipeline/__init__.py +29 -0
  108. rag_spine-0.1.1/src/ragspine/pipeline/graph.py +158 -0
  109. rag_spine-0.1.1/src/ragspine/pipeline/topology.py +213 -0
  110. rag_spine-0.1.1/src/ragspine/py.typed +0 -0
  111. rag_spine-0.1.1/src/ragspine/retrieval/CLAUDE.md +32 -0
  112. rag_spine-0.1.1/src/ragspine/retrieval/__init__.py +12 -0
  113. rag_spine-0.1.1/src/ragspine/retrieval/chunking/__init__.py +8 -0
  114. rag_spine-0.1.1/src/ragspine/retrieval/chunking/chunk_store.py +211 -0
  115. rag_spine-0.1.1/src/ragspine/retrieval/chunking/chunking.py +202 -0
  116. rag_spine-0.1.1/src/ragspine/retrieval/lexical/__init__.py +7 -0
  117. rag_spine-0.1.1/src/ragspine/retrieval/lexical/retrieval.py +422 -0
  118. rag_spine-0.1.1/src/ragspine/retrieval/link/__init__.py +7 -0
  119. rag_spine-0.1.1/src/ragspine/retrieval/link/narrative_link.py +145 -0
  120. rag_spine-0.1.1/src/ragspine/retrieval/rerank/__init__.py +7 -0
  121. rag_spine-0.1.1/src/ragspine/retrieval/rerank/listwise_rerank.py +135 -0
  122. rag_spine-0.1.1/src/ragspine/retrieval/vector/__init__.py +9 -0
  123. rag_spine-0.1.1/src/ragspine/retrieval/vector/embedding_backends.py +314 -0
  124. rag_spine-0.1.1/src/ragspine/retrieval/vector/store.py +171 -0
  125. rag_spine-0.1.1/src/ragspine/service/CLAUDE.md +30 -0
  126. rag_spine-0.1.1/src/ragspine/service/__init__.py +11 -0
  127. rag_spine-0.1.1/src/ragspine/service/api/__init__.py +11 -0
  128. rag_spine-0.1.1/src/ragspine/service/api/app.py +38 -0
  129. rag_spine-0.1.1/src/ragspine/service/api/dependencies.py +29 -0
  130. rag_spine-0.1.1/src/ragspine/service/api/routes.py +291 -0
  131. rag_spine-0.1.1/src/ragspine/service/api/schemas.py +78 -0
  132. rag_spine-0.1.1/src/ragspine/service/config.py +120 -0
  133. rag_spine-0.1.1/src/ragspine/service/faq/__init__.py +8 -0
  134. rag_spine-0.1.1/src/ragspine/service/faq/faq_cache.py +170 -0
  135. rag_spine-0.1.1/src/ragspine/service/tasks/__init__.py +8 -0
  136. rag_spine-0.1.1/src/ragspine/service/tasks/jobs.py +173 -0
  137. rag_spine-0.1.1/src/ragspine/service/tasks/task_queue.py +202 -0
  138. rag_spine-0.1.1/src/ragspine/storage/CLAUDE.md +42 -0
  139. rag_spine-0.1.1/src/ragspine/storage/__init__.py +7 -0
  140. rag_spine-0.1.1/src/ragspine/storage/fact_store.py +358 -0
  141. rag_spine-0.1.1/tests/__init__.py +0 -0
  142. rag_spine-0.1.1/tests/agent/__init__.py +0 -0
  143. rag_spine-0.1.1/tests/agent/test_agent_orchestrator.py +263 -0
  144. rag_spine-0.1.1/tests/agent/test_composite.py +265 -0
  145. rag_spine-0.1.1/tests/agent/test_external_entity_guard.py +571 -0
  146. rag_spine-0.1.1/tests/agent/test_intent.py +221 -0
  147. rag_spine-0.1.1/tests/agent/test_intent_parser.py +138 -0
  148. rag_spine-0.1.1/tests/agent/test_llm_provider.py +221 -0
  149. rag_spine-0.1.1/tests/agent/test_query_tools_schema.py +119 -0
  150. rag_spine-0.1.1/tests/agent/test_security_gate.py +145 -0
  151. rag_spine-0.1.1/tests/common/__init__.py +0 -0
  152. rag_spine-0.1.1/tests/common/test_company_generalization.py +335 -0
  153. rag_spine-0.1.1/tests/common/test_company_profile.py +75 -0
  154. rag_spine-0.1.1/tests/common/test_domain_profile.py +87 -0
  155. rag_spine-0.1.1/tests/common/test_domain_profile_vocab_frozen.py +39 -0
  156. rag_spine-0.1.1/tests/common/test_observability_resilience.py +369 -0
  157. rag_spine-0.1.1/tests/common/test_sensitivity.py +373 -0
  158. rag_spine-0.1.1/tests/conformance/__init__.py +0 -0
  159. rag_spine-0.1.1/tests/conformance/conftest.py +69 -0
  160. rag_spine-0.1.1/tests/conformance/test_vector_store_contract.py +291 -0
  161. rag_spine-0.1.1/tests/conformance/test_vector_store_invariants.py +112 -0
  162. rag_spine-0.1.1/tests/conftest.py +175 -0
  163. rag_spine-0.1.1/tests/e2e/__init__.py +0 -0
  164. rag_spine-0.1.1/tests/e2e/test_ask_e2e.py +86 -0
  165. rag_spine-0.1.1/tests/e2e/test_e2e.py +190 -0
  166. rag_spine-0.1.1/tests/e2e/test_fixture_regeneration.py +300 -0
  167. rag_spine-0.1.1/tests/eval/__init__.py +0 -0
  168. rag_spine-0.1.1/tests/eval/test_extraction_eval.py +354 -0
  169. rag_spine-0.1.1/tests/eval/test_lab_domain.py +401 -0
  170. rag_spine-0.1.1/tests/eval/test_qa_eval.py +598 -0
  171. rag_spine-0.1.1/tests/eval/test_qa_eval_fabrication_frozen.py +21 -0
  172. rag_spine-0.1.1/tests/extraction/__init__.py +0 -0
  173. rag_spine-0.1.1/tests/extraction/color/__init__.py +0 -0
  174. rag_spine-0.1.1/tests/extraction/color/test_color_semantics.py +341 -0
  175. rag_spine-0.1.1/tests/extraction/extractors/__init__.py +0 -0
  176. rag_spine-0.1.1/tests/extraction/extractors/test_pdf_digital_extractor.py +260 -0
  177. rag_spine-0.1.1/tests/extraction/extractors/test_pdf_scanned_extractor.py +455 -0
  178. rag_spine-0.1.1/tests/extraction/extractors/test_pptx_styled_extractor.py +479 -0
  179. rag_spine-0.1.1/tests/extraction/extractors/test_scanned_gpu.py +243 -0
  180. rag_spine-0.1.1/tests/extraction/extractors/test_xlsx_styled_extractor.py +313 -0
  181. rag_spine-0.1.1/tests/extraction/routing/__init__.py +0 -0
  182. rag_spine-0.1.1/tests/extraction/routing/test_pdf_router.py +461 -0
  183. rag_spine-0.1.1/tests/extraction/test_ir.py +264 -0
  184. rag_spine-0.1.1/tests/extraction/verification/__init__.py +0 -0
  185. rag_spine-0.1.1/tests/extraction/verification/test_dual_channel_verifier.py +422 -0
  186. rag_spine-0.1.1/tests/ingestion/__init__.py +0 -0
  187. rag_spine-0.1.1/tests/ingestion/narrative/__init__.py +0 -0
  188. rag_spine-0.1.1/tests/ingestion/narrative/test_narrative_extract.py +214 -0
  189. rag_spine-0.1.1/tests/ingestion/narrative/test_narrative_ingest.py +298 -0
  190. rag_spine-0.1.1/tests/ingestion/review/__init__.py +0 -0
  191. rag_spine-0.1.1/tests/ingestion/review/test_review_queue.py +378 -0
  192. rag_spine-0.1.1/tests/ingestion/structured/__init__.py +0 -0
  193. rag_spine-0.1.1/tests/ingestion/structured/test_ingestion.py +272 -0
  194. rag_spine-0.1.1/tests/ingestion/structured/test_manifest.py +348 -0
  195. rag_spine-0.1.1/tests/ingestion/test_ingest_cli.py +245 -0
  196. rag_spine-0.1.1/tests/ingestion/test_ingest_dispatch.py +679 -0
  197. rag_spine-0.1.1/tests/pipeline/__init__.py +0 -0
  198. rag_spine-0.1.1/tests/pipeline/test_cli.py +102 -0
  199. rag_spine-0.1.1/tests/pipeline/test_graph.py +192 -0
  200. rag_spine-0.1.1/tests/pipeline/test_topology.py +331 -0
  201. rag_spine-0.1.1/tests/retrieval/__init__.py +0 -0
  202. rag_spine-0.1.1/tests/retrieval/chunking/__init__.py +0 -0
  203. rag_spine-0.1.1/tests/retrieval/chunking/test_chunk_store.py +193 -0
  204. rag_spine-0.1.1/tests/retrieval/chunking/test_chunking.py +203 -0
  205. rag_spine-0.1.1/tests/retrieval/lexical/__init__.py +0 -0
  206. rag_spine-0.1.1/tests/retrieval/lexical/test_retrieval.py +449 -0
  207. rag_spine-0.1.1/tests/retrieval/link/__init__.py +0 -0
  208. rag_spine-0.1.1/tests/retrieval/link/test_narrative_link.py +420 -0
  209. rag_spine-0.1.1/tests/retrieval/rerank/__init__.py +0 -0
  210. rag_spine-0.1.1/tests/retrieval/rerank/test_listwise_rerank.py +265 -0
  211. rag_spine-0.1.1/tests/retrieval/test_retrieval_ab.py +271 -0
  212. rag_spine-0.1.1/tests/retrieval/vector/__init__.py +0 -0
  213. rag_spine-0.1.1/tests/retrieval/vector/test_embedding_backends.py +195 -0
  214. rag_spine-0.1.1/tests/retrieval/vector/test_embedding_deterministic.py +374 -0
  215. rag_spine-0.1.1/tests/retrieval/vector/test_embedding_device.py +269 -0
  216. rag_spine-0.1.1/tests/service/__init__.py +0 -0
  217. rag_spine-0.1.1/tests/service/api/__init__.py +0 -0
  218. rag_spine-0.1.1/tests/service/api/test_api_ask.py +305 -0
  219. rag_spine-0.1.1/tests/service/api/test_api_health.py +85 -0
  220. rag_spine-0.1.1/tests/service/api/test_api_jobs.py +241 -0
  221. rag_spine-0.1.1/tests/service/faq/__init__.py +0 -0
  222. rag_spine-0.1.1/tests/service/faq/test_faq_cache.py +244 -0
  223. rag_spine-0.1.1/tests/service/tasks/__init__.py +0 -0
  224. rag_spine-0.1.1/tests/service/tasks/test_ingest_jobs.py +191 -0
  225. rag_spine-0.1.1/tests/service/tasks/test_task_queue.py +134 -0
  226. rag_spine-0.1.1/tests/service/test_service_config.py +216 -0
  227. rag_spine-0.1.1/tests/service/test_service_integration.py +73 -0
  228. rag_spine-0.1.1/tests/storage/__init__.py +0 -0
  229. rag_spine-0.1.1/tests/storage/test_dim_key.py +190 -0
  230. rag_spine-0.1.1/tests/storage/test_fact_store_v2.py +342 -0
  231. rag_spine-0.1.1/tests/storage/test_fact_temporal.py +406 -0
  232. rag_spine-0.1.1/uv.lock +5782 -0
@@ -0,0 +1,22 @@
1
+ {
2
+ "$schema": "https://json.schemastore.org/claude-code-settings.json",
3
+ "permissions": {
4
+ "allow": [
5
+ "Bash(.venv/bin/python:*)",
6
+ "Bash(.venv/bin/python -m pytest:*)",
7
+ "Bash(python:*)",
8
+ "Bash(python3:*)",
9
+ "Bash(uv venv:*)",
10
+ "Bash(uv pip:*)",
11
+ "Bash(uv run:*)",
12
+ "Bash(ruff:*)",
13
+ "Bash(mypy:*)",
14
+ "Bash(git status:*)",
15
+ "Bash(git diff:*)",
16
+ "Bash(git log:*)",
17
+ "Bash(git show:*)",
18
+ "Bash(git branch:*)"
19
+ ],
20
+ "deny": []
21
+ }
22
+ }
@@ -0,0 +1,68 @@
1
+ ---
2
+ name: ragspine-make
3
+ description: "How to run dev / CI-CD tasks in the RAGSpine repo via the Makefile. Use whenever you need to install deps, run tests, run the local CI gate, lint/format, check doc drift, run the demo, ask a question, run evals, start the server/worker, or regenerate fixtures in this project. Keywords: make, Makefile, run tests, pytest, CI, ci gate, pre-push, lint, ruff, mypy, format, doc drift, demo, ask, eval, qa eval, retrieval ab, serve, FastAPI, worker, RQ, redis, fixtures, install, venv, uv, hooks, build docs, 跑测试, 跑CI, 本地CI, 格式化, 漂移, 评估, 启动服务, 安装依赖, 怎么运行, 命令"
4
+ allowed-tools: ["Bash", "Read"]
5
+ ---
6
+
7
+ # RAGSpine — dev / CI-CD commands (Makefile)
8
+
9
+ The repo wraps common commands in a root `Makefile`. **Always run from the repo root**
10
+ (scripts anchor on `.project-root`). `make help` lists every target. The raw commands
11
+ live in `scripts/`; the Makefile is a thin, discoverable wrapper over them.
12
+
13
+ **Interpreter:** every target defaults to `.venv/bin/python`. Override with
14
+ `make <target> PYTHON=python3.12` (passed through to `scripts/ci.sh` / `lint.sh` too).
15
+
16
+ ## First-time setup
17
+
18
+ | Command | What it does | Notes |
19
+ |---|---|---|
20
+ | `make venv` | `uv venv .venv` | Creates the project venv. |
21
+ | `make install` | editable install, `[dev,service]` extras | The usual dev setup. |
22
+ | `make install-all` | editable install, `[dev,service,llm,embed]` | Adds real LLM + embedding backends (heavier). `[pdf]`/`[ocr]` are platform-specific and intentionally excluded. |
23
+ | `make hooks` | `git config core.hooksPath .githooks` | **One-time per clone.** Enables the pre-push CI gate so red code never leaves the machine. Emergency bypass: `git push --no-verify`. |
24
+
25
+ ## The quality gate
26
+
27
+ | Command | What it does | Notes |
28
+ |---|---|---|
29
+ | `make ci` | tests (`-m "not gpu"`) + demo smoke | **The gate** — exactly what the pre-push hook runs (`scripts/ci.sh`). This is the single source of truth for "is it green". |
30
+ | `make test` | `pytest tests/ -q -m "not gpu"` | Expect **943 passed, 1 gpu-skipped**. |
31
+ | `make test-all` | full suite incl. `gpu` marker | Needs Ubuntu + NVIDIA GPU + real OCR model; skipped/failing elsewhere. |
32
+ | `make lint` | ruff check + ruff format --check + mypy | **Informational, non-blocking** (always exits 0). The inherited codebase predates linting; adopt fixes incrementally — it is *not* wired into the CI gate yet. |
33
+ | `make fmt` | `ruff check --fix` + `ruff format` | Auto-fix lint + format `ragspine scripts tests`. |
34
+ | `make drift` | `scripts/check_doc_drift.py` | Flags docs whose covered code changed since `verified-against`. Expect **10 tracked, 0 stale**. See `docs/README.md` for the convention. |
35
+
36
+ The GitHub Actions CI workflow is dormant (manual-trigger only) to avoid consuming minutes — the local
37
+ gate is authoritative. (The release workflow still runs when a GitHub Release is published.)
38
+
39
+ ## Demo / ask / eval
40
+
41
+ | Command | What it does | Notes |
42
+ |---|---|---|
43
+ | `make demo` | `scripts/run_demo.py` | Offline end-to-end; expect `ALL CHECKS PASSED`. |
44
+ | `make ask Q="…"` | offline question via MockProvider | e.g. `make ask Q="中国内地FY2024的REVENUE是多少"`. Uses `--provider mock --db data/fact_metric.db`. Asking about data that is not in the DB returns an honest refusal, never a guess. |
45
+ | `make eval-qa` | `scripts/run_qa_eval.py` | QA harness, **baseline-gated** (fails on regression below the frozen floor; don't launder a regression with `--update-baseline`). |
46
+ | `make eval-retrieval` | `scripts/eval_retrieval_ab.py` | BM25-vs-hybrid Recall@k / MRR. Default gold is synthetic (proves the harness math, not real recall). |
47
+
48
+ ## Service
49
+
50
+ | Command | What it does | Notes |
51
+ |---|---|---|
52
+ | `make serve` | `scripts/run_server.py` | FastAPI app. |
53
+ | `make worker` | `scripts/run_worker.py` | RQ worker — **needs Redis running**. Tests use `FakeQueue`, so this is only for real async runs. |
54
+
55
+ ## Fixtures / docs / housekeeping
56
+
57
+ | Command | What it does | Notes |
58
+ |---|---|---|
59
+ | `make fixtures` | regenerate synthetic demo data | Deterministic, regenerable (`make_synthetic_deck` + `make_fixtures_{excel,pptx,pdf}`). **Never add real-world data.** |
60
+ | `make docs` | `scripts/build_docs.py` | Static doc site (md→HTML). |
61
+ | `make clean` | remove `__pycache__` + `.pytest_cache` / `.ruff_cache` / `.mypy_cache` | |
62
+
63
+ ## Typical loops
64
+
65
+ - **Before pushing:** `make ci` (or just push — the pre-push hook runs it).
66
+ - **TDD inner loop:** `make test` (fast, offline, deterministic).
67
+ - **Touched docs/code contracts:** `make drift`.
68
+ - **Cleaning up style:** `make fmt` then `make lint`.
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env bash
2
+ #
3
+ # Pre-push CI gate: run the local CI before allowing a push, so red code never leaves
4
+ # your machine. This replaces server-side CI (no GitHub Actions minutes used).
5
+ #
6
+ # Enable once per clone: git config core.hooksPath .githooks
7
+ # Bypass in an emergency: git push --no-verify
8
+ #
9
+ exec "$(git rev-parse --show-toplevel)/scripts/ci.sh"
@@ -0,0 +1,38 @@
1
+ # RAGSpine CI.
2
+ #
3
+ # ⚠️ DORMANT BY DESIGN — manual trigger only (`workflow_dispatch`). This workflow does NOT
4
+ # run on push or pull_request, so it consumes ZERO GitHub Actions minutes automatically.
5
+ # The real gate runs LOCALLY: `scripts/ci.sh`, enforced by the `.githooks/pre-push` hook.
6
+ #
7
+ # To enable server-side CI later (when you have Actions quota), uncomment the `push:` /
8
+ # `pull_request:` triggers below. It runs the exact same `scripts/ci.sh` as your local gate.
9
+ name: CI
10
+
11
+ on:
12
+ workflow_dispatch:
13
+ # push:
14
+ # branches: [main]
15
+ # pull_request:
16
+
17
+ jobs:
18
+ ci:
19
+ strategy:
20
+ fail-fast: false
21
+ matrix:
22
+ # Expand to [ubuntu-latest, macos-latest, windows-latest] once enabled, if desired.
23
+ os: [ubuntu-latest]
24
+ python: ["3.10", "3.11", "3.12", "3.13"]
25
+ runs-on: ${{ matrix.os }}
26
+ steps:
27
+ - uses: actions/checkout@v4
28
+ - uses: actions/setup-python@v5
29
+ with:
30
+ python-version: ${{ matrix.python }}
31
+ - name: Install (dev + service + pdf)
32
+ run: |
33
+ python -m pip install -U pip
34
+ pip install -e ".[dev,service,pdf]"
35
+ - name: Run the local CI gate
36
+ run: bash scripts/ci.sh
37
+ env:
38
+ PYTHON: python
@@ -0,0 +1,57 @@
1
+ # Release → PyPI via Trusted Publishing (OIDC — NO stored token / secret).
2
+ #
3
+ # Trigger: publishing a GitHub Release. The `gate` job runs the exact same checks as
4
+ # `scripts/ci.sh` across the support matrix; only if every leg is green does `publish`
5
+ # build + upload the `rag-spine` distribution.
6
+ #
7
+ # ── One-time PyPI setup (web UI; needs account login) ──────────────────────────────────
8
+ # The `rag-spine` project already exists, so add a trusted publisher on it directly:
9
+ # Manage → rag-spine → Publishing → "Add a new publisher":
10
+ # Owner: VoldemortGin Repository: ragspine Workflow: release.yml Environment: (blank)
11
+ #
12
+ # ── To cut a release ───────────────────────────────────────────────────────────────────
13
+ # Bump `version` in pyproject.toml, commit, then publish a GitHub Release tagged e.g.
14
+ # v0.1.0. The published version is taken from pyproject, not the tag — keep them in sync.
15
+ name: Release
16
+
17
+ on:
18
+ release:
19
+ types: [published]
20
+
21
+ jobs:
22
+ gate:
23
+ strategy:
24
+ fail-fast: true
25
+ matrix:
26
+ python: ["3.10", "3.11", "3.12", "3.13"]
27
+ runs-on: ubuntu-latest
28
+ steps:
29
+ - uses: actions/checkout@v4
30
+ - uses: actions/setup-python@v5
31
+ with:
32
+ python-version: ${{ matrix.python }}
33
+ - name: Install (dev + service + pdf)
34
+ run: |
35
+ python -m pip install -U pip
36
+ pip install -e ".[dev,service,pdf]"
37
+ - name: Gate (scripts/ci.sh)
38
+ run: bash scripts/ci.sh
39
+ env:
40
+ PYTHON: python
41
+
42
+ publish:
43
+ needs: gate
44
+ runs-on: ubuntu-latest
45
+ permissions:
46
+ id-token: write # OIDC — required for Trusted Publishing
47
+ steps:
48
+ - uses: actions/checkout@v4
49
+ - uses: actions/setup-python@v5
50
+ with:
51
+ python-version: "3.13"
52
+ - name: Build sdist + wheel
53
+ run: |
54
+ python -m pip install -U build
55
+ python -m build
56
+ - name: Publish to PyPI (Trusted Publishing / OIDC)
57
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,40 @@
1
+ # Virtualenvs & caches
2
+ .venv/
3
+ venv/
4
+ __pycache__/
5
+ .pytest_cache/
6
+ .ruff_cache/
7
+ .mypy_cache/
8
+ *.pyc
9
+ *.egg-info/
10
+ build/
11
+ dist/
12
+
13
+ # data/ holds regenerable artifacts (synthetic fixtures, sqlite DBs) — ignore its
14
+ # contents, but KEEP the directory present and force-track the version-controlled
15
+ # golden evaluation sets. NOTE: this must be `data/*` (ignore contents), not `data/`
16
+ # (ignore the dir) — git won't descend into a wholesale-excluded directory, so a bare
17
+ # `!data/golden/` would have no effect.
18
+ data/*
19
+ !data/golden/
20
+ !data/.gitkeep
21
+
22
+ # Claude Code: share project settings, ignore local/personal overrides
23
+ .claude/settings.local.json
24
+ .claude/*.local.json
25
+
26
+ # Generated docs (API ref, symbol/dependency indexes) — script-produced, regenerable.
27
+ # Ignore contents but keep the directory present, mirroring the data/ rule above.
28
+ docs/generated/*
29
+ !docs/generated/.gitkeep
30
+
31
+ # Built static doc site (make docs → pdoc API reference); regenerable, nginx-served.
32
+ docs/site/
33
+
34
+ # OS / editor
35
+ .DS_Store
36
+ .idea/
37
+ .vscode/
38
+
39
+ # Local tooling artifacts (eval scratch output)
40
+ .jbeval/
File without changes
@@ -0,0 +1,94 @@
1
+ # CLAUDE.md — RAGSpine
2
+
3
+ Working contract for AI coding sessions (and humans) on this repo. Read `README.md` for the
4
+ full pitch; this file is the operational guide.
5
+
6
+ ## What this is
7
+
8
+ RAGSpine — a **framework-free backend RAG engine** (no Dify/LangGraph). Deterministic
9
+ dual-channel (structured numeric + narrative RAG) + agent orchestration, with
10
+ **anti-fabrication** and **source provenance** as code-enforced invariants. Every external
11
+ dependency is a `Protocol`, so the core imports zero SDKs and runs offline.
12
+
13
+ ## Layout (deep, domain-grouped — find the file by folder first)
14
+
15
+ ```
16
+ src/ragspine/{common, extraction, ingestion, storage, retrieval, agent, eval, service, pipeline}
17
+ ```
18
+
19
+ See the "Architecture" tree in `README.md` for what lives where and the request flow.
20
+
21
+ ## Docs map (find docs by folder, like code)
22
+
23
+ Docs are a retrieval system, not a library — at scale no agent reads them whole.
24
+ Full convention in `docs/README.md`. Short version:
25
+
26
+ - **This file** — the always-on routing table. Keep it small; no content lives here.
27
+ - `src/ragspine/<domain>/CLAUDE.md` — per-domain contract, auto-loaded in that subtree.
28
+ - `src/ragspine/<domain>/docs/*.md` — deep dives, pulled by grep / explicit read.
29
+ - `docs/` — cross-cutting: `architecture.md`, `invariants.md`, `glossary.md`, `adr/`.
30
+ - `docs/generated/` — script-produced (API ref, indexes); git-ignored, never hand-edited.
31
+
32
+ Docs describing code carry `covers:` + `verified-against:` frontmatter;
33
+ `scripts/check_doc_drift.py` flags any whose code changed since last verified.
34
+
35
+ ## Run (always from the project root)
36
+
37
+ - **Make:** common commands are wrapped in the repo-root `Makefile` — `make help` lists them
38
+ (`make install` · `make ci` · `make test` · `make demo` · `make lint` · `make fmt` ·
39
+ `make drift` · `make hooks` · `make serve`/`make worker` · `make ask Q="…"`). Override the
40
+ interpreter with `make <t> PYTHON=python3.12`. The raw commands below remain the source of truth.
41
+ - **Setup:** `uv venv .venv` then `VIRTUAL_ENV="$(pwd)/.venv" uv pip install -e ".[dev,service]"`
42
+ (the `VIRTUAL_ENV=` prefix is required so `uv` targets this venv, not a system Python).
43
+ Extras: `[pdf]` `[ocr]` `[llm]` `[embed]`.
44
+ - **Tests:** `.venv/bin/python -m pytest tests/ -q` → expect **1078 passed, 1 gpu-skipped**.
45
+ - **CI (local):** `scripts/ci.sh` is the gate (tests + demo smoke); enable the pre-push hook
46
+ once with `git config core.hooksPath .githooks`. GitHub Actions is dormant (manual-only) to
47
+ avoid consuming quota — see README "Continuous integration (local)".
48
+ - **Demo:** `.venv/bin/python scripts/run_demo.py` → expect `ALL CHECKS PASSED`.
49
+ - **Ask:** `.venv/bin/python scripts/ask.py --provider mock --db data/fact_metric.db "中国内地FY2024的REVENUE是多少"`.
50
+ - **Service:** `scripts/run_server.py` (FastAPI) + `scripts/run_worker.py` (RQ, needs Redis).
51
+
52
+ ## Conventions
53
+
54
+ - **TDD** — tests are the spec: write the failing test first, then implement to green.
55
+ Never weaken a test to make it pass; if a frozen/regression test breaks, you broke behavior.
56
+ - Python **3.10+** type hints. Import order: **stdlib > third-party > local**. Stay simple;
57
+ match the surrounding style.
58
+ - **Deep, domain-grouped layout** — organize by domain/feature, never by technical layer
59
+ (a *Screaming Architecture* / *package-by-feature* stance): the folder path should locate a
60
+ file before you read its name. Split a package as soon as it holds a second responsibility;
61
+ prefer pushing code into a deeper domain subtree over letting a flat module become a
62
+ catch-all. Depth tracks responsibility — go as deep as each level earns a distinct concern.
63
+ - **Cross-platform** (`pathlib`, no OS-specific calls). Run scripts/tests from the repo root.
64
+ - **Minimal diff** — change only what the task needs; don't refactor unrelated code or delete
65
+ pre-existing dead code.
66
+
67
+ ## Invariants (do not break)
68
+
69
+ - **Anti-fabrication** — when the structured channel returns no `found` fact, the orchestrator
70
+ rewrites the answer to "not found" regardless of model output (`agent/agent.py`).
71
+ - **Provenance** — every fact/answer carries `source_doc_id` + locator. Don't drop lineage.
72
+ - **RESTRICTED isolation** — sensitivity-`RESTRICTED` content is filtered at **two** exits
73
+ (`retrieval/link`, `retrieval/rerank`) before it can reach a prompt.
74
+ - **Privacy-aware traces** — `common/observability` records codes/counts/timings only, never
75
+ answer / fact value / chunk text.
76
+ - **Config-driven, no hardcoded company** — identity / metrics / competitors come from
77
+ `CompanyProfile` (`config/company.example.toml`). Don't hardcode a company anywhere.
78
+ - **Pluggability** — LLM / embeddings / reranker / OCR / queue are `Protocol`s, lazy-imported.
79
+ The core runs offline with `MockProvider`.
80
+
81
+ ## Demo data
82
+
83
+ The bundled demo uses a **fictional** company (ACME), synthetic figures, and a fictional
84
+ competitor set, generated by `scripts/make_*.py` (deterministic, regenerable). The
85
+ version-controlled golden evaluation sets live under `data/golden/` (force-tracked via the
86
+ `.gitignore` `!data/golden/` rule). **Never add real-world data.**
87
+
88
+ ## Service layer
89
+
90
+ `service/`: `ServiceConfig` (env `RAGSPINE_*`), FastAPI app (app factory + dependency
91
+ injection), RQ task queue (`FakeQueue` for tests, `RQQueue` for prod), ingestion jobs
92
+ (worker-owned stores), and the FAQ short-circuit cache. The FAQ layer must keep its
93
+ conservative exclusions (structured-numeric / competitor / real-time / expired / disabled /
94
+ RESTRICTED never short-circuit) — it sits in front of the anti-fabrication guard.
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or Derivative
95
+ Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and do
117
+ not modify the License. You may add Your own attribution notices
118
+ within Derivative Works that You distribute, alongside or as an
119
+ addendum to the NOTICE text from the Work, provided that such
120
+ additional attribution notices cannot be construed as modifying
121
+ the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2026 The RAGSpine Authors
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
@@ -0,0 +1,127 @@
1
+ # RAGSpine — common dev / CI-CD commands.
2
+ #
3
+ # `make` or `make help` lists every target. Targets honour the project venv by default;
4
+ # override the interpreter with: make test PYTHON=python3.12
5
+ #
6
+ # Always run from the repo root (the scripts anchor on .project-root).
7
+
8
+ .DEFAULT_GOAL := help
9
+ # VENV is the virtualenv directory used by `venv` / `install`; PYTHON defaults to the
+ # interpreter inside it, so overriding VENV alone keeps every target on the same env.
+ # PYTHON can still be overridden independently (make test PYTHON=python3.12).
+ VENV ?= .venv
10
+ PYTHON ?= $(VENV)/bin/python
11
+
12
+ # ---- setup ---------------------------------------------------------------------------
13
+
14
+ .PHONY: venv
15
+ venv: ## Create the project virtualenv (.venv) with uv
16
+ uv venv $(VENV)
17
+
18
+ .PHONY: install
19
+ install: ## Editable install with dev+service extras (the usual dev setup)
20
+ VIRTUAL_ENV="$(CURDIR)/$(VENV)" uv pip install -e ".[dev,service]"
21
+
22
+ .PHONY: install-all
23
+ install-all: ## Editable install with all non-GPU extras (dev,service,llm,embed)
24
+ VIRTUAL_ENV="$(CURDIR)/$(VENV)" uv pip install -e ".[dev,service,llm,embed]"
25
+
26
+ .PHONY: hooks
27
+ hooks: ## Enable the pre-push CI gate (one-time, per clone)
28
+ git config core.hooksPath .githooks
29
+ @echo "pre-push CI gate enabled (emergency bypass: git push --no-verify)"
30
+
31
+ # ---- quality gate --------------------------------------------------------------------
32
+
33
+ # Runs scripts/ci.sh with PYTHON exported in its environment. The value is quoted so an
+ # override containing spaces (e.g. PYTHON="uv run python") is passed as one value instead
+ # of being word-split by the shell into an assignment plus a different command.
+ .PHONY: ci
34
+ ci: ## Local CI gate: tests + demo smoke (exactly what pre-push runs)
35
+ PYTHON="$(PYTHON)" scripts/ci.sh
36
+
37
+ .PHONY: test
38
+ test: ## Run the test suite (excludes gpu-marked tests)
39
+ $(PYTHON) -m pytest tests/ -q -m "not gpu"
40
+
41
+ .PHONY: test-all
42
+ test-all: ## Run the full suite including gpu-marked tests
43
+ $(PYTHON) -m pytest tests/ -q
44
+
45
+ # Runs scripts/lint.sh with PYTHON exported in its environment. The value is quoted so an
+ # override containing spaces (e.g. PYTHON="uv run python") is passed as one value instead
+ # of being word-split by the shell into an assignment plus a different command.
+ .PHONY: lint
46
+ lint: ## ruff + mypy (informational, non-blocking)
47
+ PYTHON="$(PYTHON)" scripts/lint.sh
48
+
49
+ .PHONY: fmt
50
+ fmt: ## Auto-fix lint and format with ruff
51
+ $(PYTHON) -m ruff check --fix src/ragspine scripts tests
52
+ $(PYTHON) -m ruff format src/ragspine scripts tests
53
+
54
+ .PHONY: drift
55
+ drift: ## Flag docs whose covered code changed since last verified
56
+ $(PYTHON) scripts/check_doc_drift.py
57
+
58
+ # ---- demo / eval ---------------------------------------------------------------------
59
+
60
+ .PHONY: demo
61
+ demo: ## Run the offline end-to-end demo (expects ALL CHECKS PASSED)
62
+ $(PYTHON) scripts/run_demo.py
63
+
64
+ # Forwards the question in Q to scripts/ask.py using the mock provider and the demo DB.
+ # $(value Q) keeps make from expanding `$` inside the question, and the single-quoted
+ # form (with embedded ' rewritten to '\'') stops the shell from re-parsing ", `, $ or \,
+ # so the script receives the question text verbatim as one argument.
+ .PHONY: ask
65
+ ask: ## Ask offline, e.g. make ask Q="中国内地FY2024的REVENUE是多少"
66
+ $(PYTHON) scripts/ask.py --provider mock --db data/fact_metric.db '$(subst ','\'',$(value Q))'
67
+
68
+ .PHONY: eval-qa
69
+ eval-qa: ## QA evaluation against the golden set (baseline-gated)
70
+ $(PYTHON) scripts/run_qa_eval.py
71
+
72
+ .PHONY: eval-retrieval
73
+ eval-retrieval: ## BM25-vs-hybrid retrieval A/B harness
74
+ $(PYTHON) scripts/eval_retrieval_ab.py
75
+
76
+ # ---- service -------------------------------------------------------------------------
77
+
78
+ .PHONY: serve
79
+ serve: ## Run the FastAPI server
80
+ $(PYTHON) scripts/run_server.py
81
+
82
+ .PHONY: worker
83
+ worker: ## Run the RQ worker (needs Redis)
84
+ $(PYTHON) scripts/run_worker.py
85
+
86
+ # ---- fixtures / docs -----------------------------------------------------------------
87
+
88
+ .PHONY: fixtures
89
+ fixtures: ## Regenerate the synthetic demo fixtures (deterministic)
90
+ $(PYTHON) scripts/make_synthetic_deck.py
91
+ $(PYTHON) scripts/make_fixtures_excel.py
92
+ $(PYTHON) scripts/make_fixtures_pptx.py
93
+ $(PYTHON) scripts/make_fixtures_pdf.py
94
+
95
+ .PHONY: docs
96
+ docs: ## Build the static API-reference site from docstrings (pdoc → docs/site, nginx-ready)
97
+ $(PYTHON) -m pdoc ragspine -o docs/site
98
+ @echo "API docs → docs/site/ (deploy: point nginx 'root' at $(abspath docs/site))"
99
+
100
+ .PHONY: clean
101
+ clean: ## Remove caches (pyc, pytest/ruff/mypy) and build artifacts
102
+ find . -type d -name __pycache__ -prune -exec rm -rf {} +
103
+ rm -rf .pytest_cache .ruff_cache .mypy_cache dist build *.egg-info
104
+
105
+ # ---- release -------------------------------------------------------------------------
106
+
107
+ .PHONY: build
108
+ build: ## Build wheel + sdist into dist/ and validate with twine check
109
+ rm -rf dist
110
+ $(PYTHON) -m build
111
+ $(PYTHON) -m twine check dist/*
112
+
113
+ .PHONY: publish-test
114
+ publish-test: build ## Build, then upload to TestPyPI (rehearse a real release; needs a TestPyPI token)
115
+ $(PYTHON) -m twine upload --repository testpypi dist/*
116
+
117
+ .PHONY: publish
118
+ publish: build ## Build, then upload to PyPI (real release; needs a PyPI token)
119
+ $(PYTHON) -m twine upload dist/*
120
+
121
+ # ---- meta ----------------------------------------------------------------------------
122
+
123
+ # Self-documenting help: prints every `name: ... ## description` rule header found in the
+ # parsed makefile(s), sorted, as a coloured two-column list.
+ # The patterns use plain `.*` rather than `.*?`: POSIX EREs (grep -E, awk) have no lazy
+ # quantifier and leave stacked repetition operators undefined, so `.*?` depended on
+ # implementation-specific behaviour. Digits are accepted in target names as well.
+ .PHONY: help
124
+ help: ## List available targets
125
+ @grep -hE '^[a-zA-Z0-9_-]+:.*## ' $(MAKEFILE_LIST) \
126
+ | sort \
127
+ | awk 'BEGIN{FS=":.*## "}{printf " \033[36m%-16s\033[0m %s\n", $$1, $$2}'