financebench-rag-agent 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. financebench_rag_agent-0.1.0/.env.example +202 -0
  2. financebench_rag_agent-0.1.0/.gitignore +164 -0
  3. financebench_rag_agent-0.1.0/Dockerfile +44 -0
  4. financebench_rag_agent-0.1.0/Makefile +92 -0
  5. financebench_rag_agent-0.1.0/PKG-INFO +210 -0
  6. financebench_rag_agent-0.1.0/README.md +115 -0
  7. financebench_rag_agent-0.1.0/alembic.ini +48 -0
  8. financebench_rag_agent-0.1.0/cli/__init__.py +3 -0
  9. financebench_rag_agent-0.1.0/cli/__main__.py +48 -0
  10. financebench_rag_agent-0.1.0/cli/api_client.py +94 -0
  11. financebench_rag_agent-0.1.0/cli/commands/__init__.py +0 -0
  12. financebench_rag_agent-0.1.0/cli/commands/approvals.py +217 -0
  13. financebench_rag_agent-0.1.0/cli/commands/chat.py +321 -0
  14. financebench_rag_agent-0.1.0/cli/commands/down.py +51 -0
  15. financebench_rag_agent-0.1.0/cli/commands/login.py +56 -0
  16. financebench_rag_agent-0.1.0/cli/commands/logout.py +14 -0
  17. financebench_rag_agent-0.1.0/cli/commands/setup.py +268 -0
  18. financebench_rag_agent-0.1.0/cli/commands/status.py +81 -0
  19. financebench_rag_agent-0.1.0/cli/commands/threads.py +214 -0
  20. financebench_rag_agent-0.1.0/cli/commands/upgrade.py +130 -0
  21. financebench_rag_agent-0.1.0/cli/credentials.py +89 -0
  22. financebench_rag_agent-0.1.0/cli/interactive.py +434 -0
  23. financebench_rag_agent-0.1.0/cli/render.py +190 -0
  24. financebench_rag_agent-0.1.0/cli/slash.py +384 -0
  25. financebench_rag_agent-0.1.0/cli/sse_consumer.py +120 -0
  26. financebench_rag_agent-0.1.0/compose.minimal.yml +97 -0
  27. financebench_rag_agent-0.1.0/data/sample/10k_aapl_2023.pdf +290 -0
  28. financebench_rag_agent-0.1.0/data/sample/10k_msft_2023.pdf +500 -0
  29. financebench_rag_agent-0.1.0/data/sample/10k_tsla_2023.pdf +423 -0
  30. financebench_rag_agent-0.1.0/data/sample/expense_policy_corporate_travel.pdf +0 -0
  31. financebench_rag_agent-0.1.0/data/sample/expense_policy_procurement.pdf +0 -0
  32. financebench_rag_agent-0.1.0/data/sample/invoice_cloudservices_003.pdf +0 -0
  33. financebench_rag_agent-0.1.0/data/sample/invoice_globalconsulting_002.pdf +0 -0
  34. financebench_rag_agent-0.1.0/data/sample/invoice_techsolutions_001.pdf +0 -0
  35. financebench_rag_agent-0.1.0/docker-compose.yml +330 -0
  36. financebench_rag_agent-0.1.0/docs/api-reference.md +527 -0
  37. financebench_rag_agent-0.1.0/docs/architecture.md +170 -0
  38. financebench_rag_agent-0.1.0/docs/cli.md +172 -0
  39. financebench_rag_agent-0.1.0/docs/deploy.md +135 -0
  40. financebench_rag_agent-0.1.0/docs/engineering-log.md +1711 -0
  41. financebench_rag_agent-0.1.0/docs/evaluation.md +192 -0
  42. financebench_rag_agent-0.1.0/docs/rbac-matrix.md +72 -0
  43. financebench_rag_agent-0.1.0/docs/research/01-evaluation-and-llms.md +138 -0
  44. financebench_rag_agent-0.1.0/docs/research/02-production-roadmap.md +206 -0
  45. financebench_rag_agent-0.1.0/docs/research/03-ui-framework-assessment.md +169 -0
  46. financebench_rag_agent-0.1.0/docs/research/04-benchmarking-vs-literature.md +144 -0
  47. financebench_rag_agent-0.1.0/docs/research/05-non-llm-retrieval-filters.md +87 -0
  48. financebench_rag_agent-0.1.0/docs/research/06-failure-analysis.md +112 -0
  49. financebench_rag_agent-0.1.0/docs/research/07-chunker-experiments.md +178 -0
  50. financebench_rag_agent-0.1.0/docs/setup.md +160 -0
  51. financebench_rag_agent-0.1.0/docs/upgrade.md +189 -0
  52. financebench_rag_agent-0.1.0/pyproject.toml +180 -0
  53. financebench_rag_agent-0.1.0/scripts/analyze_financebench_run.py +215 -0
  54. financebench_rag_agent-0.1.0/scripts/audit_failed_qs.py +210 -0
  55. financebench_rag_agent-0.1.0/scripts/build_grader_training_data.py +246 -0
  56. financebench_rag_agent-0.1.0/scripts/build_judge_calibration.py +395 -0
  57. financebench_rag_agent-0.1.0/scripts/build_pipeline_diagnostic.py +158 -0
  58. financebench_rag_agent-0.1.0/scripts/build_reranker_training_data.py +284 -0
  59. financebench_rag_agent-0.1.0/scripts/build_reranker_training_data_v2.py +400 -0
  60. financebench_rag_agent-0.1.0/scripts/debug_01865.py +133 -0
  61. financebench_rag_agent-0.1.0/scripts/debug_03029.py +131 -0
  62. financebench_rag_agent-0.1.0/scripts/demo/hitl.tape +99 -0
  63. financebench_rag_agent-0.1.0/scripts/demo/memory.tape +71 -0
  64. financebench_rag_agent-0.1.0/scripts/demo/rbac.tape +75 -0
  65. financebench_rag_agent-0.1.0/scripts/diff_financebench_runs.py +166 -0
  66. financebench_rag_agent-0.1.0/scripts/download_financebench.py +93 -0
  67. financebench_rag_agent-0.1.0/scripts/download_financebench_pdfs.py +117 -0
  68. financebench_rag_agent-0.1.0/scripts/download_sample_data.py +806 -0
  69. financebench_rag_agent-0.1.0/scripts/download_sec_filings.py +160 -0
  70. financebench_rag_agent-0.1.0/scripts/dual_judge_check.py +228 -0
  71. financebench_rag_agent-0.1.0/scripts/enrich_bm25_doc2query.py +234 -0
  72. financebench_rag_agent-0.1.0/scripts/eval_grader_lora.py +206 -0
  73. financebench_rag_agent-0.1.0/scripts/eval_grader_models_compare.py +434 -0
  74. financebench_rag_agent-0.1.0/scripts/eval_reranker_stratified.py +295 -0
  75. financebench_rag_agent-0.1.0/scripts/export_calibration_to_md.py +158 -0
  76. financebench_rag_agent-0.1.0/scripts/export_pipeline_diagnostic_to_md.py +342 -0
  77. financebench_rag_agent-0.1.0/scripts/extract_sec_sections.py +223 -0
  78. financebench_rag_agent-0.1.0/scripts/generate_jwt.py +23 -0
  79. financebench_rag_agent-0.1.0/scripts/ingest_documents.py +34 -0
  80. financebench_rag_agent-0.1.0/scripts/ingest_financebench.py +167 -0
  81. financebench_rag_agent-0.1.0/scripts/ingest_financebench_docling.py +216 -0
  82. financebench_rag_agent-0.1.0/scripts/inspect_gold_chunks.py +146 -0
  83. financebench_rag_agent-0.1.0/scripts/label_gold_chunks.py +412 -0
  84. financebench_rag_agent-0.1.0/scripts/label_judge_calibration.py +191 -0
  85. financebench_rag_agent-0.1.0/scripts/parse_calibration_md.py +105 -0
  86. financebench_rag_agent-0.1.0/scripts/parse_pipeline_diagnostic_md.py +138 -0
  87. financebench_rag_agent-0.1.0/scripts/patch_cache_indices.py +275 -0
  88. financebench_rag_agent-0.1.0/scripts/reembed_collection.py +152 -0
  89. financebench_rag_agent-0.1.0/scripts/restart.sh +178 -0
  90. financebench_rag_agent-0.1.0/scripts/run_chunker_ab_gate.py +45 -0
  91. financebench_rag_agent-0.1.0/scripts/run_clean_mode_benchmark.py +138 -0
  92. financebench_rag_agent-0.1.0/scripts/run_devset.py +313 -0
  93. financebench_rag_agent-0.1.0/scripts/score_deepeval.py +319 -0
  94. financebench_rag_agent-0.1.0/scripts/score_patronus.py +267 -0
  95. financebench_rag_agent-0.1.0/scripts/seed_qdrant.py +32 -0
  96. financebench_rag_agent-0.1.0/scripts/show_run.py +147 -0
  97. financebench_rag_agent-0.1.0/scripts/shutdown.sh +68 -0
  98. financebench_rag_agent-0.1.0/scripts/smoke_abaci.py +135 -0
  99. financebench_rag_agent-0.1.0/scripts/smoke_calculator.py +193 -0
  100. financebench_rag_agent-0.1.0/scripts/smoke_doc2query.py +88 -0
  101. financebench_rag_agent-0.1.0/scripts/smoke_docling_chunker.py +177 -0
  102. financebench_rag_agent-0.1.0/scripts/smoke_docling_markdown.py +163 -0
  103. financebench_rag_agent-0.1.0/scripts/smoke_docling_markdown_chunker.py +162 -0
  104. financebench_rag_agent-0.1.0/scripts/smoke_hallucination_claude.py +132 -0
  105. financebench_rag_agent-0.1.0/scripts/smoke_research_agent.py +208 -0
  106. financebench_rag_agent-0.1.0/scripts/smoke_research_agent_day3.py +233 -0
  107. financebench_rag_agent-0.1.0/scripts/smoke_tier_downgrade.py +244 -0
  108. financebench_rag_agent-0.1.0/scripts/smoke_voyage_finance.py +125 -0
  109. financebench_rag_agent-0.1.0/scripts/train_grader_lora.py +279 -0
  110. financebench_rag_agent-0.1.0/scripts/train_ltr_gate.py +75 -0
  111. financebench_rag_agent-0.1.0/scripts/train_reranker_lora.py +339 -0
  112. financebench_rag_agent-0.1.0/src/__init__.py +0 -0
  113. financebench_rag_agent-0.1.0/src/api/__init__.py +0 -0
  114. financebench_rag_agent-0.1.0/src/api/dependencies.py +22 -0
  115. financebench_rag_agent-0.1.0/src/api/main.py +139 -0
  116. financebench_rag_agent-0.1.0/src/api/routes/__init__.py +0 -0
  117. financebench_rag_agent-0.1.0/src/api/routes/admin.py +469 -0
  118. financebench_rag_agent-0.1.0/src/api/routes/approvals.py +189 -0
  119. financebench_rag_agent-0.1.0/src/api/routes/auth.py +69 -0
  120. financebench_rag_agent-0.1.0/src/api/routes/chat.py +447 -0
  121. financebench_rag_agent-0.1.0/src/api/routes/documents.py +122 -0
  122. financebench_rag_agent-0.1.0/src/api/routes/health.py +56 -0
  123. financebench_rag_agent-0.1.0/src/api/routes/hitl.py +181 -0
  124. financebench_rag_agent-0.1.0/src/api/routes/ingest.py +111 -0
  125. financebench_rag_agent-0.1.0/src/api/routes/threads.py +288 -0
  126. financebench_rag_agent-0.1.0/src/config/__init__.py +0 -0
  127. financebench_rag_agent-0.1.0/src/config/prompts.py +696 -0
  128. financebench_rag_agent-0.1.0/src/config/rbac_config.py +87 -0
  129. financebench_rag_agent-0.1.0/src/config/settings.py +212 -0
  130. financebench_rag_agent-0.1.0/src/frontend/__init__.py +0 -0
  131. financebench_rag_agent-0.1.0/src/frontend/gradio_app.py +193 -0
  132. financebench_rag_agent-0.1.0/src/graph/__init__.py +0 -0
  133. financebench_rag_agent-0.1.0/src/graph/builder.py +119 -0
  134. financebench_rag_agent-0.1.0/src/graph/edges.py +90 -0
  135. financebench_rag_agent-0.1.0/src/graph/nodes/__init__.py +0 -0
  136. financebench_rag_agent-0.1.0/src/graph/nodes/entity_extractor.py +164 -0
  137. financebench_rag_agent-0.1.0/src/graph/nodes/generator.py +191 -0
  138. financebench_rag_agent-0.1.0/src/graph/nodes/grader.py +256 -0
  139. financebench_rag_agent-0.1.0/src/graph/nodes/guardrails.py +113 -0
  140. financebench_rag_agent-0.1.0/src/graph/nodes/hallucination.py +112 -0
  141. financebench_rag_agent-0.1.0/src/graph/nodes/hitl_gate.py +127 -0
  142. financebench_rag_agent-0.1.0/src/graph/nodes/query_rewriter.py +48 -0
  143. financebench_rag_agent-0.1.0/src/graph/nodes/rbac_gate.py +18 -0
  144. financebench_rag_agent-0.1.0/src/graph/nodes/reranker.py +37 -0
  145. financebench_rag_agent-0.1.0/src/graph/nodes/research_agent.py +532 -0
  146. financebench_rag_agent-0.1.0/src/graph/nodes/response_formatter.py +47 -0
  147. financebench_rag_agent-0.1.0/src/graph/nodes/retrieval.py +199 -0
  148. financebench_rag_agent-0.1.0/src/graph/nodes/retrieval_evaluator.py +54 -0
  149. financebench_rag_agent-0.1.0/src/graph/nodes/router.py +39 -0
  150. financebench_rag_agent-0.1.0/src/graph/nodes/terminal_nodes.py +91 -0
  151. financebench_rag_agent-0.1.0/src/ingestion/__init__.py +0 -0
  152. financebench_rag_agent-0.1.0/src/ingestion/chunker.py +522 -0
  153. financebench_rag_agent-0.1.0/src/ingestion/docling_loader.py +94 -0
  154. financebench_rag_agent-0.1.0/src/ingestion/metadata_extractor.py +107 -0
  155. financebench_rag_agent-0.1.0/src/ingestion/pipeline.py +65 -0
  156. financebench_rag_agent-0.1.0/src/ingestion/qdrant_uploader.py +49 -0
  157. financebench_rag_agent-0.1.0/src/models/__init__.py +0 -0
  158. financebench_rag_agent-0.1.0/src/models/auth.py +10 -0
  159. financebench_rag_agent-0.1.0/src/models/schemas.py +164 -0
  160. financebench_rag_agent-0.1.0/src/models/state.py +105 -0
  161. financebench_rag_agent-0.1.0/src/services/__init__.py +0 -0
  162. financebench_rag_agent-0.1.0/src/services/auth_service.py +37 -0
  163. financebench_rag_agent-0.1.0/src/services/candidate_validator.py +66 -0
  164. financebench_rag_agent-0.1.0/src/services/company_registry.py +122 -0
  165. financebench_rag_agent-0.1.0/src/services/cost_tracker.py +501 -0
  166. financebench_rag_agent-0.1.0/src/services/embeddings.py +171 -0
  167. financebench_rag_agent-0.1.0/src/services/event_log.py +368 -0
  168. financebench_rag_agent-0.1.0/src/services/guardrails_service.py +176 -0
  169. financebench_rag_agent-0.1.0/src/services/llm_factory.py +412 -0
  170. financebench_rag_agent-0.1.0/src/services/llm_retry.py +138 -0
  171. financebench_rag_agent-0.1.0/src/services/ltr_gate_service.py +87 -0
  172. financebench_rag_agent-0.1.0/src/services/multi_hyde.py +112 -0
  173. financebench_rag_agent-0.1.0/src/services/request_context.py +15 -0
  174. financebench_rag_agent-0.1.0/src/services/reranker_service.py +156 -0
  175. financebench_rag_agent-0.1.0/src/services/result_cache.py +197 -0
  176. financebench_rag_agent-0.1.0/src/services/roles_service.py +240 -0
  177. financebench_rag_agent-0.1.0/src/services/thread_service.py +292 -0
  178. financebench_rag_agent-0.1.0/src/services/vector_store.py +298 -0
  179. financebench_rag_agent-0.1.0/src/tools/__init__.py +0 -0
  180. financebench_rag_agent-0.1.0/src/tools/calculator.py +182 -0
  181. financebench_rag_agent-0.1.0/tests/__init__.py +0 -0
  182. financebench_rag_agent-0.1.0/tests/conftest.py +76 -0
  183. financebench_rag_agent-0.1.0/tests/evaluation/__init__.py +0 -0
  184. financebench_rag_agent-0.1.0/tests/evaluation/analysis_utils.py +154 -0
  185. financebench_rag_agent-0.1.0/tests/evaluation/check_dual_thresholds.py +56 -0
  186. financebench_rag_agent-0.1.0/tests/evaluation/diagnostic_runner.py +455 -0
  187. financebench_rag_agent-0.1.0/tests/evaluation/eval_config.py +102 -0
  188. financebench_rag_agent-0.1.0/tests/evaluation/eval_dataset.json +307 -0
  189. financebench_rag_agent-0.1.0/tests/evaluation/financebench_eval_config.py +14 -0
  190. financebench_rag_agent-0.1.0/tests/evaluation/freeze_baselines.py +57 -0
  191. financebench_rag_agent-0.1.0/tests/evaluation/judge_calibration_manual_labels_summary.csv +105 -0
  192. financebench_rag_agent-0.1.0/tests/evaluation/judge_calibration_v1.jsonl +89 -0
  193. financebench_rag_agent-0.1.0/tests/evaluation/judge_calibration_v1_holdout.jsonl +15 -0
  194. financebench_rag_agent-0.1.0/tests/evaluation/judge_calibration_v1_holdout_labeled.md +930 -0
  195. financebench_rag_agent-0.1.0/tests/evaluation/judge_calibration_v1_labeled.md +5338 -0
  196. financebench_rag_agent-0.1.0/tests/evaluation/judge_eval.py +497 -0
  197. financebench_rag_agent-0.1.0/tests/evaluation/phase_eval.py +839 -0
  198. financebench_rag_agent-0.1.0/tests/evaluation/phase_eval_data/v1/_audit.jsonl +1093 -0
  199. financebench_rag_agent-0.1.0/tests/evaluation/phase_eval_data/v1/gold_chunks.jsonl +150 -0
  200. financebench_rag_agent-0.1.0/tests/evaluation/phase_eval_results/financebench_grader_ab_v1.json +3744 -0
  201. financebench_rag_agent-0.1.0/tests/evaluation/phase_eval_results/financebench_phase_eval_v1.json +1034 -0
  202. financebench_rag_agent-0.1.0/tests/evaluation/phase_eval_results/financebench_phase_eval_v1_grader.json +979 -0
  203. financebench_rag_agent-0.1.0/tests/evaluation/phase_eval_results/financebench_phase_eval_v1_per_question.jsonl +147 -0
  204. financebench_rag_agent-0.1.0/tests/evaluation/phase_eval_results/financebench_phase_eval_v2_post_sprint_7_15.json +1009 -0
  205. financebench_rag_agent-0.1.0/tests/evaluation/pipeline_diagnostic_manual_labels_summary.json +696 -0
  206. financebench_rag_agent-0.1.0/tests/evaluation/pipeline_diagnostic_v1.jsonl +75 -0
  207. financebench_rag_agent-0.1.0/tests/evaluation/pipeline_diagnostic_v1_labeled.md +9239 -0
  208. financebench_rag_agent-0.1.0/tests/evaluation/rejudge.py +182 -0
  209. financebench_rag_agent-0.1.0/tests/evaluation/run_evaluation.py +352 -0
  210. financebench_rag_agent-0.1.0/tests/evaluation/run_financebench.py +1171 -0
  211. financebench_rag_agent-0.1.0/tests/evaluation/run_financebench_phase_a.py +179 -0
  212. financebench_rag_agent-0.1.0/tests/integration/__init__.py +0 -0
  213. financebench_rag_agent-0.1.0/tests/integration/test_rbac_integration.py +202 -0
  214. financebench_rag_agent-0.1.0/tests/integration/test_sprint9_endpoints_integration.py +299 -0
  215. financebench_rag_agent-0.1.0/tests/unit/__init__.py +0 -0
  216. financebench_rag_agent-0.1.0/tests/unit/test_admin_audit_and_evaluations.py +248 -0
  217. financebench_rag_agent-0.1.0/tests/unit/test_admin_costs.py +187 -0
  218. financebench_rag_agent-0.1.0/tests/unit/test_admin_users_and_roles.py +168 -0
  219. financebench_rag_agent-0.1.0/tests/unit/test_auth_routes.py +72 -0
  220. financebench_rag_agent-0.1.0/tests/unit/test_auth_service.py +103 -0
  221. financebench_rag_agent-0.1.0/tests/unit/test_calculator.py +282 -0
  222. financebench_rag_agent-0.1.0/tests/unit/test_candidate_validator.py +20 -0
  223. financebench_rag_agent-0.1.0/tests/unit/test_chunker.py +198 -0
  224. financebench_rag_agent-0.1.0/tests/unit/test_company_registry.py +8 -0
  225. financebench_rag_agent-0.1.0/tests/unit/test_cost_tracker.py +171 -0
  226. financebench_rag_agent-0.1.0/tests/unit/test_documents_route.py +95 -0
  227. financebench_rag_agent-0.1.0/tests/unit/test_edges.py +275 -0
  228. financebench_rag_agent-0.1.0/tests/unit/test_entity_extractor.py +149 -0
  229. financebench_rag_agent-0.1.0/tests/unit/test_guardrails.py +192 -0
  230. financebench_rag_agent-0.1.0/tests/unit/test_ingest_upload.py +103 -0
  231. financebench_rag_agent-0.1.0/tests/unit/test_metadata_extractor.py +167 -0
  232. financebench_rag_agent-0.1.0/tests/unit/test_multi_hyde.py +240 -0
  233. financebench_rag_agent-0.1.0/tests/unit/test_rbac_gate.py +95 -0
  234. financebench_rag_agent-0.1.0/tests/unit/test_response_formatter.py +179 -0
  235. financebench_rag_agent-0.1.0/tests/unit/test_result_cache.py +233 -0
  236. financebench_rag_agent-0.1.0/tests/unit/test_terminal_nodes.py +126 -0
  237. financebench_rag_agent-0.1.0/tests/unit/test_threads_routes.py +144 -0
@@ -0,0 +1,202 @@
1
+ # LLM Providers
2
+ OPENAI_API_KEY=sk-your-openai-key-here
3
+ GROQ_API_KEY=gsk_your-groq-key-here
4
+ ANTHROPIC_API_KEY=sk-ant-your-anthropic-key-here
5
+ # Voyage AI — finance-tuned embeddings (Sprint 7.8 Week 1). Free tier is 50M
6
+ # tokens / account. Sign up at https://www.voyageai.com → API keys.
7
+ VOYAGE_API_KEY=pa-your-voyage-key-here
8
+
9
+ # --- Embedding provider ---
10
+ # "openai" (default) or "voyage". When "voyage", set EMBEDDING_MODEL=voyage-finance-2
11
+ # and EMBEDDING_DIMENSIONS=1024.
12
+ EMBEDDING_PROVIDER=openai
13
+ EMBEDDING_MODEL=text-embedding-3-large
14
+ EMBEDDING_DIMENSIONS=3072
15
+
16
+ # Patronus AI (optional) — hosted fuzzy-match judge for FinanceBench eval.
17
+ # Sign up for free at https://app.patronus.ai and paste your key here. The free tier
18
+ # is sufficient for the 150-question FinanceBench run.
19
+ PATRONUS_API_KEY=
20
+ # Set to true to route all LLM calls through OpenAI (bypasses Groq free-tier rate
21
+ # limits during eval runs; also bypasses Anthropic for eval cost control).
22
+ # Keep false in production.
23
+ FORCE_OPENAI_ONLY=false
24
+ # Use Groq for the high-volume fast-path nodes (router, grader, query_rewriter).
25
+ # Set false during long evals (e.g. FinanceBench) so router/grader/query_rewriter
26
+ # go to OpenAI even when GROQ_API_KEY is set — avoids the Groq 100k tokens-per-day
27
+ # free-tier cap. Default true preserves the production latency profile.
28
+ USE_GROQ_FAST_PATH=true
29
+ ENABLE_DETERMINISTIC_VALIDATOR=true
30
+ VALIDATOR_MIN_KEEP=3
31
+ ENABLE_LTR_GATE=false
32
+ LTR_GATE_MODEL_PATH=data/models/ltr_gate.json
33
+ LTR_GATE_HIGH_CONFIDENCE=0.9
34
+ LTR_GATE_LOW_CONFIDENCE=0.1
35
+ ENABLE_SELECTIVE_RETRIEVAL_EVALUATOR=false
36
+ RETRIEVAL_EVALUATOR_MIN_CONFIDENCE=0.55
37
+
38
+ # Multi-HyDE (Sprint 7.10a) — N hypothetical 10-K-style passages generated per
39
+ # query, each embedded + searched, RRF-fused across (orig + N) paths. Targets
40
+ # the question-phrasing vs document-phrasing vocabulary gap.
41
+ ENABLE_MULTI_HYDE=false
42
+ MULTI_HYDE_N=3
43
+ MULTI_HYDE_MODEL=gpt-4o-mini
44
+ MULTI_HYDE_TEMPERATURE=0.3
45
+
46
+ # LangSmith Observability
47
+ LANGCHAIN_TRACING_V2=true
48
+ LANGCHAIN_PROJECT=rag-agent-dev
49
+ LANGCHAIN_API_KEY=lsv2_pt_your-langsmith-key-here
50
+
51
+ # Authentication
52
+ JWT_SECRET=change-this-to-a-random-secret-in-production
53
+
54
+ # Qdrant
55
+ QDRANT_HOST=localhost
56
+ QDRANT_PORT=6333
57
+ QDRANT_COLLECTION=financial_docs
58
+
59
+ # Documents root — filesystem location from which `GET /documents/{filename}`
60
+ # serves PDFs for the frontend citation clickthrough. Path traversal is
61
+ # rejected (only basenames inside this directory are served).
62
+ DOCUMENTS_ROOT=data/sample
63
+
64
+ # PostgreSQL (for LangGraph checkpointer)
65
+ POSTGRES_DB=rag_agent
66
+ POSTGRES_USER=rag_user
67
+ POSTGRES_PASSWORD=change-this-in-production
68
+ POSTGRES_HOST=localhost
69
+ POSTGRES_PORT=5432
70
+
71
+ # LiteLLM gateway (Sprint 8 8a) — single proxy fronting every LLM call.
72
+ # Default empty = direct-provider behavior (Sprint 7.x compat).
73
+ # Set after `docker compose up -d litellm` to route through the proxy:
74
+ # - inside compose: http://litellm:4000
75
+ # - host-based dev: http://localhost:4000
76
+ LITELLM_URL=
77
+
78
+ # Redis (Sprint 8 8b) — backs the LiteLLM `redis-semantic` cache.
79
+ # These variables are read from the litellm container's environment. docker-compose.yml sets
80
+ # REDIS_HOST=redis / REDIS_PORT=6379 / REDIS_PASSWORD= explicitly, so
81
+ # nothing here is required for local-compose dev. The keys are listed
82
+ # for production deployments where Redis lives on a separate host.
83
+ REDIS_HOST=
84
+ REDIS_PORT=
85
+ REDIS_PASSWORD=
86
+
87
+ # Langfuse (Sprint 8 8c) — self-hosted observability stack. The local
88
+ # docker-compose seeds an org/project with dev defaults so LiteLLM
89
+ # auto-attaches on boot with no manual UI step. For shared / multi-user /
90
+ # production deployments, override every variable in this section: the
91
+ # defaults baked into docker-compose.yml only protect a local-machine
92
+ # instance.
93
+ #
94
+ # LANGFUSE_HOST / LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY — point the rag-agent at any
95
+ # Langfuse instance (hosted or self-hosted). Empty = use the in-compose
96
+ # `langfuse-web` defaults from docker-compose.yml.
97
+ LANGFUSE_HOST=
98
+ LANGFUSE_PUBLIC_KEY=
99
+ LANGFUSE_SECRET_KEY=
100
+
101
+ # Langfuse data-plane secrets — only consumed by docker-compose.yml to
102
+ # parameterize the self-hosted stack (langfuse-postgres / langfuse-redis /
103
+ # langfuse-clickhouse / langfuse-minio / langfuse-worker / langfuse-web).
104
+ # Each has a dev-only default in docker-compose.yml so a fresh `docker
105
+ # compose up` works without setting any of these. Override BEFORE any
106
+ # shared deployment.
107
+ #
108
+ # Cryptography — regenerate each with `openssl rand -hex 32`:
109
+ LANGFUSE_SALT=
110
+ LANGFUSE_ENCRYPTION_KEY=
111
+ LANGFUSE_NEXTAUTH_SECRET=
112
+
113
+ # Data-plane passwords (langfuse-postgres / langfuse-redis / langfuse-clickhouse):
114
+ LANGFUSE_PG_PASSWORD=
115
+ LANGFUSE_REDIS_PASSWORD=
116
+ LANGFUSE_CLICKHOUSE_PASSWORD=
117
+
118
+ # MinIO (S3-compatible object storage for Langfuse event blobs):
119
+ LANGFUSE_MINIO_USER=
120
+ LANGFUSE_MINIO_PASSWORD=
121
+
122
+ # Initial UI admin password for the auto-bootstrapped Langfuse user
123
+ # `dev@local.test`. Used only on first boot of a fresh langfuse-postgres
124
+ # volume; subsequent boots ignore it. Change BEFORE first boot if exposing
125
+ # the Langfuse UI on a shared network.
126
+ LANGFUSE_INIT_USER_PASSWORD=
127
+
128
+ # Sprint 8e — per-stage Redis result cache. Three caches share one Redis DB
129
+ # (logical DB 1, separate from LiteLLM's semantic cache on DB 0):
130
+ # - Voyage query embedding by query text
131
+ # - BGE reranker score by (query, chunk_text)
132
+ # - Grader relevance verdict by (query, chunk_text)
133
+ # All three fail open (any Redis error logs a warning and falls through to
134
+ # compute), so leaving these unset does not break requests — but host-side runs should still set the port (see IMPORTANT below).
135
+ #
136
+ # IMPORTANT — host-side port is 6380, NOT 6379. Many macOS dev boxes have a
137
+ # brew/launchd Redis already on 6379; the loopback bind wins the race vs
138
+ # docker's wildcard so host-side cache calls would silently land on the
139
+ # wrong Redis. Sprint 8e diagnostic uncovered this. Inside the compose
140
+ # network the container port is still 6379.
141
+ RAG_RESULT_CACHE_ENABLED=1 # Set to 0 to disable all three layers
142
+ RESULT_CACHE_REDIS_HOST=localhost
143
+ RESULT_CACHE_REDIS_PORT=6380 # docker-compose host-mapped port
144
+ RESULT_CACHE_REDIS_DB=1
145
+ RAG_RESULT_CACHE_TTL_SECONDS=604800 # 7 days
146
+
147
+ # Application
148
+ ENVIRONMENT=dev
149
+ LOG_LEVEL=INFO
150
+
151
+ # CORS (JSON array of allowed origins; use ["*"] only in dev).
152
+ # Sprint 9 frontend defaults: Next.js dev on :3000, Gradio on :7860
153
+ # (keep until Gradio is removed in 9.5).
154
+ CORS_ORIGINS=["http://localhost:3000","http://localhost:7860"]
155
+
156
+ # --- Local model placement (Apple Silicon stability tuning) ---
157
+ # These settings control where transformer models load when running on Apple
158
+ # Silicon. Defaults are tuned for *long-running* eval workloads where MPS pool
159
+ # pressure causes OOM after ~50 inferences. Production single-query traffic
160
+ # can override to MPS for ~3x speedup on the BGE reranker if desired.
161
+ #
162
+ # RERANKER_DEVICE=cpu|mps|cuda (default: cpu)
163
+ # BGE cross-encoder reranker placement. CPU is ~100-200ms slower per 8-chunk
164
+ # batch but eliminates Apple-Silicon unified-memory OOM. Override to mps for
165
+ # production speed.
166
+ RERANKER_DEVICE=cpu
167
+ #
168
+ # RERANKER_ADAPTER_PATH=data/models/reranker_ft_v1 (default: unset → stock BGE)
169
+ # Load the Sprint 7.9 LoRA fine-tuned adapter on top of BAAI/bge-reranker-v2-m3.
170
+ # When unset, src/services/reranker_service.py:92 silently falls back to the
171
+ # stock CrossEncoder — same code path, but without the multi-hop slice gains
172
+ # measured in Sprint 7.9 Day 7 (4/13 → 11/13). The Sprint 7.19 audit caught
173
+ # this exact gap: the env var was added at Sprint 7.9 but never propagated
174
+ # into .env or .env.example, so the FT v1 adapter was silently inactive for
175
+ # ~5 sprints. Always set this for production / eval runs.
176
+ RERANKER_ADAPTER_PATH=data/models/reranker_ft_v1
177
+ #
178
+ # RESULT_CACHE_REDIS_HOST=localhost (default: localhost)
179
+ # RESULT_CACHE_REDIS_PORT=6380 (default: 6379 — WRONG for host-side scripts)
180
+ # docker-compose maps the rag-cache redis container's internal port 6379 to
181
+ # host port 6380 (line 'ports: ["6380:6379"]'). Host-side scripts (eval
182
+ # harness, smoke tests) must use port 6380; without it, every cache call
183
+ # raises ConnectionRefused, retrieval starts hitting the no-filter FALLBACK
184
+ # path, and the grader's entity_match rejects all chunks. The full pipeline
185
+ # stays "working" but regresses headline pass-rate to near zero. Sprint 7.18a
186
+ # spent 2 hours diagnosing this. From inside docker-compose (the FastAPI
187
+ # container on the rag-cache network), use host `redis` and port 6379
188
+ # instead. The two keys below repeat the values already set in the Sprint 8e result-cache block above.
189
+ RESULT_CACHE_REDIS_HOST=localhost
190
+ RESULT_CACHE_REDIS_PORT=6380
191
+ #
192
+ # LLM_GUARD_USE_ONNX=1|0 (default: 1)
193
+ # LLM Guard PromptInjection backend. ONNX runs on CPU with ~10x lower memory
194
+ # footprint than the PyTorch+MPS backend. Same model weights, same accuracy.
195
+ LLM_GUARD_USE_ONNX=1
196
+ #
197
+ # RAG_DISABLE_LLM_GUARD=1|0 (default: 0)
198
+ # Hard-disable LLM Guard (Layer 2 of injection check) entirely. Layers 1
199
+ # (regex) and 3 (LLM classifier) still run. Useful for non-adversarial
200
+ # workloads (e.g. FinanceBench eval) where Layer 2 adds no signal but
201
+ # consumes memory.
202
+ RAG_DISABLE_LLM_GUARD=0
@@ -0,0 +1,164 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ dist/
7
+ build/
8
+ *.egg
9
+
10
+ # Virtual environments
11
+ .venv/
12
+ venv/
13
+ env/
14
+
15
+ # Environment variables
16
+ .env
17
+ .env.bak
18
+ .env.local
19
+ .env.production
20
+
21
+ # IDE
22
+ .vscode/
23
+ .idea/
24
+ *.swp
25
+ *.swo
26
+ *~
27
+
28
+ # macOS
29
+ .DS_Store
30
+ .AppleDouble
31
+ .LSOverride
32
+
33
+ # Data (raw PDFs can be large)
34
+ data/raw/
35
+ # Raw SEC HTMLs (downloaded; regenerable via scripts/download_sec_filings.py). Already matched by data/raw/ above.
36
+ data/raw/sec/
37
+
38
+ # Qdrant local storage
39
+ qdrant_data/
40
+
41
+ # Postgres local data
42
+ pg_data/
43
+
44
+ # Sprint 7.17 grader LoRA-FT v1 exploratory artifacts (Signal 12 failure — base
45
+ # model 45× below the validated capacity floor). Kept locally for ablation
46
+ # reproducibility; not tracked because the experiment is documented in
47
+ # docs/engineering-log.md Sprint 7.17 and the adapter outputs aren't reusable.
48
+ data/models/grader_ft_v1_hard_r8/
49
+ data/models/grader_ft_v1_mixed_r8/
50
+ data/models/grader_ft_v1_random_r8/
51
+ data/training/grader_ft_v1/
52
+
53
+ # Evaluation results (regenerated)
54
+ tests/evaluation/eval_results/*.json
55
+ # ...except committed baselines, sprint milestone snapshots, and final results
56
+ !tests/evaluation/eval_results/baseline_*.json
57
+ !tests/evaluation/eval_results/after_sprint*.json
58
+ !tests/evaluation/eval_results/final_*.json
59
+ # Canonical milestone results under the calibrated Sonnet 4.6 + v2 judge (κ=0.932)
60
+ !tests/evaluation/eval_results/financebench_pypdf_voyage_tiered_ft_litellm_v1_grader.json
61
+ !tests/evaluation/eval_results/financebench_pypdf_voyage_tiered_ft_litellm_v1_grader.correctness.json
62
+ !tests/evaluation/eval_results/financebench_pypdf_voyage_tiered_ft_litellm_v1_grader.rejudged_sonnet_v2.correctness.json
63
+ !tests/evaluation/eval_results/financebench_pypdf_voyage_tiered_ft_litellm_v1_grader.rejudged_sonnet_v2.diff.json
64
+ !tests/evaluation/eval_results/financebench_pypdf_voyage_tiered_ft_litellm_4fix_plus_fix2.json
65
+ !tests/evaluation/eval_results/financebench_pypdf_voyage_tiered_ft_litellm_4fix_plus_fix2.correctness.json
66
+ !tests/evaluation/eval_results/financebench_pypdf_voyage_tiered_ft_litellm_4fix_plus_fix2.rejudged_sonnet_v2.correctness.json
67
+ !tests/evaluation/eval_results/financebench_pypdf_voyage_tiered_ft_litellm_4fix_plus_fix2.rejudged_sonnet_v2.diff.json
68
+ !tests/evaluation/eval_results/financebench_pypdf_voyage_tiered_ft_litellm_4fix_plus_fix2.ragas.json
69
+ !tests/evaluation/eval_results/financebench_pypdf_voyage_tiered_ft_litellm_4fix_plus_fix2.deepeval.json
70
+ # Sprint 7.16 milestone (gen-v2 = anti-refusal nudge + enumerate-fully clause 8)
71
+ !tests/evaluation/eval_results/financebench_pypdf_voyage_tiered_ft_litellm_gen_v2.json
72
+ !tests/evaluation/eval_results/financebench_pypdf_voyage_tiered_ft_litellm_gen_v2.correctness.json
73
+ !tests/evaluation/eval_results/financebench_pypdf_voyage_tiered_ft_litellm_gen_v2.rejudged_sonnet_v2.correctness.json
74
+ !tests/evaluation/eval_results/financebench_pypdf_voyage_tiered_ft_litellm_gen_v2.rejudged_sonnet_v2.diff.json
75
+ !tests/evaluation/eval_results/financebench_pypdf_voyage_tiered_ft_litellm_gen_v2.ragas.json
76
+ !tests/evaluation/eval_results/financebench_pypdf_voyage_tiered_ft_litellm_gen_v2.deepeval.json
77
+ # Sprint 7.16 validation artifacts
78
+ !tests/evaluation/eval_results/validation_antirefusal_nudge.json
79
+ !tests/evaluation/eval_results/validation_enumerate_fully.json
80
+ !tests/evaluation/eval_results/validation_directional_clause9.json
81
+ # Sprint 7.16 diagnostic artifacts (audit refresh + retrieval/reranker attribution + grader-on-gold)
82
+ !tests/evaluation/eval_results/audit_failed_qs_gen_v2.json
83
+ !tests/evaluation/eval_results/diag2_retrieval_reranker_attribution.json
84
+ !tests/evaluation/eval_results/diag3_grader_on_gold_chunks.json
85
+ # Sprint 7.17 grader-architecture experiment results
86
+ !tests/evaluation/eval_results/grader_ft_v1_component_eval.json
87
+ !tests/evaluation/eval_results/grader_models_compare.json
88
+ !tests/evaluation/eval_results/grader_haiku_max2048.json
89
+ # Sprint 7.17 follow-up: Caveat B falsification + Llama 3.3 70B via Fireworks
90
+ !tests/evaluation/eval_results/grader_haiku_system_split.json
91
+ !tests/evaluation/eval_results/grader_fireworks_llama33_70b.json
92
+ # Sprint 7.17 follow-up #3: full 150-Q FinanceBench eval w/ Llama-3.3-70B grader (OpenRouter)
93
+ !tests/evaluation/eval_results/financebench_openrouter_llama_grader_v1.json
94
+ !tests/evaluation/eval_results/financebench_openrouter_llama_grader_v1.correctness.json
95
+ !tests/evaluation/eval_results/financebench_openrouter_llama_grader_v1.rejudged_sonnet_v2.correctness.json
96
+ !tests/evaluation/eval_results/financebench_openrouter_llama_grader_v1.rejudged_sonnet_v2.diff.json
97
+ # Sprint 7.18a: RETRIEVAL_TOP_K=50→200 to recover RETRIEVAL_MISS bucket
98
+ !tests/evaluation/eval_results/financebench_retrieval_topk_200_v1.json
99
+ !tests/evaluation/eval_results/financebench_retrieval_topk_200_v1.correctness.json
100
+ !tests/evaluation/eval_results/financebench_retrieval_topk_200_v1.rejudged_sonnet_v2.correctness.json
101
+ !tests/evaluation/eval_results/financebench_retrieval_topk_200_v1.rejudged_sonnet_v2.diff.json
102
+ # Sprint 7.19 Step 0: FT v1 reranker re-enabled (RERANKER_ADAPTER_PATH bug fix)
103
+ !tests/evaluation/eval_results/financebench_ft_v1_reranker_active_v1.json
104
+ !tests/evaluation/eval_results/financebench_ft_v1_reranker_active_v1.correctness.json
105
+ !tests/evaluation/eval_results/financebench_ft_v1_reranker_active_v1.rejudged_sonnet_v2.correctness.json
106
+ !tests/evaluation/eval_results/financebench_ft_v1_reranker_active_v1.rejudged_sonnet_v2.diff.json
107
+ # Methodology artifacts referenced in docs/engineering-log.md
108
+ !tests/evaluation/eval_results/audit_failed_qs_v1_grader.json
109
+ !tests/evaluation/eval_results/audit_failed_qs_4fix.json
110
+ !tests/evaluation/eval_results/hallu_model_ablation.json
111
+ !tests/evaluation/eval_results/slice_analysis_4fix_plus_fix2.json
112
+ !tests/evaluation/eval_results/pipeline_diagnostic_results.json
113
+ # ...but re-ignore pipeline caches (they're intermediate, regenerated on each run)
114
+ tests/evaluation/eval_results/*.pipeline.json
115
+
116
+ # Jupyter
117
+ .ipynb_checkpoints/
118
+
119
+ # Docker — log files
120
+ *.log
121
+
122
+ # Coverage
123
+ htmlcov/
124
+ .coverage
125
+ .coverage.*
126
+
127
+ # Runtime logs (from restart.sh)
128
+ logs/
129
+
130
+ # Claude Code local tool state
131
+ .claude/
132
+ CLAUDE.md
133
+
134
+ # Internal planning / session notes — kept locally for context, not in public repo.
135
+ # The polished public-facing version of this material lives in docs/engineering-log.md.
136
+ SESSION_HANDOFF.md
137
+ IMPLEMENTATION_PLAN.md
138
+ IMPROVEMENT_PLAN.md
139
+ PROJECT_MASTER_DOCUMENT.md
140
+ DEPLOYMENT_PLAN.md
141
+ COMMANDS.txt
142
+
143
+ # CampusX course materials (reference tutorials, not part of project)
144
+ _course_materials/
145
+
146
+ # Personal scratch
147
+ Untitled.txt
148
+
149
+ # Cost tracker — internal LLM spend audit trail, kept locally only
150
+ cost_logs/
151
+
152
+ # Sprint 7.19 Step 1: FT v2 reranker full FinanceBench eval (the actual gate)
153
+ !tests/evaluation/eval_results/financebench_ft_v2_reranker_active_v1.json
154
+ !tests/evaluation/eval_results/financebench_ft_v2_reranker_active_v1.correctness.json
155
+ !tests/evaluation/eval_results/financebench_ft_v2_reranker_active_v1.rejudged_sonnet_v2.correctness.json
156
+ !tests/evaluation/eval_results/financebench_ft_v2_reranker_active_v1.rejudged_sonnet_v2.diff.json
157
+ !tests/evaluation/eval_results/eval_reranker_stratified_v2.json
158
+
159
+ # Sprint 7.19 Step 1 partial-adapter backups (kept locally for ablation, not pushed)
160
+ data/models/reranker_ft_v2_local_partial/
161
+ data/models/reranker_ft_v2_partial_epoch1/
162
+
163
+ # Publishing assets (mockups, reference images, drafts) — kept local
164
+ publish-assets/
@@ -0,0 +1,44 @@
1
+ # === Build stage ===
2
+ FROM python:3.12-slim AS builder
3
+
4
+ WORKDIR /app
5
+
6
+ RUN apt-get update && apt-get install -y --no-install-recommends \
7
+ build-essential \
8
+ && rm -rf /var/lib/apt/lists/*
9
+
10
+ COPY pyproject.toml README.md ./
11
+ RUN pip install --no-cache-dir .
12
+
13
+ # === Runtime stage ===
14
+ FROM python:3.12-slim
15
+
16
+ LABEL maintainer="Rishabh" \
17
+ description="Enterprise RAG Agent API" \
18
+ version="0.1.0"
19
+
20
+ WORKDIR /app
21
+
22
+ # Create non-root user
23
+ RUN groupadd --gid 1000 appuser && \
24
+ useradd --uid 1000 --gid appuser --shell /bin/bash --create-home appuser
25
+
26
+ # Copy installed packages and console scripts (e.g. uvicorn) from builder
27
+ COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
28
+ COPY --from=builder /usr/local/bin /usr/local/bin
29
+
30
+ # Copy application code, helper scripts, and sample data
31
+ COPY src/ src/
32
+ COPY scripts/ scripts/
33
+ COPY data/sample/ data/sample/
34
+
35
+ # Change ownership and switch to non-root
36
+ RUN chown -R appuser:appuser /app
37
+ USER appuser
38
+
39
+ EXPOSE 8000
40
+
41
+ HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
42
+ CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
43
+
44
+ CMD ["uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "8000"]
@@ -0,0 +1,92 @@
1
+ .PHONY: run dev frontend test test-unit test-integration eval lint format ingest seed-db jwt docker-up docker-down docker-all docker-build docker-prod docker-logs docker-ps docker-restart check clean migrate migrate-down migrate-create migrate-current
2
+
3
+ # --- Development ---
4
+ run:
5
+ uvicorn src.api.main:app --reload --port 8000
6
+
7
+ dev: docker-up
8
+ uvicorn src.api.main:app --reload --port 8000
9
+
10
+ frontend:
11
+ python -m src.frontend.gradio_app
12
+
13
+ # --- Testing, evaluation & code quality ---
14
+ test:
15
+ pytest tests/unit/ tests/integration/ -v
16
+
17
+ test-unit:
18
+ pytest tests/unit/ -v
19
+
20
+ test-integration:
21
+ pytest tests/integration/ -v --timeout=120
22
+
23
+ eval:
24
+ python tests/evaluation/run_evaluation.py --output tests/evaluation/eval_results/latest.json
25
+
26
+ lint:
27
+ ruff check src/ tests/
28
+ ruff format --check src/ tests/
29
+
30
+ format:
31
+ ruff check --fix src/ tests/
32
+ ruff format src/ tests/
33
+
34
+ # --- Data ---
35
+ ingest:
36
+ python scripts/ingest_documents.py --input data/raw/ --collection financial_docs
37
+
38
+ seed-db:
39
+ python scripts/seed_qdrant.py --sample
40
+
41
+ jwt:
42
+ python scripts/generate_jwt.py --role finance --user-id test_user
43
+
44
+ # --- Docker ---
45
+ docker-up:
46
+ docker compose up -d qdrant postgres
47
+
48
+ docker-down:
49
+ docker compose down
50
+
51
+ docker-all:
52
+ docker compose up --build
53
+
54
+ # --- Production ---
55
+ docker-build:
56
+ docker compose build
57
+
58
+ docker-prod:
59
+ docker compose up -d --build
60
+
61
+ docker-logs:
62
+ docker compose logs -f
63
+
64
+ docker-ps:
65
+ docker compose ps
66
+
67
+ docker-restart:
68
+ docker compose restart api frontend
69
+
70
+ # --- Migrations (Sprint 9.0: alembic for the roles table; will grow) ---
71
+ migrate:
72
+ alembic upgrade head
73
+
74
+ migrate-down:
75
+ alembic downgrade -1
76
+
77
+ migrate-current:
78
+ alembic current
79
+
80
+ # Usage: make migrate-create m="add foo table"
81
+ migrate-create:
82
+ alembic revision -m "$(m)"
83
+
84
+ # --- Checks ---
85
+ check: lint test-unit
86
+ @echo "All checks passed"
87
+
88
+ # --- Cleanup ---
89
+ clean:
90
+ find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
91
+ find . -type f -name "*.pyc" -delete 2>/dev/null || true
92
+ rm -rf .pytest_cache/ htmlcov/ .coverage