flowapy 0.1.2__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flowapy-0.1.2 → flowapy-0.2.0}/PKG-INFO +2 -1
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/README.md +11 -9
- flowapy-0.2.0/examples/demo/fixtures/papers/10.1002%2Fhumu.23878/pdf_index.pkl.zst +0 -0
- flowapy-0.2.0/examples/demo/fixtures/papers/10.1016%2Fj.ymgmr.2024.101163/pdf_index.pkl.zst +0 -0
- flowapy-0.2.0/examples/demo/fixtures/papers/10.1038%2Fs41598-022-25914-8/pdf_index.pkl.zst +0 -0
- flowapy-0.2.0/examples/demo/fixtures/papers/10.1186%2Fs12881-019-0878-8/pdf_index.pkl.zst +0 -0
- flowapy-0.2.0/examples/demo/fixtures/papers/10.1186%2Fs13023-021-01817-1/pdf_index.pkl.zst +0 -0
- flowapy-0.2.0/examples/demo/fixtures/papers/10.1186%2Fs13023-021-02146-z/pdf_index.pkl.zst +0 -0
- flowapy-0.2.0/examples/demo/fixtures/papers/10.1186%2Fs13023-023-02848-6/pdf_index.pkl.zst +0 -0
- flowapy-0.2.0/examples/demo/fixtures/papers/10.1186%2Fs13052-019-0692-0/pdf_index.pkl.zst +0 -0
- flowapy-0.2.0/examples/demo/fixtures/papers/10.3389%2Ffcvm.2022.1061384/pdf_index.pkl.zst +0 -0
- flowapy-0.2.0/examples/demo/fixtures/papers/10.3389%2Ffcvm.2023.1261172/pdf_index.pkl.zst +0 -0
- flowapy-0.2.0/examples/demo/fixtures/papers/10.3389%2Ffimmu.2024.1336599/pdf_index.pkl.zst +0 -0
- flowapy-0.2.0/examples/demo/fixtures/papers/10.3389%2Ffped.2021.729824/pdf_index.pkl.zst +0 -0
- flowapy-0.2.0/examples/demo/fixtures/papers/10.3389%2Ffphar.2022.903488/pdf_index.pkl.zst +0 -0
- flowapy-0.2.0/examples/demo/fixtures/papers/10.3390%2Fijns11010016/pdf_index.pkl.zst +0 -0
- flowapy-0.2.0/examples/demo/fixtures/papers/10.3390%2Fijns6020031/pdf_index.pkl.zst +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/src/demo_gateway/main.py +13 -18
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/tests/test_resolve.py +13 -28
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/uv.lock +33 -6
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/package.json +2 -2
- flowapy-0.2.0/prompts/generic/aggregation_schema.py +36 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/pyproject.toml +2 -1
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/aggregate.py +11 -21
- flowapy-0.2.0/src/flowa/artifact.py +73 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/convert.py +45 -20
- flowapy-0.2.0/src/flowa/pdf_index_cache.py +133 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/resolve.py +54 -69
- flowapy-0.2.0/tests/test_pdf_index_cache.py +120 -0
- flowapy-0.2.0/tests/test_resolve.py +158 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/uv.lock +33 -6
- flowapy-0.1.2/prompts/generic/aggregation_schema.py +0 -95
- flowapy-0.1.2/tests/test_resolve.py +0 -207
- {flowapy-0.1.2 → flowapy-0.2.0}/.env.example +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/.github/dependabot.yml +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/.github/workflows/dependabot-auto-merge.yml +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/.github/workflows/lint.yaml +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/.github/workflows/release-chat-service.yaml +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/.github/workflows/release-flowapy.yaml +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/.github/workflows/release-react-viewer.yaml +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/.gitignore +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/.markdownlint.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/.nvmrc +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/.pre-commit-config.yaml +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/.prettierignore +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/Dockerfile +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/LICENSE +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/README.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/docs/images/viewer.png +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/.gitkeep +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/.env.example +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/.gitignore +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/LICENSES.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/aggregation.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.1002%2Fhumu.23878.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.1016%2Fj.ymgmr.2024.101163.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.1038%2Fs41598-022-25914-8.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.1186%2Fs12881-019-0878-8.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.1186%2Fs13023-021-01817-1.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.1186%2Fs13023-021-02146-z.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.1186%2Fs13023-023-02848-6.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.1186%2Fs13052-019-0692-0.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.3389%2Ffcvm.2022.1061384.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.3389%2Ffcvm.2023.1261172.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.3389%2Ffimmu.2024.1336599.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.3389%2Ffped.2021.729824.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.3389%2Ffphar.2022.903488.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.3390%2Fijns11010016.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.3390%2Fijns6020031.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/query.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/runs/cfc0186a7b7e46eb802a516b86ec207f/progress.jsonl +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/variant_details.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1002%2Fajmg.a.61481/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1002%2Fhumu.23878/markdown.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1002%2Fhumu.23878/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1002%2Fhumu.23878/source.pdf +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1016%2Fj.ejmg.2020.103997/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1016%2Fj.nmd.2022.02.002/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1016%2Fj.tjog.2022.07.008/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1016%2Fj.ymgmr.2024.101163/markdown.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1016%2Fj.ymgmr.2024.101163/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1016%2Fj.ymgmr.2024.101163/source.pdf +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1038%2Fs41598-022-25914-8/markdown.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1038%2Fs41598-022-25914-8/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1038%2Fs41598-022-25914-8/source.pdf +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1093%2Fhmg%2Fddz218/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1136%2Fjmg-2022-108675/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs12881-019-0878-8/markdown.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs12881-019-0878-8/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs12881-019-0878-8/source.pdf +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13023-021-01817-1/markdown.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13023-021-01817-1/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13023-021-01817-1/source.pdf +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13023-021-02146-z/markdown.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13023-021-02146-z/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13023-021-02146-z/source.pdf +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13023-023-02848-6/markdown.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13023-023-02848-6/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13023-023-02848-6/source.pdf +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13052-019-0692-0/markdown.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13052-019-0692-0/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13052-019-0692-0/source.pdf +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffcvm.2022.1061384/markdown.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffcvm.2022.1061384/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffcvm.2022.1061384/source.pdf +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffcvm.2023.1261172/markdown.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffcvm.2023.1261172/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffcvm.2023.1261172/source.pdf +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffimmu.2024.1336599/markdown.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffimmu.2024.1336599/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffimmu.2024.1336599/source.pdf +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffped.2021.729824/markdown.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffped.2021.729824/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffped.2021.729824/source.pdf +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffphar.2022.903488/markdown.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffphar.2022.903488/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffphar.2022.903488/source.pdf +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3390%2Fijns11010016/markdown.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3390%2Fijns11010016/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3390%2Fijns11010016/source.pdf +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3390%2Fijns6020031/markdown.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3390%2Fijns6020031/metadata.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3390%2Fijns6020031/source.pdf +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/next-env.d.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/next.config.mjs +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/package.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/postcss.config.cjs +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/public/favicon.svg +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/scripts/chat-service.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/scripts/copy-pdfjs-assets.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/scripts/exercise-llm.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/scripts/start.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/components/literature/LiteratureView.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/components/literature/PaperStatusGroup.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/components/literature/ProgressLog.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/components/literature/matchFilename.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/db/migrate.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/db/schema.sql +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/aggregate.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/chatSessionClient.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/citationResolverClient.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/demoConfig.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/papers.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/progressEvents.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/runs.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/triageBackendClient.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/triageDb.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/variantId.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/_app.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/aggregate/[variantId]/[category].ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/edit-drafts/[variantId]/[category]/[version].ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/edit-drafts/[variantId]/[category]/index.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/papers/[doi]/pdf.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/papers/index.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/runs/[variantId]/[runId]/progress.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/runs/index.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/runs/latest.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/triage/claim.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/triage/comment.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/triage/paper-done.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/triage/snapshot/[variantId]/[category]/[version].ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/index.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/variants/[variantId].tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/viewer/[variantId]/[category].tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/styles/globals.css +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/tailwind.config.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/LiteratureView.test.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/ProgressLog.test.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/aggregate.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/chat-service.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/index-page.test.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/matchFilename.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/papers-pdf-upload.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/papers-route.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/papers.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/progress-route.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/runs-latest-route.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/runs-route.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/runs.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/setup.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/triage.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/variantId.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/tsconfig.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/vitest.config.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/README.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/pyproject.toml +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/src/demo_gateway/__init__.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/src/demo_gateway/config.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/src/demo_gateway/progress.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/src/demo_gateway/runs.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/tests/__init__.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/tests/conftest.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/tests/test_main.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/tests/test_progress.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/tests/test_runs.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/package.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/.gitkeep +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/Dockerfile +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/LICENSE +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/README.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/artifact.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/audit.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/auth/jwt.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/auth/oidc.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/chat.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/cli.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/config.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/index.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/instrumentation.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/llm/anthropic.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/llm/bedrock.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/llm/factory.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/llm/google-gla.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/llm/google-vertex.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/llm/interface.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/llm/openai.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/prompts.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/server.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/session.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/storage/factory.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/storage/fs.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/storage/gcs.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/storage/interface.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/storage/s3.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/storage-keys.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/telemetry.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/text.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/yaml.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/chat.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/generic-prompt.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/llm-factory.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/oidc.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/paper-cache.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/storage-fs.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/storage-gcs.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/storage-s3.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/text.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/yaml.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/tsconfig.build.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/tsconfig.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/vitest.config.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/LICENSE +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/README.md +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/package.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/citations/sanitize.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/citations/sanitize.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/citations/types.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/index.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/llm-content/LlmContent.test.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/llm-content/LlmContent.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/pdf-viewer/PdfHighlightViewer.test.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/pdf-viewer/PdfHighlightViewer.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/pdf-viewer/types.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/styles.css +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/ChatDrawer.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/ChatSection.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/ClaimList.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/EvidenceViewerShell.test.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/EvidenceViewerShell.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/FocusCard.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/PaperHeader.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/PaperRail.test.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/PaperRail.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/SynthesisPanel.tsx +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/backend.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/citation-resolver.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/citation-utils.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/citation-utils.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/claim-refs.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/claim-refs.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/keyboard.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/keyboard.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/store.test.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/store.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/types.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/tailwind.config.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/tsconfig.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/tsup.config.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/vitest.config.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/pnpm-lock.yaml +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/pnpm-workspace.yaml +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/prompts/generic/aggregation_edit_prompt.txt +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/prompts/generic/aggregation_edit_schema.ts +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/prompts/generic/aggregation_prompt.txt +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/prompts/generic/extraction_prompt.txt +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/prompts/generic/extraction_schema.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/prompts/generic/transcription_prompt.txt +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/prompts/package.json +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/__init__.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/cli.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/clinvar.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/download.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/extract.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/http_retry.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/models.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/normalize.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/progress.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/prompts/__init__.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/py.typed +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/query.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/run.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/schema.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/settings.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/storage.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/tests/__init__.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/tests/test_progress.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/tests/test_prompts.py +0 -0
- {flowapy-0.1.2 → flowapy-0.2.0}/tsconfig.base.json +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: flowapy
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Variant literature assessment pipeline with AI extraction
|
|
5
5
|
Project-URL: Homepage, https://github.com/populationgenomics/flowa
|
|
6
6
|
Project-URL: Source, https://github.com/populationgenomics/flowa
|
|
@@ -47,6 +47,7 @@ Requires-Dist: pypdf
|
|
|
47
47
|
Requires-Dist: s3fs
|
|
48
48
|
Requires-Dist: tenacity
|
|
49
49
|
Requires-Dist: typer
|
|
50
|
+
Requires-Dist: zstandard
|
|
50
51
|
Provides-Extra: anthropic
|
|
51
52
|
Requires-Dist: pydantic-ai-slim[anthropic]==1.101.0; extra == 'anthropic'
|
|
52
53
|
Provides-Extra: bedrock
|
|
@@ -166,10 +166,10 @@ VARIANT=NM_001035_3-c_14174A_G
|
|
|
166
166
|
rm -f assessments/$VARIANT/aggregation.json \
|
|
167
167
|
assessments/$VARIANT/aggregation_raw.json
|
|
168
168
|
rm -rf assessments/$VARIANT/extractions/ assessments/$VARIANT/runs/
|
|
169
|
-
# Re-runs flowa.convert (which uses anchorite for PDF chunking
|
|
170
|
-
# Drop this line to reuse the cached markdown
|
|
171
|
-
# aggregate.
|
|
172
|
-
rm -f papers/*/markdown.md papers/*/convert_raw.json
|
|
169
|
+
# Re-runs flowa.convert (which uses anchorite for PDF chunking and
|
|
170
|
+
# builds pdf_index.pkl.zst). Drop this line to reuse the cached markdown
|
|
171
|
+
# + index and only redo extract + aggregate.
|
|
172
|
+
rm -f papers/*/markdown.md papers/*/convert_raw.json papers/*/pdf_index.pkl.zst
|
|
173
173
|
```
|
|
174
174
|
|
|
175
175
|
Then drive the pipeline. The demo's `scripts/start.ts` translates the
|
|
@@ -208,11 +208,13 @@ and not needed by anything downstream.
|
|
|
208
208
|
For papers whose source license blocks redistribution (CC-BY-NC-ND,
|
|
209
209
|
paywalled; see `fixtures/LICENSES.md` for the rule), do **not** delete
|
|
210
210
|
the whole `papers/{encodedDoi}/` directory — only delete `source.pdf`,
|
|
211
|
-
`markdown.md`,
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
211
|
+
`markdown.md`, `convert_raw.json`, and `pdf_index.pkl.zst`. The
|
|
212
|
+
`pdf_index.pkl.zst` embeds the PDF's extracted text (anchorite's char
|
|
213
|
+
index), so it carries the same copyright as `source.pdf` and must not
|
|
214
|
+
ship in the open-source repo. Keep `metadata.json` (the bibliographic
|
|
215
|
+
fields are factual data, not copyrightable) but replace its `abstract`
|
|
216
|
+
field with a sentinel string, so the omission reads as deliberate (not
|
|
217
|
+
a missing-data bug) when the literature view renders the row:
|
|
216
218
|
|
|
217
219
|
```bash
|
|
218
220
|
python3 -c "
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -18,9 +18,13 @@ from typing import Annotated
|
|
|
18
18
|
import uvicorn
|
|
19
19
|
from fastapi import APIRouter, Depends, FastAPI, HTTPException, Query, Request, status
|
|
20
20
|
from fastapi.middleware.cors import CORSMiddleware
|
|
21
|
-
from flowa.resolve import ResolvedCitations, ResolveRequest, resolve_citations
|
|
21
|
+
from flowa.resolve import (
|
|
22
|
+
ResolvedCitations,
|
|
23
|
+
ResolveRequest,
|
|
24
|
+
load_pdf_index_from_storage,
|
|
25
|
+
resolve_citations,
|
|
26
|
+
)
|
|
22
27
|
from flowa.schema import VariantSpec
|
|
23
|
-
from flowa.storage import paper_url, read_bytes, read_text
|
|
24
28
|
from pydantic import BaseModel, Field
|
|
25
29
|
|
|
26
30
|
from .config import Settings
|
|
@@ -111,24 +115,15 @@ def resolve_citations_route(
|
|
|
111
115
|
) -> ResolvedCitations:
|
|
112
116
|
"""Align verbatim quotes to PDF bboxes.
|
|
113
117
|
|
|
114
|
-
Sync `def` so FastAPI auto-runs it in the threadpool —
|
|
115
|
-
|
|
118
|
+
Sync `def` so FastAPI auto-runs it in the threadpool — deserialising the
|
|
119
|
+
PdfIndex pickle and aligning quotes is CPU-bound and would block the
|
|
120
|
+
asyncio loop otherwise.
|
|
116
121
|
"""
|
|
117
122
|
base = str(settings.demo_data_dir)
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
except FileNotFoundError:
|
|
123
|
-
return None
|
|
124
|
-
|
|
125
|
-
def md_loader(doi: str) -> str | None:
|
|
126
|
-
try:
|
|
127
|
-
return read_text(paper_url(base, doi, 'markdown.md'))
|
|
128
|
-
except FileNotFoundError:
|
|
129
|
-
return None
|
|
130
|
-
|
|
131
|
-
return resolve_citations(body.citations, pdf_loader=pdf_loader, markdown_loader=md_loader)
|
|
123
|
+
return resolve_citations(
|
|
124
|
+
body.citations,
|
|
125
|
+
index_provider=lambda doi: load_pdf_index_from_storage(base, doi),
|
|
126
|
+
)
|
|
132
127
|
|
|
133
128
|
|
|
134
129
|
@asynccontextmanager
|
|
@@ -1,24 +1,13 @@
|
|
|
1
1
|
"""HTTP shape tests for /resolve-citations.
|
|
2
2
|
|
|
3
3
|
Library-level resolver behaviour is covered in flowa's `tests/test_resolve.py`;
|
|
4
|
-
here we just check that the route plumbs Settings →
|
|
5
|
-
correctly and returns the expected wire shape.
|
|
4
|
+
here we just check that the route plumbs Settings → index_provider →
|
|
5
|
+
flowa.resolve correctly and returns the expected wire shape.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
|
|
10
8
|
from fastapi.testclient import TestClient
|
|
11
|
-
from flowa import resolve as flowa_resolve_module
|
|
12
|
-
from flowa.storage import encode_doi
|
|
13
|
-
|
|
14
|
-
from demo_gateway.config import Settings
|
|
15
|
-
|
|
16
9
|
|
|
17
|
-
|
|
18
|
-
paper_dir = data_dir / 'papers' / encode_doi(doi)
|
|
19
|
-
paper_dir.mkdir(parents=True, exist_ok=True)
|
|
20
|
-
(paper_dir / 'source.pdf').write_bytes(pdf)
|
|
21
|
-
(paper_dir / 'markdown.md').write_text(markdown)
|
|
10
|
+
import demo_gateway.main as demo_main
|
|
22
11
|
|
|
23
12
|
|
|
24
13
|
def test_post_resolve_citations_rejects_malformed_body(client: TestClient) -> None:
|
|
@@ -26,8 +15,8 @@ def test_post_resolve_citations_rejects_malformed_body(client: TestClient) -> No
|
|
|
26
15
|
assert response.status_code == 422
|
|
27
16
|
|
|
28
17
|
|
|
29
|
-
def
|
|
30
|
-
"""When
|
|
18
|
+
def test_post_resolve_citations_returns_errors_for_missing_index(client: TestClient) -> None:
|
|
19
|
+
"""When pdf_index.pkl.zst is absent, the DOI surfaces in `errors` rather than `resolved`."""
|
|
31
20
|
response = client.post(
|
|
32
21
|
'/resolve-citations',
|
|
33
22
|
json={'citations': [{'doi': '10.1/missing', 'quotes': ['anything']}]},
|
|
@@ -35,15 +24,11 @@ def test_post_resolve_citations_returns_errors_for_missing_pdfs(client: TestClie
|
|
|
35
24
|
assert response.status_code == 200
|
|
36
25
|
body = response.json()
|
|
37
26
|
assert body['resolved'] == {}
|
|
38
|
-
assert body['errors'] == {'10.1/missing': '
|
|
27
|
+
assert body['errors'] == {'10.1/missing': 'pdf_index not available'}
|
|
39
28
|
|
|
40
29
|
|
|
41
|
-
def test_post_resolve_citations_returns_resolved_bboxes(
|
|
42
|
-
|
|
43
|
-
settings: Settings,
|
|
44
|
-
monkeypatch,
|
|
45
|
-
) -> None:
|
|
46
|
-
"""When source.pdf exists, the route resolves quotes to bboxes via the library."""
|
|
30
|
+
def test_post_resolve_citations_returns_resolved_bboxes(client: TestClient, monkeypatch) -> None:
|
|
31
|
+
"""When the index loads, the route resolves quotes to bboxes via the library."""
|
|
47
32
|
|
|
48
33
|
class _FakeBbox:
|
|
49
34
|
def __init__(self, top: int, left: int, bottom: int, right: int) -> None:
|
|
@@ -53,16 +38,16 @@ def test_post_resolve_citations_returns_resolved_bboxes(
|
|
|
53
38
|
self.right = right
|
|
54
39
|
|
|
55
40
|
class _FakePdfIndex:
|
|
56
|
-
def __init__(self, _pdf_bytes: bytes, *, markdown: str | None = None) -> None:
|
|
57
|
-
pass
|
|
58
|
-
|
|
59
41
|
def resolve(self, quotes: list[str]) -> dict[str, list[tuple[int, _FakeBbox]]]:
|
|
60
42
|
# 0-indexed page from anchorite — the +1 boundary wrap in resolve.py
|
|
61
43
|
# turns this into page=1 on the wire.
|
|
62
44
|
return {q: [(0, _FakeBbox(top=10, left=20, bottom=30, right=40))] for q in quotes}
|
|
63
45
|
|
|
64
|
-
monkeypatch.setattr(
|
|
65
|
-
|
|
46
|
+
monkeypatch.setattr(
|
|
47
|
+
demo_main,
|
|
48
|
+
'load_pdf_index_from_storage',
|
|
49
|
+
lambda _base, doi: _FakePdfIndex() if doi == '10.1/present' else None,
|
|
50
|
+
)
|
|
66
51
|
|
|
67
52
|
response = client.post(
|
|
68
53
|
'/resolve-citations',
|
|
@@ -3,14 +3,14 @@ revision = 3
|
|
|
3
3
|
requires-python = "==3.13.*"
|
|
4
4
|
|
|
5
5
|
[options]
|
|
6
|
-
exclude-newer = "
|
|
6
|
+
exclude-newer = "0001-01-01T00:00:00Z" # This has no effect and is included for backwards compatibility when using relative exclude-newer values.
|
|
7
7
|
exclude-newer-span = "P7D"
|
|
8
8
|
|
|
9
9
|
[options.exclude-newer-package]
|
|
10
|
-
pydantic-ai-slim = { timestamp = "
|
|
11
|
-
anchorite = { timestamp = "
|
|
12
|
-
seq-smith = { timestamp = "
|
|
13
|
-
pydantic-graph = { timestamp = "
|
|
10
|
+
pydantic-ai-slim = { timestamp = "0001-01-01T00:00:00Z", span = "PT0S" }
|
|
11
|
+
anchorite = { timestamp = "0001-01-01T00:00:00Z", span = "PT0S" }
|
|
12
|
+
seq-smith = { timestamp = "0001-01-01T00:00:00Z", span = "PT0S" }
|
|
13
|
+
pydantic-graph = { timestamp = "0001-01-01T00:00:00Z", span = "PT0S" }
|
|
14
14
|
|
|
15
15
|
[[package]]
|
|
16
16
|
name = "aiohappyeyeballs"
|
|
@@ -415,7 +415,7 @@ wheels = [
|
|
|
415
415
|
|
|
416
416
|
[[package]]
|
|
417
417
|
name = "flowapy"
|
|
418
|
-
version = "0.
|
|
418
|
+
version = "0.2.0"
|
|
419
419
|
source = { editable = "../../" }
|
|
420
420
|
dependencies = [
|
|
421
421
|
{ name = "anchorite" },
|
|
@@ -431,6 +431,7 @@ dependencies = [
|
|
|
431
431
|
{ name = "s3fs" },
|
|
432
432
|
{ name = "tenacity" },
|
|
433
433
|
{ name = "typer" },
|
|
434
|
+
{ name = "zstandard" },
|
|
434
435
|
]
|
|
435
436
|
|
|
436
437
|
[package.optional-dependencies]
|
|
@@ -466,6 +467,7 @@ requires-dist = [
|
|
|
466
467
|
{ name = "s3fs" },
|
|
467
468
|
{ name = "tenacity" },
|
|
468
469
|
{ name = "typer" },
|
|
470
|
+
{ name = "zstandard" },
|
|
469
471
|
]
|
|
470
472
|
provides-extras = ["anthropic", "bedrock", "google", "openai"]
|
|
471
473
|
|
|
@@ -2109,3 +2111,28 @@ sdist = { url = "https://files.pythonhosted.org/packages/30/21/093488dfc7cc8964d
|
|
|
2109
2111
|
wheels = [
|
|
2110
2112
|
{ url = "https://files.pythonhosted.org/packages/08/8a/0861bec20485572fbddf3dfba2910e38fe249796cb73ecdeb74e07eeb8d3/zipp-3.23.1-py3-none-any.whl", hash = "sha256:0b3596c50a5c700c9cb40ba8d86d9f2cc4807e9bedb06bcdf7fac85633e444dc", size = 10378, upload-time = "2026-04-13T23:21:45.386Z" },
|
|
2111
2113
|
]
|
|
2114
|
+
|
|
2115
|
+
[[package]]
|
|
2116
|
+
name = "zstandard"
|
|
2117
|
+
version = "0.25.0"
|
|
2118
|
+
source = { registry = "https://pypi.org/simple" }
|
|
2119
|
+
sdist = { url = "https://files.pythonhosted.org/packages/fd/aa/3e0508d5a5dd96529cdc5a97011299056e14c6505b678fd58938792794b1/zstandard-0.25.0.tar.gz", hash = "sha256:7713e1179d162cf5c7906da876ec2ccb9c3a9dcbdffef0cc7f70c3667a205f0b", size = 711513, upload-time = "2025-09-14T22:15:54.002Z" }
|
|
2120
|
+
wheels = [
|
|
2121
|
+
{ url = "https://files.pythonhosted.org/packages/35/0b/8df9c4ad06af91d39e94fa96cc010a24ac4ef1378d3efab9223cc8593d40/zstandard-0.25.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec996f12524f88e151c339688c3897194821d7f03081ab35d31d1e12ec975e94", size = 795735, upload-time = "2025-09-14T22:17:26.042Z" },
|
|
2122
|
+
{ url = "https://files.pythonhosted.org/packages/3f/06/9ae96a3e5dcfd119377ba33d4c42a7d89da1efabd5cb3e366b156c45ff4d/zstandard-0.25.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a1a4ae2dec3993a32247995bdfe367fc3266da832d82f8438c8570f989753de1", size = 640440, upload-time = "2025-09-14T22:17:27.366Z" },
|
|
2123
|
+
{ url = "https://files.pythonhosted.org/packages/d9/14/933d27204c2bd404229c69f445862454dcc101cd69ef8c6068f15aaec12c/zstandard-0.25.0-cp313-cp313-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:e96594a5537722fdfb79951672a2a63aec5ebfb823e7560586f7484819f2a08f", size = 5343070, upload-time = "2025-09-14T22:17:28.896Z" },
|
|
2124
|
+
{ url = "https://files.pythonhosted.org/packages/6d/db/ddb11011826ed7db9d0e485d13df79b58586bfdec56e5c84a928a9a78c1c/zstandard-0.25.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bfc4e20784722098822e3eee42b8e576b379ed72cca4a7cb856ae733e62192ea", size = 5063001, upload-time = "2025-09-14T22:17:31.044Z" },
|
|
2125
|
+
{ url = "https://files.pythonhosted.org/packages/db/00/87466ea3f99599d02a5238498b87bf84a6348290c19571051839ca943777/zstandard-0.25.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:457ed498fc58cdc12fc48f7950e02740d4f7ae9493dd4ab2168a47c93c31298e", size = 5394120, upload-time = "2025-09-14T22:17:32.711Z" },
|
|
2126
|
+
{ url = "https://files.pythonhosted.org/packages/2b/95/fc5531d9c618a679a20ff6c29e2b3ef1d1f4ad66c5e161ae6ff847d102a9/zstandard-0.25.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:fd7a5004eb1980d3cefe26b2685bcb0b17989901a70a1040d1ac86f1d898c551", size = 5451230, upload-time = "2025-09-14T22:17:34.41Z" },
|
|
2127
|
+
{ url = "https://files.pythonhosted.org/packages/63/4b/e3678b4e776db00f9f7b2fe58e547e8928ef32727d7a1ff01dea010f3f13/zstandard-0.25.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8e735494da3db08694d26480f1493ad2cf86e99bdd53e8e9771b2752a5c0246a", size = 5547173, upload-time = "2025-09-14T22:17:36.084Z" },
|
|
2128
|
+
{ url = "https://files.pythonhosted.org/packages/4e/d5/ba05ed95c6b8ec30bd468dfeab20589f2cf709b5c940483e31d991f2ca58/zstandard-0.25.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3a39c94ad7866160a4a46d772e43311a743c316942037671beb264e395bdd611", size = 5046736, upload-time = "2025-09-14T22:17:37.891Z" },
|
|
2129
|
+
{ url = "https://files.pythonhosted.org/packages/50/d5/870aa06b3a76c73eced65c044b92286a3c4e00554005ff51962deef28e28/zstandard-0.25.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:172de1f06947577d3a3005416977cce6168f2261284c02080e7ad0185faeced3", size = 5576368, upload-time = "2025-09-14T22:17:40.206Z" },
|
|
2130
|
+
{ url = "https://files.pythonhosted.org/packages/5d/35/398dc2ffc89d304d59bc12f0fdd931b4ce455bddf7038a0a67733a25f550/zstandard-0.25.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3c83b0188c852a47cd13ef3bf9209fb0a77fa5374958b8c53aaa699398c6bd7b", size = 4954022, upload-time = "2025-09-14T22:17:41.879Z" },
|
|
2131
|
+
{ url = "https://files.pythonhosted.org/packages/9a/5c/36ba1e5507d56d2213202ec2b05e8541734af5f2ce378c5d1ceaf4d88dc4/zstandard-0.25.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1673b7199bbe763365b81a4f3252b8e80f44c9e323fc42940dc8843bfeaf9851", size = 5267889, upload-time = "2025-09-14T22:17:43.577Z" },
|
|
2132
|
+
{ url = "https://files.pythonhosted.org/packages/70/e8/2ec6b6fb7358b2ec0113ae202647ca7c0e9d15b61c005ae5225ad0995df5/zstandard-0.25.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0be7622c37c183406f3dbf0cba104118eb16a4ea7359eeb5752f0794882fc250", size = 5433952, upload-time = "2025-09-14T22:17:45.271Z" },
|
|
2133
|
+
{ url = "https://files.pythonhosted.org/packages/7b/01/b5f4d4dbc59ef193e870495c6f1275f5b2928e01ff5a81fecb22a06e22fb/zstandard-0.25.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5f5e4c2a23ca271c218ac025bd7d635597048b366d6f31f420aaeb715239fc98", size = 5814054, upload-time = "2025-09-14T22:17:47.08Z" },
|
|
2134
|
+
{ url = "https://files.pythonhosted.org/packages/b2/e5/fbd822d5c6f427cf158316d012c5a12f233473c2f9c5fe5ab1ae5d21f3d8/zstandard-0.25.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f187a0bb61b35119d1926aee039524d1f93aaf38a9916b8c4b78ac8514a0aaf", size = 5360113, upload-time = "2025-09-14T22:17:48.893Z" },
|
|
2135
|
+
{ url = "https://files.pythonhosted.org/packages/8e/e0/69a553d2047f9a2c7347caa225bb3a63b6d7704ad74610cb7823baa08ed7/zstandard-0.25.0-cp313-cp313-win32.whl", hash = "sha256:7030defa83eef3e51ff26f0b7bfb229f0204b66fe18e04359ce3474ac33cbc09", size = 436936, upload-time = "2025-09-14T22:17:52.658Z" },
|
|
2136
|
+
{ url = "https://files.pythonhosted.org/packages/d9/82/b9c06c870f3bd8767c201f1edbdf9e8dc34be5b0fbc5682c4f80fe948475/zstandard-0.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:1f830a0dac88719af0ae43b8b2d6aef487d437036468ef3c2ea59c51f9d55fd5", size = 506232, upload-time = "2025-09-14T22:17:50.402Z" },
|
|
2137
|
+
{ url = "https://files.pythonhosted.org/packages/d4/57/60c3c01243bb81d381c9916e2a6d9e149ab8627c0c7d7abb2d73384b3c0c/zstandard-0.25.0-cp313-cp313-win_arm64.whl", hash = "sha256:85304a43f4d513f5464ceb938aa02c1e78c2943b29f44a750b48b25ac999a049", size = 462671, upload-time = "2025-09-14T22:17:51.533Z" },
|
|
2138
|
+
]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@flowajs/chat-service",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.4",
|
|
4
4
|
"description": "Stateless service that orchestrates LLM conversations over flowa artifacts.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"type": "module",
|
|
@@ -71,7 +71,7 @@
|
|
|
71
71
|
"zod": "4.4.3"
|
|
72
72
|
},
|
|
73
73
|
"peerDependencies": {
|
|
74
|
-
"@ai-sdk/amazon-bedrock": "^4.0.
|
|
74
|
+
"@ai-sdk/amazon-bedrock": "^4.0.101",
|
|
75
75
|
"@ai-sdk/anthropic": "^3.0.0",
|
|
76
76
|
"@ai-sdk/google": "^3.0.0",
|
|
77
77
|
"@ai-sdk/google-vertex": "^4.0.0",
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Aggregation schema for generic ACMG-style variant assessment.
|
|
2
|
+
|
|
3
|
+
This module defines the output structure for aggregation across papers.
|
|
4
|
+
The AggregationResult class is loaded dynamically by Flowa.
|
|
5
|
+
|
|
6
|
+
Interface requirements (accessed by Flowa's validation logic):
|
|
7
|
+
- results[].category must exist
|
|
8
|
+
- results[].claims[].paper_id and .citations[].quote must exist
|
|
9
|
+
- results[].papers[].paper_id must exist
|
|
10
|
+
|
|
11
|
+
Strict structured outputs (Bedrock/Anthropic/OpenAI NativeOutput) clobber
|
|
12
|
+
additionalProperties to false, collapsing dict[str, X] fields to empty objects.
|
|
13
|
+
Use a list shape with the category carried as a field inside each entry.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from pydantic import BaseModel, Field
|
|
17
|
+
|
|
18
|
+
from flowa.artifact import CategoryResult as BaseCategoryResult
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class CategoryResult(BaseCategoryResult):
|
|
22
|
+
"""ACMG-classification result for a single assessment category."""
|
|
23
|
+
|
|
24
|
+
classification: str = Field(
|
|
25
|
+
description='ACMG classification: Pathogenic, Likely Pathogenic, VUS, Likely Benign, or Benign.'
|
|
26
|
+
)
|
|
27
|
+
classification_rationale: str = Field(description='Brief explanation of why this classification was selected.')
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class AggregationResult(BaseModel):
|
|
31
|
+
"""Multi-category aggregation result for ACMG-style variant assessment."""
|
|
32
|
+
|
|
33
|
+
results: list[CategoryResult] = Field(
|
|
34
|
+
description='List of assessment results, one per selected category. '
|
|
35
|
+
'Each entry carries its own `category` identifier.'
|
|
36
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "flowapy"
|
|
3
|
-
version = "0.
|
|
3
|
+
version = "0.2.0"
|
|
4
4
|
description = "Variant literature assessment pipeline with AI extraction"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = "==3.13.*"
|
|
@@ -27,6 +27,7 @@ dependencies = [
|
|
|
27
27
|
"s3fs", # S3/MinIO support for fsspec
|
|
28
28
|
"tenacity",
|
|
29
29
|
"typer",
|
|
30
|
+
"zstandard",
|
|
30
31
|
]
|
|
31
32
|
|
|
32
33
|
[project.optional-dependencies]
|
|
@@ -15,7 +15,7 @@ from pydantic_ai import Agent, ModelRetry, NativeOutput, RunContext
|
|
|
15
15
|
from flowa.clinvar import format_clinvar_for_prompt, query_clinvar
|
|
16
16
|
from flowa.models import create_model, get_model_settings
|
|
17
17
|
from flowa.prompts import load_prompt_and_schema
|
|
18
|
-
from flowa.resolve import CitationQuery, resolve_citations
|
|
18
|
+
from flowa.resolve import CitationQuery, load_pdf_index_from_storage, resolve_citations
|
|
19
19
|
from flowa.schema import AGGREGATION_SCHEMA_VERSION, with_schema_version
|
|
20
20
|
from flowa.settings import ModelConfig, Settings
|
|
21
21
|
from flowa.storage import (
|
|
@@ -23,9 +23,7 @@ from flowa.storage import (
|
|
|
23
23
|
encode_doi,
|
|
24
24
|
exists,
|
|
25
25
|
paper_url,
|
|
26
|
-
read_bytes,
|
|
27
26
|
read_json,
|
|
28
|
-
read_text,
|
|
29
27
|
write_bytes,
|
|
30
28
|
write_json,
|
|
31
29
|
)
|
|
@@ -158,16 +156,14 @@ def create_aggregate_agent(
|
|
|
158
156
|
def resolve_aggregate_citations(
|
|
159
157
|
aggregate_dict: dict[str, Any],
|
|
160
158
|
paper_id_to_doi: dict[str, str],
|
|
161
|
-
|
|
162
|
-
markdown_cache: dict[str, str],
|
|
159
|
+
base: str,
|
|
163
160
|
metadata_cache: dict[str, dict[str, Any]],
|
|
164
161
|
) -> None:
|
|
165
162
|
"""Post-process aggregate output: resolve quotes to bboxes on claim citations.
|
|
166
163
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
quote) pair resolves to exactly one paper.
|
|
164
|
+
Loads each paper's pre-built `pdf_index.pkl.zst` via the same path the
|
|
165
|
+
gateway uses; the convert step that ran earlier in this pipeline wrote
|
|
166
|
+
the artifact, so it's guaranteed to be present.
|
|
171
167
|
"""
|
|
172
168
|
# Collect all (doi, quote) pairs, grouped by DOI.
|
|
173
169
|
doi_quotes: dict[str, list[str]] = {}
|
|
@@ -180,8 +176,7 @@ def resolve_aggregate_citations(
|
|
|
180
176
|
citations_input = [CitationQuery(doi=doi, quotes=quotes) for doi, quotes in doi_quotes.items()]
|
|
181
177
|
result = resolve_citations(
|
|
182
178
|
citations_input,
|
|
183
|
-
|
|
184
|
-
markdown_loader=markdown_cache.get,
|
|
179
|
+
index_provider=lambda doi: load_pdf_index_from_storage(base, doi),
|
|
185
180
|
)
|
|
186
181
|
|
|
187
182
|
# Attach resolved bboxes onto each claim's citations.
|
|
@@ -229,14 +224,11 @@ async def aggregate_evidence_async(
|
|
|
229
224
|
clinvar_data = query_clinvar(hgvs_c_full, ncbi_api_key)
|
|
230
225
|
clinvar_text = format_clinvar_for_prompt(clinvar_data)
|
|
231
226
|
|
|
232
|
-
# Load extractions and metadata for each paper. PDF bytes and
|
|
233
|
-
#
|
|
234
|
-
#
|
|
235
|
-
#
|
|
236
|
-
# surfaces as FileNotFoundError below.
|
|
227
|
+
# Load extractions and metadata for each paper. PDF bytes and markdown
|
|
228
|
+
# are NOT loaded here — the post-LLM citation resolver loads the paper's
|
|
229
|
+
# pre-built `pdf_index.pkl.zst` directly from storage, so this step only
|
|
230
|
+
# needs the LLM inputs.
|
|
237
231
|
evidence_extractions: list[dict[str, Any]] = []
|
|
238
|
-
pdf_bytes_cache: dict[str, bytes] = {}
|
|
239
|
-
markdown_cache: dict[str, str] = {}
|
|
240
232
|
metadata_cache: dict[str, dict[str, Any]] = {}
|
|
241
233
|
|
|
242
234
|
for doi in dois:
|
|
@@ -252,8 +244,6 @@ async def aggregate_evidence_async(
|
|
|
252
244
|
log.info('Skipping %s: variant not discussed', doi)
|
|
253
245
|
continue
|
|
254
246
|
|
|
255
|
-
pdf_bytes_cache[doi] = read_bytes(paper_url(base, doi, 'source.pdf'))
|
|
256
|
-
markdown_cache[doi] = read_text(paper_url(base, doi, 'markdown.md'))
|
|
257
247
|
metadata = read_json(paper_url(base, doi, 'metadata.json'))
|
|
258
248
|
metadata_cache[doi] = metadata
|
|
259
249
|
|
|
@@ -332,7 +322,7 @@ async def aggregate_evidence_async(
|
|
|
332
322
|
# Post-LLM: resolve quotes to bboxes, replace paper_id with DOI
|
|
333
323
|
aggregate_dict = output.model_dump()
|
|
334
324
|
with logfire.span('flowa.resolve_citations', paper_count=len(paper_id_to_doi)):
|
|
335
|
-
resolve_aggregate_citations(aggregate_dict, paper_id_to_doi,
|
|
325
|
+
resolve_aggregate_citations(aggregate_dict, paper_id_to_doi, base, metadata_cache)
|
|
336
326
|
|
|
337
327
|
# Store structured aggregation result
|
|
338
328
|
write_json(aggregation_url, with_schema_version(aggregate_dict, AGGREGATION_SCHEMA_VERSION))
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Shared schema primitives for aggregation prompt sets.
|
|
2
|
+
|
|
3
|
+
`CategoryResult` is the citation-grounded base each deployment derives from
|
|
4
|
+
to add its own classification field(s) — e.g. generic ACMG sets add
|
|
5
|
+
`classification` + `classification_rationale`; private deployments may add
|
|
6
|
+
their own classification dimensions. Each deployment also defines its own
|
|
7
|
+
`AggregationResult` with `results: list[CategoryResult]` pointing at its
|
|
8
|
+
own `CategoryResult` subclass (Pydantic doesn't covariantly substitute the
|
|
9
|
+
parent's parameterised generic).
|
|
10
|
+
|
|
11
|
+
This module is the Python analog of `@flowajs/chat-service`'s
|
|
12
|
+
`artifactFields` / `ArtifactSchema` export: it exists so prompt-set schemas
|
|
13
|
+
share their citation-grounded structure rather than duplicating it.
|
|
14
|
+
|
|
15
|
+
The schema-side descriptions here are deliberately structural — they
|
|
16
|
+
describe what each field IS. Behavioural guidance (how to populate
|
|
17
|
+
`description`, ranking conventions, claim-emission criteria) belongs in the
|
|
18
|
+
aggregation prompt template, not the schema.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from pydantic import BaseModel, Field
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class AggregateCitation(BaseModel):
|
|
25
|
+
"""A citation quoting a specific passage from a source paper."""
|
|
26
|
+
|
|
27
|
+
quote: str = Field(
|
|
28
|
+
description='A short, distinctive verbatim quote from the paper text that identifies this evidence.'
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Claim(BaseModel):
|
|
33
|
+
"""A factual statement supporting a synthesis, sourced from one paper."""
|
|
34
|
+
|
|
35
|
+
paper_id: str = Field(description='AuthorYear identifier of the source paper; must appear in papers[].')
|
|
36
|
+
text: str = Field(
|
|
37
|
+
description='The factual statement as the curator reads it in triage. '
|
|
38
|
+
'May synthesise across citations from the same paper.'
|
|
39
|
+
)
|
|
40
|
+
citations: list[AggregateCitation] = Field(
|
|
41
|
+
description='One or more supporting quotes from paper_id.',
|
|
42
|
+
min_length=1,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class RankedPaper(BaseModel):
|
|
47
|
+
"""A paper in the ranked papers list. List position encodes importance."""
|
|
48
|
+
|
|
49
|
+
paper_id: str = Field(description='AuthorYear identifier; must match the key used in paper_id_mapping.')
|
|
50
|
+
rank_rationale: str = Field(description='One sentence explaining why this paper sits at this rank.')
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class CategoryResult(BaseModel):
|
|
54
|
+
"""Citation-grounded base for an aggregation result.
|
|
55
|
+
|
|
56
|
+
Deployments subclass to add classification-specific fields. The five
|
|
57
|
+
fields here are the contract that `flowa.aggregate` reads at validation
|
|
58
|
+
time.
|
|
59
|
+
"""
|
|
60
|
+
|
|
61
|
+
category: str = Field(description='Assessment-category identifier.')
|
|
62
|
+
description: str = Field(description='Short human-readable summary of the synthesis.')
|
|
63
|
+
notes: str = Field(
|
|
64
|
+
description='Long-form synthesis in Markdown. Uses inline citation links: '
|
|
65
|
+
'[text](#cite:paper_id "verbatim quote") to reference specific paper evidence locations.'
|
|
66
|
+
)
|
|
67
|
+
papers: list[RankedPaper] = Field(
|
|
68
|
+
description='Source documents contributing to the synthesis, ordered by importance. '
|
|
69
|
+
'List position is the rank; paper_id values must be unique.'
|
|
70
|
+
)
|
|
71
|
+
claims: list[Claim] = Field(
|
|
72
|
+
description='Factual claims supporting the synthesis, grouped by paper_id in the same order as papers[].'
|
|
73
|
+
)
|
|
@@ -17,9 +17,11 @@ from pydantic_ai import Agent
|
|
|
17
17
|
from pydantic_ai.messages import BinaryContent
|
|
18
18
|
|
|
19
19
|
from flowa.models import create_model, get_model_settings
|
|
20
|
+
from flowa.pdf_index_cache import build as build_pdf_index_payload
|
|
21
|
+
from flowa.pdf_index_cache import serialize as serialize_pdf_index_payload
|
|
20
22
|
from flowa.prompts import load_text_prompt
|
|
21
23
|
from flowa.settings import ModelConfig, Settings
|
|
22
|
-
from flowa.storage import exists, paper_url, read_bytes, write_bytes, write_text
|
|
24
|
+
from flowa.storage import exists, paper_url, read_bytes, read_text, write_bytes, write_text
|
|
23
25
|
|
|
24
26
|
log = logging.getLogger(__name__)
|
|
25
27
|
|
|
@@ -120,14 +122,22 @@ async def transcribe(
|
|
|
120
122
|
|
|
121
123
|
|
|
122
124
|
async def convert_paper_async(base: str, doi: str, model: ModelConfig, prompt_set: str = 'generic') -> None:
|
|
123
|
-
"""Convert a single paper's PDF to Markdown
|
|
125
|
+
"""Convert a single paper's PDF to Markdown and persist its `PdfIndex`.
|
|
124
126
|
|
|
125
|
-
Reads PDF from papers/{encoded_doi}/source.pdf
|
|
126
|
-
|
|
127
|
+
Reads PDF from papers/{encoded_doi}/source.pdf and writes
|
|
128
|
+
papers/{encoded_doi}/markdown.md plus papers/{encoded_doi}/pdf_index.pkl.zst
|
|
129
|
+
(consumed by the gateway's resolve endpoint).
|
|
130
|
+
|
|
131
|
+
Either artifact can be missing independently — if a previous run failed
|
|
132
|
+
or pre-dates the pdf_index step, the next call fills in only what's
|
|
133
|
+
missing without re-transcribing or re-building work that's already done.
|
|
127
134
|
"""
|
|
128
135
|
md_url = paper_url(base, doi, 'markdown.md')
|
|
136
|
+
index_url = paper_url(base, doi, 'pdf_index.pkl.zst')
|
|
137
|
+
md_needed = not exists(md_url)
|
|
138
|
+
index_needed = not exists(index_url)
|
|
129
139
|
|
|
130
|
-
if
|
|
140
|
+
if not md_needed and not index_needed:
|
|
131
141
|
log.info('Already converted: %s', md_url)
|
|
132
142
|
return
|
|
133
143
|
|
|
@@ -138,21 +148,36 @@ async def convert_paper_async(base: str, doi: str, model: ModelConfig, prompt_se
|
|
|
138
148
|
log.info('Skipping DOI %s: PDF not available', doi)
|
|
139
149
|
return
|
|
140
150
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
151
|
+
markdown: str | None = None
|
|
152
|
+
if md_needed:
|
|
153
|
+
log.info(
|
|
154
|
+
'Converting DOI %s (%d bytes, model: %s, chunk: %d pages)', doi, len(pdf_bytes), model.name, PAGES_PER_CHUNK
|
|
155
|
+
)
|
|
156
|
+
prompt = load_text_prompt('transcription', prompt_set)
|
|
157
|
+
t0 = time.monotonic()
|
|
158
|
+
result = await transcribe(pdf_bytes, model=model, prompt=prompt, page_count=PAGES_PER_CHUNK)
|
|
159
|
+
elapsed = time.monotonic() - t0
|
|
160
|
+
|
|
161
|
+
markdown = result.markdown
|
|
162
|
+
write_text(md_url, markdown)
|
|
163
|
+
write_bytes(paper_url(base, doi, 'convert_raw.json'), json.dumps(result.all_messages).encode())
|
|
164
|
+
log.info('Converted DOI %s: %d chars in %.1fs', doi, len(markdown), elapsed)
|
|
165
|
+
|
|
166
|
+
if index_needed:
|
|
167
|
+
# PdfIndex construction is CPU-bound (~8s on the deployed gateway
|
|
168
|
+
# hardware) and dominates per-call latency at `/api/v1/resolve` if
|
|
169
|
+
# rebuilt on every call. Pay the cost here once per paper and ship
|
|
170
|
+
# the result; see `flowa.pdf_index_cache` for the storage format.
|
|
171
|
+
# `asyncio.to_thread` keeps the rest of the convert pipeline (other
|
|
172
|
+
# papers being transcribed concurrently) unblocked.
|
|
173
|
+
if markdown is None: # index missing but markdown already on disk
|
|
174
|
+
markdown = read_text(md_url)
|
|
175
|
+
t0 = time.monotonic()
|
|
176
|
+
blob = await asyncio.to_thread(
|
|
177
|
+
lambda: serialize_pdf_index_payload(build_pdf_index_payload(pdf_bytes, markdown))
|
|
178
|
+
)
|
|
179
|
+
write_bytes(index_url, blob)
|
|
180
|
+
log.info('Wrote pdf_index for DOI %s: %.1f MB in %.1fs', doi, len(blob) / 1e6, time.monotonic() - t0)
|
|
156
181
|
|
|
157
182
|
|
|
158
183
|
def convert_paper(
|