flowapy 0.1.2__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (308)
  1. {flowapy-0.1.2 → flowapy-0.2.0}/PKG-INFO +2 -1
  2. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/README.md +11 -9
  3. flowapy-0.2.0/examples/demo/fixtures/papers/10.1002%2Fhumu.23878/pdf_index.pkl.zst +0 -0
  4. flowapy-0.2.0/examples/demo/fixtures/papers/10.1016%2Fj.ymgmr.2024.101163/pdf_index.pkl.zst +0 -0
  5. flowapy-0.2.0/examples/demo/fixtures/papers/10.1038%2Fs41598-022-25914-8/pdf_index.pkl.zst +0 -0
  6. flowapy-0.2.0/examples/demo/fixtures/papers/10.1186%2Fs12881-019-0878-8/pdf_index.pkl.zst +0 -0
  7. flowapy-0.2.0/examples/demo/fixtures/papers/10.1186%2Fs13023-021-01817-1/pdf_index.pkl.zst +0 -0
  8. flowapy-0.2.0/examples/demo/fixtures/papers/10.1186%2Fs13023-021-02146-z/pdf_index.pkl.zst +0 -0
  9. flowapy-0.2.0/examples/demo/fixtures/papers/10.1186%2Fs13023-023-02848-6/pdf_index.pkl.zst +0 -0
  10. flowapy-0.2.0/examples/demo/fixtures/papers/10.1186%2Fs13052-019-0692-0/pdf_index.pkl.zst +0 -0
  11. flowapy-0.2.0/examples/demo/fixtures/papers/10.3389%2Ffcvm.2022.1061384/pdf_index.pkl.zst +0 -0
  12. flowapy-0.2.0/examples/demo/fixtures/papers/10.3389%2Ffcvm.2023.1261172/pdf_index.pkl.zst +0 -0
  13. flowapy-0.2.0/examples/demo/fixtures/papers/10.3389%2Ffimmu.2024.1336599/pdf_index.pkl.zst +0 -0
  14. flowapy-0.2.0/examples/demo/fixtures/papers/10.3389%2Ffped.2021.729824/pdf_index.pkl.zst +0 -0
  15. flowapy-0.2.0/examples/demo/fixtures/papers/10.3389%2Ffphar.2022.903488/pdf_index.pkl.zst +0 -0
  16. flowapy-0.2.0/examples/demo/fixtures/papers/10.3390%2Fijns11010016/pdf_index.pkl.zst +0 -0
  17. flowapy-0.2.0/examples/demo/fixtures/papers/10.3390%2Fijns6020031/pdf_index.pkl.zst +0 -0
  18. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/src/demo_gateway/main.py +13 -18
  19. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/tests/test_resolve.py +13 -28
  20. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/uv.lock +33 -6
  21. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/package.json +2 -2
  22. flowapy-0.2.0/prompts/generic/aggregation_schema.py +36 -0
  23. {flowapy-0.1.2 → flowapy-0.2.0}/pyproject.toml +2 -1
  24. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/aggregate.py +11 -21
  25. flowapy-0.2.0/src/flowa/artifact.py +73 -0
  26. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/convert.py +45 -20
  27. flowapy-0.2.0/src/flowa/pdf_index_cache.py +133 -0
  28. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/resolve.py +54 -69
  29. flowapy-0.2.0/tests/test_pdf_index_cache.py +120 -0
  30. flowapy-0.2.0/tests/test_resolve.py +158 -0
  31. {flowapy-0.1.2 → flowapy-0.2.0}/uv.lock +33 -6
  32. flowapy-0.1.2/prompts/generic/aggregation_schema.py +0 -95
  33. flowapy-0.1.2/tests/test_resolve.py +0 -207
  34. {flowapy-0.1.2 → flowapy-0.2.0}/.env.example +0 -0
  35. {flowapy-0.1.2 → flowapy-0.2.0}/.github/dependabot.yml +0 -0
  36. {flowapy-0.1.2 → flowapy-0.2.0}/.github/workflows/dependabot-auto-merge.yml +0 -0
  37. {flowapy-0.1.2 → flowapy-0.2.0}/.github/workflows/lint.yaml +0 -0
  38. {flowapy-0.1.2 → flowapy-0.2.0}/.github/workflows/release-chat-service.yaml +0 -0
  39. {flowapy-0.1.2 → flowapy-0.2.0}/.github/workflows/release-flowapy.yaml +0 -0
  40. {flowapy-0.1.2 → flowapy-0.2.0}/.github/workflows/release-react-viewer.yaml +0 -0
  41. {flowapy-0.1.2 → flowapy-0.2.0}/.gitignore +0 -0
  42. {flowapy-0.1.2 → flowapy-0.2.0}/.markdownlint.json +0 -0
  43. {flowapy-0.1.2 → flowapy-0.2.0}/.nvmrc +0 -0
  44. {flowapy-0.1.2 → flowapy-0.2.0}/.pre-commit-config.yaml +0 -0
  45. {flowapy-0.1.2 → flowapy-0.2.0}/.prettierignore +0 -0
  46. {flowapy-0.1.2 → flowapy-0.2.0}/Dockerfile +0 -0
  47. {flowapy-0.1.2 → flowapy-0.2.0}/LICENSE +0 -0
  48. {flowapy-0.1.2 → flowapy-0.2.0}/README.md +0 -0
  49. {flowapy-0.1.2 → flowapy-0.2.0}/docs/images/viewer.png +0 -0
  50. {flowapy-0.1.2 → flowapy-0.2.0}/examples/.gitkeep +0 -0
  51. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/.env.example +0 -0
  52. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/.gitignore +0 -0
  53. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/LICENSES.md +0 -0
  54. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/aggregation.json +0 -0
  55. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.1002%2Fhumu.23878.json +0 -0
  56. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.1016%2Fj.ymgmr.2024.101163.json +0 -0
  57. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.1038%2Fs41598-022-25914-8.json +0 -0
  58. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.1186%2Fs12881-019-0878-8.json +0 -0
  59. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.1186%2Fs13023-021-01817-1.json +0 -0
  60. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.1186%2Fs13023-021-02146-z.json +0 -0
  61. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.1186%2Fs13023-023-02848-6.json +0 -0
  62. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.1186%2Fs13052-019-0692-0.json +0 -0
  63. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.3389%2Ffcvm.2022.1061384.json +0 -0
  64. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.3389%2Ffcvm.2023.1261172.json +0 -0
  65. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.3389%2Ffimmu.2024.1336599.json +0 -0
  66. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.3389%2Ffped.2021.729824.json +0 -0
  67. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.3389%2Ffphar.2022.903488.json +0 -0
  68. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.3390%2Fijns11010016.json +0 -0
  69. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/extractions/10.3390%2Fijns6020031.json +0 -0
  70. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/query.json +0 -0
  71. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/runs/cfc0186a7b7e46eb802a516b86ec207f/progress.jsonl +0 -0
  72. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/assessments/NM_000152_5-c_1935C_A/variant_details.json +0 -0
  73. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1002%2Fajmg.a.61481/metadata.json +0 -0
  74. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1002%2Fhumu.23878/markdown.md +0 -0
  75. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1002%2Fhumu.23878/metadata.json +0 -0
  76. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1002%2Fhumu.23878/source.pdf +0 -0
  77. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1016%2Fj.ejmg.2020.103997/metadata.json +0 -0
  78. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1016%2Fj.nmd.2022.02.002/metadata.json +0 -0
  79. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1016%2Fj.tjog.2022.07.008/metadata.json +0 -0
  80. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1016%2Fj.ymgmr.2024.101163/markdown.md +0 -0
  81. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1016%2Fj.ymgmr.2024.101163/metadata.json +0 -0
  82. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1016%2Fj.ymgmr.2024.101163/source.pdf +0 -0
  83. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1038%2Fs41598-022-25914-8/markdown.md +0 -0
  84. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1038%2Fs41598-022-25914-8/metadata.json +0 -0
  85. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1038%2Fs41598-022-25914-8/source.pdf +0 -0
  86. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1093%2Fhmg%2Fddz218/metadata.json +0 -0
  87. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1136%2Fjmg-2022-108675/metadata.json +0 -0
  88. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs12881-019-0878-8/markdown.md +0 -0
  89. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs12881-019-0878-8/metadata.json +0 -0
  90. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs12881-019-0878-8/source.pdf +0 -0
  91. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13023-021-01817-1/markdown.md +0 -0
  92. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13023-021-01817-1/metadata.json +0 -0
  93. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13023-021-01817-1/source.pdf +0 -0
  94. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13023-021-02146-z/markdown.md +0 -0
  95. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13023-021-02146-z/metadata.json +0 -0
  96. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13023-021-02146-z/source.pdf +0 -0
  97. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13023-023-02848-6/markdown.md +0 -0
  98. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13023-023-02848-6/metadata.json +0 -0
  99. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13023-023-02848-6/source.pdf +0 -0
  100. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13052-019-0692-0/markdown.md +0 -0
  101. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13052-019-0692-0/metadata.json +0 -0
  102. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.1186%2Fs13052-019-0692-0/source.pdf +0 -0
  103. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffcvm.2022.1061384/markdown.md +0 -0
  104. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffcvm.2022.1061384/metadata.json +0 -0
  105. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffcvm.2022.1061384/source.pdf +0 -0
  106. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffcvm.2023.1261172/markdown.md +0 -0
  107. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffcvm.2023.1261172/metadata.json +0 -0
  108. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffcvm.2023.1261172/source.pdf +0 -0
  109. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffimmu.2024.1336599/markdown.md +0 -0
  110. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffimmu.2024.1336599/metadata.json +0 -0
  111. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffimmu.2024.1336599/source.pdf +0 -0
  112. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffped.2021.729824/markdown.md +0 -0
  113. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffped.2021.729824/metadata.json +0 -0
  114. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffped.2021.729824/source.pdf +0 -0
  115. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffphar.2022.903488/markdown.md +0 -0
  116. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffphar.2022.903488/metadata.json +0 -0
  117. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3389%2Ffphar.2022.903488/source.pdf +0 -0
  118. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3390%2Fijns11010016/markdown.md +0 -0
  119. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3390%2Fijns11010016/metadata.json +0 -0
  120. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3390%2Fijns11010016/source.pdf +0 -0
  121. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3390%2Fijns6020031/markdown.md +0 -0
  122. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3390%2Fijns6020031/metadata.json +0 -0
  123. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/fixtures/papers/10.3390%2Fijns6020031/source.pdf +0 -0
  124. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/next-env.d.ts +0 -0
  125. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/next.config.mjs +0 -0
  126. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/package.json +0 -0
  127. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/postcss.config.cjs +0 -0
  128. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/public/favicon.svg +0 -0
  129. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/scripts/chat-service.ts +0 -0
  130. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/scripts/copy-pdfjs-assets.ts +0 -0
  131. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/scripts/exercise-llm.ts +0 -0
  132. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/scripts/start.ts +0 -0
  133. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/components/literature/LiteratureView.tsx +0 -0
  134. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/components/literature/PaperStatusGroup.tsx +0 -0
  135. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/components/literature/ProgressLog.tsx +0 -0
  136. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/components/literature/matchFilename.ts +0 -0
  137. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/db/migrate.ts +0 -0
  138. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/db/schema.sql +0 -0
  139. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/aggregate.ts +0 -0
  140. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/chatSessionClient.ts +0 -0
  141. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/citationResolverClient.ts +0 -0
  142. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/demoConfig.ts +0 -0
  143. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/papers.ts +0 -0
  144. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/progressEvents.ts +0 -0
  145. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/runs.ts +0 -0
  146. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/triageBackendClient.ts +0 -0
  147. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/triageDb.ts +0 -0
  148. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/lib/variantId.ts +0 -0
  149. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/_app.tsx +0 -0
  150. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/aggregate/[variantId]/[category].ts +0 -0
  151. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/edit-drafts/[variantId]/[category]/[version].ts +0 -0
  152. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/edit-drafts/[variantId]/[category]/index.ts +0 -0
  153. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/papers/[doi]/pdf.ts +0 -0
  154. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/papers/index.ts +0 -0
  155. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/runs/[variantId]/[runId]/progress.ts +0 -0
  156. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/runs/index.ts +0 -0
  157. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/runs/latest.ts +0 -0
  158. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/triage/claim.ts +0 -0
  159. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/triage/comment.ts +0 -0
  160. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/triage/paper-done.ts +0 -0
  161. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/api/triage/snapshot/[variantId]/[category]/[version].ts +0 -0
  162. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/index.tsx +0 -0
  163. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/variants/[variantId].tsx +0 -0
  164. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/pages/viewer/[variantId]/[category].tsx +0 -0
  165. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/src/styles/globals.css +0 -0
  166. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/tailwind.config.ts +0 -0
  167. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/LiteratureView.test.tsx +0 -0
  168. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/ProgressLog.test.tsx +0 -0
  169. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/aggregate.test.ts +0 -0
  170. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/chat-service.test.ts +0 -0
  171. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/index-page.test.tsx +0 -0
  172. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/matchFilename.test.ts +0 -0
  173. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/papers-pdf-upload.test.ts +0 -0
  174. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/papers-route.test.ts +0 -0
  175. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/papers.test.ts +0 -0
  176. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/progress-route.test.ts +0 -0
  177. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/runs-latest-route.test.ts +0 -0
  178. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/runs-route.test.ts +0 -0
  179. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/runs.test.ts +0 -0
  180. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/setup.ts +0 -0
  181. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/triage.test.ts +0 -0
  182. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/test/variantId.test.ts +0 -0
  183. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/tsconfig.json +0 -0
  184. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo/vitest.config.ts +0 -0
  185. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/README.md +0 -0
  186. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/pyproject.toml +0 -0
  187. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/src/demo_gateway/__init__.py +0 -0
  188. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/src/demo_gateway/config.py +0 -0
  189. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/src/demo_gateway/progress.py +0 -0
  190. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/src/demo_gateway/runs.py +0 -0
  191. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/tests/__init__.py +0 -0
  192. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/tests/conftest.py +0 -0
  193. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/tests/test_main.py +0 -0
  194. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/tests/test_progress.py +0 -0
  195. {flowapy-0.1.2 → flowapy-0.2.0}/examples/demo-gateway/tests/test_runs.py +0 -0
  196. {flowapy-0.1.2 → flowapy-0.2.0}/package.json +0 -0
  197. {flowapy-0.1.2 → flowapy-0.2.0}/packages/.gitkeep +0 -0
  198. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/Dockerfile +0 -0
  199. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/LICENSE +0 -0
  200. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/README.md +0 -0
  201. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/artifact.ts +0 -0
  202. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/audit.ts +0 -0
  203. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/auth/jwt.ts +0 -0
  204. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/auth/oidc.ts +0 -0
  205. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/chat.ts +0 -0
  206. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/cli.ts +0 -0
  207. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/config.ts +0 -0
  208. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/index.ts +0 -0
  209. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/instrumentation.ts +0 -0
  210. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/llm/anthropic.ts +0 -0
  211. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/llm/bedrock.ts +0 -0
  212. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/llm/factory.ts +0 -0
  213. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/llm/google-gla.ts +0 -0
  214. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/llm/google-vertex.ts +0 -0
  215. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/llm/interface.ts +0 -0
  216. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/llm/openai.ts +0 -0
  217. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/prompts.ts +0 -0
  218. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/server.ts +0 -0
  219. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/session.ts +0 -0
  220. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/storage/factory.ts +0 -0
  221. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/storage/fs.ts +0 -0
  222. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/storage/gcs.ts +0 -0
  223. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/storage/interface.ts +0 -0
  224. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/storage/s3.ts +0 -0
  225. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/storage-keys.ts +0 -0
  226. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/telemetry.ts +0 -0
  227. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/text.ts +0 -0
  228. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/src/yaml.ts +0 -0
  229. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/chat.test.ts +0 -0
  230. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/generic-prompt.test.ts +0 -0
  231. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/llm-factory.test.ts +0 -0
  232. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/oidc.test.ts +0 -0
  233. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/paper-cache.test.ts +0 -0
  234. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/storage-fs.test.ts +0 -0
  235. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/storage-gcs.test.ts +0 -0
  236. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/storage-s3.test.ts +0 -0
  237. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/text.test.ts +0 -0
  238. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/test/yaml.test.ts +0 -0
  239. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/tsconfig.build.json +0 -0
  240. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/tsconfig.json +0 -0
  241. {flowapy-0.1.2 → flowapy-0.2.0}/packages/chat-service/vitest.config.ts +0 -0
  242. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/LICENSE +0 -0
  243. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/README.md +0 -0
  244. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/package.json +0 -0
  245. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/citations/sanitize.test.ts +0 -0
  246. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/citations/sanitize.ts +0 -0
  247. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/citations/types.ts +0 -0
  248. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/index.ts +0 -0
  249. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/llm-content/LlmContent.test.tsx +0 -0
  250. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/llm-content/LlmContent.tsx +0 -0
  251. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/pdf-viewer/PdfHighlightViewer.test.tsx +0 -0
  252. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/pdf-viewer/PdfHighlightViewer.tsx +0 -0
  253. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/pdf-viewer/types.ts +0 -0
  254. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/styles.css +0 -0
  255. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/ChatDrawer.tsx +0 -0
  256. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/ChatSection.tsx +0 -0
  257. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/ClaimList.tsx +0 -0
  258. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/EvidenceViewerShell.test.tsx +0 -0
  259. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/EvidenceViewerShell.tsx +0 -0
  260. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/FocusCard.tsx +0 -0
  261. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/PaperHeader.tsx +0 -0
  262. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/PaperRail.test.tsx +0 -0
  263. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/PaperRail.tsx +0 -0
  264. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/SynthesisPanel.tsx +0 -0
  265. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/backend.ts +0 -0
  266. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/citation-resolver.ts +0 -0
  267. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/citation-utils.test.ts +0 -0
  268. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/citation-utils.ts +0 -0
  269. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/claim-refs.test.ts +0 -0
  270. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/claim-refs.ts +0 -0
  271. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/keyboard.test.ts +0 -0
  272. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/keyboard.ts +0 -0
  273. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/store.test.ts +0 -0
  274. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/store.ts +0 -0
  275. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/src/triage/types.ts +0 -0
  276. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/tailwind.config.ts +0 -0
  277. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/tsconfig.json +0 -0
  278. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/tsup.config.ts +0 -0
  279. {flowapy-0.1.2 → flowapy-0.2.0}/packages/react-viewer/vitest.config.ts +0 -0
  280. {flowapy-0.1.2 → flowapy-0.2.0}/pnpm-lock.yaml +0 -0
  281. {flowapy-0.1.2 → flowapy-0.2.0}/pnpm-workspace.yaml +0 -0
  282. {flowapy-0.1.2 → flowapy-0.2.0}/prompts/generic/aggregation_edit_prompt.txt +0 -0
  283. {flowapy-0.1.2 → flowapy-0.2.0}/prompts/generic/aggregation_edit_schema.ts +0 -0
  284. {flowapy-0.1.2 → flowapy-0.2.0}/prompts/generic/aggregation_prompt.txt +0 -0
  285. {flowapy-0.1.2 → flowapy-0.2.0}/prompts/generic/extraction_prompt.txt +0 -0
  286. {flowapy-0.1.2 → flowapy-0.2.0}/prompts/generic/extraction_schema.py +0 -0
  287. {flowapy-0.1.2 → flowapy-0.2.0}/prompts/generic/transcription_prompt.txt +0 -0
  288. {flowapy-0.1.2 → flowapy-0.2.0}/prompts/package.json +0 -0
  289. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/__init__.py +0 -0
  290. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/cli.py +0 -0
  291. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/clinvar.py +0 -0
  292. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/download.py +0 -0
  293. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/extract.py +0 -0
  294. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/http_retry.py +0 -0
  295. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/models.py +0 -0
  296. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/normalize.py +0 -0
  297. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/progress.py +0 -0
  298. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/prompts/__init__.py +0 -0
  299. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/py.typed +0 -0
  300. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/query.py +0 -0
  301. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/run.py +0 -0
  302. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/schema.py +0 -0
  303. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/settings.py +0 -0
  304. {flowapy-0.1.2 → flowapy-0.2.0}/src/flowa/storage.py +0 -0
  305. {flowapy-0.1.2 → flowapy-0.2.0}/tests/__init__.py +0 -0
  306. {flowapy-0.1.2 → flowapy-0.2.0}/tests/test_progress.py +0 -0
  307. {flowapy-0.1.2 → flowapy-0.2.0}/tests/test_prompts.py +0 -0
  308. {flowapy-0.1.2 → flowapy-0.2.0}/tsconfig.base.json +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flowapy
3
- Version: 0.1.2
3
+ Version: 0.2.0
4
4
  Summary: Variant literature assessment pipeline with AI extraction
5
5
  Project-URL: Homepage, https://github.com/populationgenomics/flowa
6
6
  Project-URL: Source, https://github.com/populationgenomics/flowa
@@ -47,6 +47,7 @@ Requires-Dist: pypdf
47
47
  Requires-Dist: s3fs
48
48
  Requires-Dist: tenacity
49
49
  Requires-Dist: typer
50
+ Requires-Dist: zstandard
50
51
  Provides-Extra: anthropic
51
52
  Requires-Dist: pydantic-ai-slim[anthropic]==1.101.0; extra == 'anthropic'
52
53
  Provides-Extra: bedrock
@@ -166,10 +166,10 @@ VARIANT=NM_001035_3-c_14174A_G
166
166
  rm -f assessments/$VARIANT/aggregation.json \
167
167
  assessments/$VARIANT/aggregation_raw.json
168
168
  rm -rf assessments/$VARIANT/extractions/ assessments/$VARIANT/runs/
169
- # Re-runs flowa.convert (which uses anchorite for PDF chunking).
170
- # Drop this line to reuse the cached markdown and only redo extract +
171
- # aggregate.
172
- rm -f papers/*/markdown.md papers/*/convert_raw.json
169
+ # Re-runs flowa.convert (which uses anchorite for PDF chunking and
170
+ # builds pdf_index.pkl.zst). Drop this line to reuse the cached markdown
171
+ # + index and only redo extract + aggregate.
172
+ rm -f papers/*/markdown.md papers/*/convert_raw.json papers/*/pdf_index.pkl.zst
173
173
  ```
174
174
 
175
175
  Then drive the pipeline. The demo's `scripts/start.ts` translates the
@@ -208,11 +208,13 @@ and not needed by anything downstream.
208
208
  For papers whose source license blocks redistribution (CC-BY-NC-ND,
209
209
  paywalled; see `fixtures/LICENSES.md` for the rule), do **not** delete
210
210
  the whole `papers/{encodedDoi}/` directory — only delete `source.pdf`,
211
- `markdown.md`, and `convert_raw.json`. Keep `metadata.json` (the
212
- bibliographic fields are factual data, not copyrightable) but replace
213
- its `abstract` field with a sentinel string, so the omission reads as
214
- deliberate (not a missing-data bug) when the literature view renders
215
- the row:
211
+ `markdown.md`, `convert_raw.json`, and `pdf_index.pkl.zst`. The
212
+ `pdf_index.pkl.zst` embeds the PDF's extracted text (anchorite's char
213
+ index), so it carries the same copyright as `source.pdf` and must not
214
+ ship in the open-source repo. Keep `metadata.json` (the bibliographic
215
+ fields are factual data, not copyrightable) but replace its `abstract`
216
+ field with a sentinel string, so the omission reads as deliberate (not
217
+ a missing-data bug) when the literature view renders the row:
216
218
 
217
219
  ```bash
218
220
  python3 -c "
@@ -18,9 +18,13 @@ from typing import Annotated
18
18
  import uvicorn
19
19
  from fastapi import APIRouter, Depends, FastAPI, HTTPException, Query, Request, status
20
20
  from fastapi.middleware.cors import CORSMiddleware
21
- from flowa.resolve import ResolvedCitations, ResolveRequest, resolve_citations
21
+ from flowa.resolve import (
22
+ ResolvedCitations,
23
+ ResolveRequest,
24
+ load_pdf_index_from_storage,
25
+ resolve_citations,
26
+ )
22
27
  from flowa.schema import VariantSpec
23
- from flowa.storage import paper_url, read_bytes, read_text
24
28
  from pydantic import BaseModel, Field
25
29
 
26
30
  from .config import Settings
@@ -111,24 +115,15 @@ def resolve_citations_route(
111
115
  ) -> ResolvedCitations:
112
116
  """Align verbatim quotes to PDF bboxes.
113
117
 
114
- Sync `def` so FastAPI auto-runs it in the threadpool — anchorite's PDF
115
- parsing is CPU-bound and would block the asyncio loop otherwise.
118
+ Sync `def` so FastAPI auto-runs it in the threadpool — deserialising the
119
+ PdfIndex pickle and aligning quotes are CPU-bound and would block the
120
+ asyncio loop otherwise.
116
121
  """
117
122
  base = str(settings.demo_data_dir)
118
-
119
- def pdf_loader(doi: str) -> bytes | None:
120
- try:
121
- return read_bytes(paper_url(base, doi, 'source.pdf'))
122
- except FileNotFoundError:
123
- return None
124
-
125
- def md_loader(doi: str) -> str | None:
126
- try:
127
- return read_text(paper_url(base, doi, 'markdown.md'))
128
- except FileNotFoundError:
129
- return None
130
-
131
- return resolve_citations(body.citations, pdf_loader=pdf_loader, markdown_loader=md_loader)
123
+ return resolve_citations(
124
+ body.citations,
125
+ index_provider=lambda doi: load_pdf_index_from_storage(base, doi),
126
+ )
132
127
 
133
128
 
134
129
  @asynccontextmanager
@@ -1,24 +1,13 @@
1
1
  """HTTP shape tests for /resolve-citations.
2
2
 
3
3
  Library-level resolver behaviour is covered in flowa's `tests/test_resolve.py`;
4
- here we just check that the route plumbs Settings → loader flowa.resolve
5
- correctly and returns the expected wire shape.
4
+ here we just check that the route plumbs Settings → index_provider →
5
+ flowa.resolve correctly and returns the expected wire shape.
6
6
  """
7
7
 
8
- from pathlib import Path
9
-
10
8
  from fastapi.testclient import TestClient
11
- from flowa import resolve as flowa_resolve_module
12
- from flowa.storage import encode_doi
13
-
14
- from demo_gateway.config import Settings
15
-
16
9
 
17
- def _write_fake_paper(data_dir: Path, doi: str, pdf: bytes = b'fake-pdf', markdown: str = '# fake md') -> None:
18
- paper_dir = data_dir / 'papers' / encode_doi(doi)
19
- paper_dir.mkdir(parents=True, exist_ok=True)
20
- (paper_dir / 'source.pdf').write_bytes(pdf)
21
- (paper_dir / 'markdown.md').write_text(markdown)
10
+ import demo_gateway.main as demo_main
22
11
 
23
12
 
24
13
  def test_post_resolve_citations_rejects_malformed_body(client: TestClient) -> None:
@@ -26,8 +15,8 @@ def test_post_resolve_citations_rejects_malformed_body(client: TestClient) -> No
26
15
  assert response.status_code == 422
27
16
 
28
17
 
29
- def test_post_resolve_citations_returns_errors_for_missing_pdfs(client: TestClient) -> None:
30
- """When source.pdf is absent, the DOI surfaces in `errors` rather than `resolved`."""
18
+ def test_post_resolve_citations_returns_errors_for_missing_index(client: TestClient) -> None:
19
+ """When pdf_index.pkl.zst is absent, the DOI surfaces in `errors` rather than `resolved`."""
31
20
  response = client.post(
32
21
  '/resolve-citations',
33
22
  json={'citations': [{'doi': '10.1/missing', 'quotes': ['anything']}]},
@@ -35,15 +24,11 @@ def test_post_resolve_citations_returns_errors_for_missing_pdfs(client: TestClie
35
24
  assert response.status_code == 200
36
25
  body = response.json()
37
26
  assert body['resolved'] == {}
38
- assert body['errors'] == {'10.1/missing': 'source.pdf not found'}
27
+ assert body['errors'] == {'10.1/missing': 'pdf_index not available'}
39
28
 
40
29
 
41
- def test_post_resolve_citations_returns_resolved_bboxes(
42
- client: TestClient,
43
- settings: Settings,
44
- monkeypatch,
45
- ) -> None:
46
- """When source.pdf exists, the route resolves quotes to bboxes via the library."""
30
+ def test_post_resolve_citations_returns_resolved_bboxes(client: TestClient, monkeypatch) -> None:
31
+ """When the index loads, the route resolves quotes to bboxes via the library."""
47
32
 
48
33
  class _FakeBbox:
49
34
  def __init__(self, top: int, left: int, bottom: int, right: int) -> None:
@@ -53,16 +38,16 @@ def test_post_resolve_citations_returns_resolved_bboxes(
53
38
  self.right = right
54
39
 
55
40
  class _FakePdfIndex:
56
- def __init__(self, _pdf_bytes: bytes, *, markdown: str | None = None) -> None:
57
- pass
58
-
59
41
  def resolve(self, quotes: list[str]) -> dict[str, list[tuple[int, _FakeBbox]]]:
60
42
  # 0-indexed page from anchorite — the +1 boundary wrap in resolve.py
61
43
  # turns this into page=1 on the wire.
62
44
  return {q: [(0, _FakeBbox(top=10, left=20, bottom=30, right=40))] for q in quotes}
63
45
 
64
- monkeypatch.setattr(flowa_resolve_module, 'PdfIndex', _FakePdfIndex)
65
- _write_fake_paper(settings.demo_data_dir, '10.1/present')
46
+ monkeypatch.setattr(
47
+ demo_main,
48
+ 'load_pdf_index_from_storage',
49
+ lambda _base, doi: _FakePdfIndex() if doi == '10.1/present' else None,
50
+ )
66
51
 
67
52
  response = client.post(
68
53
  '/resolve-citations',
@@ -3,14 +3,14 @@ revision = 3
3
3
  requires-python = "==3.13.*"
4
4
 
5
5
  [options]
6
- exclude-newer = "2026-05-19T00:00:39.870651Z"
6
+ exclude-newer = "0001-01-01T00:00:00Z" # This has no effect and is included for backwards compatibility when using relative exclude-newer values.
7
7
  exclude-newer-span = "P7D"
8
8
 
9
9
  [options.exclude-newer-package]
10
- pydantic-ai-slim = { timestamp = "2026-05-26T00:00:39.87066Z", span = "PT0S" }
11
- anchorite = { timestamp = "2026-05-26T00:00:39.870656Z", span = "PT0S" }
12
- seq-smith = { timestamp = "2026-05-26T00:00:39.87066Z", span = "PT0S" }
13
- pydantic-graph = { timestamp = "2026-05-26T00:00:39.870661Z", span = "PT0S" }
10
+ pydantic-ai-slim = { timestamp = "0001-01-01T00:00:00Z", span = "PT0S" }
11
+ anchorite = { timestamp = "0001-01-01T00:00:00Z", span = "PT0S" }
12
+ seq-smith = { timestamp = "0001-01-01T00:00:00Z", span = "PT0S" }
13
+ pydantic-graph = { timestamp = "0001-01-01T00:00:00Z", span = "PT0S" }
14
14
 
15
15
  [[package]]
16
16
  name = "aiohappyeyeballs"
@@ -415,7 +415,7 @@ wheels = [
415
415
 
416
416
  [[package]]
417
417
  name = "flowapy"
418
- version = "0.1.2"
418
+ version = "0.2.0"
419
419
  source = { editable = "../../" }
420
420
  dependencies = [
421
421
  { name = "anchorite" },
@@ -431,6 +431,7 @@ dependencies = [
431
431
  { name = "s3fs" },
432
432
  { name = "tenacity" },
433
433
  { name = "typer" },
434
+ { name = "zstandard" },
434
435
  ]
435
436
 
436
437
  [package.optional-dependencies]
@@ -466,6 +467,7 @@ requires-dist = [
466
467
  { name = "s3fs" },
467
468
  { name = "tenacity" },
468
469
  { name = "typer" },
470
+ { name = "zstandard" },
469
471
  ]
470
472
  provides-extras = ["anthropic", "bedrock", "google", "openai"]
471
473
 
@@ -2109,3 +2111,28 @@ sdist = { url = "https://files.pythonhosted.org/packages/30/21/093488dfc7cc8964d
2109
2111
  wheels = [
2110
2112
  { url = "https://files.pythonhosted.org/packages/08/8a/0861bec20485572fbddf3dfba2910e38fe249796cb73ecdeb74e07eeb8d3/zipp-3.23.1-py3-none-any.whl", hash = "sha256:0b3596c50a5c700c9cb40ba8d86d9f2cc4807e9bedb06bcdf7fac85633e444dc", size = 10378, upload-time = "2026-04-13T23:21:45.386Z" },
2111
2113
  ]
2114
+
2115
+ [[package]]
2116
+ name = "zstandard"
2117
+ version = "0.25.0"
2118
+ source = { registry = "https://pypi.org/simple" }
2119
+ sdist = { url = "https://files.pythonhosted.org/packages/fd/aa/3e0508d5a5dd96529cdc5a97011299056e14c6505b678fd58938792794b1/zstandard-0.25.0.tar.gz", hash = "sha256:7713e1179d162cf5c7906da876ec2ccb9c3a9dcbdffef0cc7f70c3667a205f0b", size = 711513, upload-time = "2025-09-14T22:15:54.002Z" }
2120
+ wheels = [
2121
+ { url = "https://files.pythonhosted.org/packages/35/0b/8df9c4ad06af91d39e94fa96cc010a24ac4ef1378d3efab9223cc8593d40/zstandard-0.25.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec996f12524f88e151c339688c3897194821d7f03081ab35d31d1e12ec975e94", size = 795735, upload-time = "2025-09-14T22:17:26.042Z" },
2122
+ { url = "https://files.pythonhosted.org/packages/3f/06/9ae96a3e5dcfd119377ba33d4c42a7d89da1efabd5cb3e366b156c45ff4d/zstandard-0.25.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a1a4ae2dec3993a32247995bdfe367fc3266da832d82f8438c8570f989753de1", size = 640440, upload-time = "2025-09-14T22:17:27.366Z" },
2123
+ { url = "https://files.pythonhosted.org/packages/d9/14/933d27204c2bd404229c69f445862454dcc101cd69ef8c6068f15aaec12c/zstandard-0.25.0-cp313-cp313-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:e96594a5537722fdfb79951672a2a63aec5ebfb823e7560586f7484819f2a08f", size = 5343070, upload-time = "2025-09-14T22:17:28.896Z" },
2124
+ { url = "https://files.pythonhosted.org/packages/6d/db/ddb11011826ed7db9d0e485d13df79b58586bfdec56e5c84a928a9a78c1c/zstandard-0.25.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bfc4e20784722098822e3eee42b8e576b379ed72cca4a7cb856ae733e62192ea", size = 5063001, upload-time = "2025-09-14T22:17:31.044Z" },
2125
+ { url = "https://files.pythonhosted.org/packages/db/00/87466ea3f99599d02a5238498b87bf84a6348290c19571051839ca943777/zstandard-0.25.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:457ed498fc58cdc12fc48f7950e02740d4f7ae9493dd4ab2168a47c93c31298e", size = 5394120, upload-time = "2025-09-14T22:17:32.711Z" },
2126
+ { url = "https://files.pythonhosted.org/packages/2b/95/fc5531d9c618a679a20ff6c29e2b3ef1d1f4ad66c5e161ae6ff847d102a9/zstandard-0.25.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:fd7a5004eb1980d3cefe26b2685bcb0b17989901a70a1040d1ac86f1d898c551", size = 5451230, upload-time = "2025-09-14T22:17:34.41Z" },
2127
+ { url = "https://files.pythonhosted.org/packages/63/4b/e3678b4e776db00f9f7b2fe58e547e8928ef32727d7a1ff01dea010f3f13/zstandard-0.25.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8e735494da3db08694d26480f1493ad2cf86e99bdd53e8e9771b2752a5c0246a", size = 5547173, upload-time = "2025-09-14T22:17:36.084Z" },
2128
+ { url = "https://files.pythonhosted.org/packages/4e/d5/ba05ed95c6b8ec30bd468dfeab20589f2cf709b5c940483e31d991f2ca58/zstandard-0.25.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3a39c94ad7866160a4a46d772e43311a743c316942037671beb264e395bdd611", size = 5046736, upload-time = "2025-09-14T22:17:37.891Z" },
2129
+ { url = "https://files.pythonhosted.org/packages/50/d5/870aa06b3a76c73eced65c044b92286a3c4e00554005ff51962deef28e28/zstandard-0.25.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:172de1f06947577d3a3005416977cce6168f2261284c02080e7ad0185faeced3", size = 5576368, upload-time = "2025-09-14T22:17:40.206Z" },
2130
+ { url = "https://files.pythonhosted.org/packages/5d/35/398dc2ffc89d304d59bc12f0fdd931b4ce455bddf7038a0a67733a25f550/zstandard-0.25.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3c83b0188c852a47cd13ef3bf9209fb0a77fa5374958b8c53aaa699398c6bd7b", size = 4954022, upload-time = "2025-09-14T22:17:41.879Z" },
2131
+ { url = "https://files.pythonhosted.org/packages/9a/5c/36ba1e5507d56d2213202ec2b05e8541734af5f2ce378c5d1ceaf4d88dc4/zstandard-0.25.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1673b7199bbe763365b81a4f3252b8e80f44c9e323fc42940dc8843bfeaf9851", size = 5267889, upload-time = "2025-09-14T22:17:43.577Z" },
2132
+ { url = "https://files.pythonhosted.org/packages/70/e8/2ec6b6fb7358b2ec0113ae202647ca7c0e9d15b61c005ae5225ad0995df5/zstandard-0.25.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0be7622c37c183406f3dbf0cba104118eb16a4ea7359eeb5752f0794882fc250", size = 5433952, upload-time = "2025-09-14T22:17:45.271Z" },
2133
+ { url = "https://files.pythonhosted.org/packages/7b/01/b5f4d4dbc59ef193e870495c6f1275f5b2928e01ff5a81fecb22a06e22fb/zstandard-0.25.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5f5e4c2a23ca271c218ac025bd7d635597048b366d6f31f420aaeb715239fc98", size = 5814054, upload-time = "2025-09-14T22:17:47.08Z" },
2134
+ { url = "https://files.pythonhosted.org/packages/b2/e5/fbd822d5c6f427cf158316d012c5a12f233473c2f9c5fe5ab1ae5d21f3d8/zstandard-0.25.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f187a0bb61b35119d1926aee039524d1f93aaf38a9916b8c4b78ac8514a0aaf", size = 5360113, upload-time = "2025-09-14T22:17:48.893Z" },
2135
+ { url = "https://files.pythonhosted.org/packages/8e/e0/69a553d2047f9a2c7347caa225bb3a63b6d7704ad74610cb7823baa08ed7/zstandard-0.25.0-cp313-cp313-win32.whl", hash = "sha256:7030defa83eef3e51ff26f0b7bfb229f0204b66fe18e04359ce3474ac33cbc09", size = 436936, upload-time = "2025-09-14T22:17:52.658Z" },
2136
+ { url = "https://files.pythonhosted.org/packages/d9/82/b9c06c870f3bd8767c201f1edbdf9e8dc34be5b0fbc5682c4f80fe948475/zstandard-0.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:1f830a0dac88719af0ae43b8b2d6aef487d437036468ef3c2ea59c51f9d55fd5", size = 506232, upload-time = "2025-09-14T22:17:50.402Z" },
2137
+ { url = "https://files.pythonhosted.org/packages/d4/57/60c3c01243bb81d381c9916e2a6d9e149ab8627c0c7d7abb2d73384b3c0c/zstandard-0.25.0-cp313-cp313-win_arm64.whl", hash = "sha256:85304a43f4d513f5464ceb938aa02c1e78c2943b29f44a750b48b25ac999a049", size = 462671, upload-time = "2025-09-14T22:17:51.533Z" },
2138
+ ]
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@flowajs/chat-service",
3
- "version": "0.0.3",
3
+ "version": "0.0.4",
4
4
  "description": "Stateless service that orchestrates LLM conversations over flowa artifacts.",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -71,7 +71,7 @@
71
71
  "zod": "4.4.3"
72
72
  },
73
73
  "peerDependencies": {
74
- "@ai-sdk/amazon-bedrock": "^4.0.0",
74
+ "@ai-sdk/amazon-bedrock": "^4.0.101",
75
75
  "@ai-sdk/anthropic": "^3.0.0",
76
76
  "@ai-sdk/google": "^3.0.0",
77
77
  "@ai-sdk/google-vertex": "^4.0.0",
@@ -0,0 +1,36 @@
1
+ """Aggregation schema for generic ACMG-style variant assessment.
2
+
3
+ This module defines the output structure for aggregation across papers.
4
+ The AggregationResult class is loaded dynamically by Flowa.
5
+
6
+ Interface requirements (accessed by Flowa's validation logic):
7
+ - results[].category must exist
8
+ - results[].claims[].paper_id and .citations[].quote must exist
9
+ - results[].papers[].paper_id must exist
10
+
11
+ Strict structured outputs (Bedrock/Anthropic/OpenAI NativeOutput) clobber
12
+ additionalProperties to false, collapsing dict[str, X] fields to empty objects.
13
+ Use a list shape with the category carried as a field inside each entry.
14
+ """
15
+
16
+ from pydantic import BaseModel, Field
17
+
18
+ from flowa.artifact import CategoryResult as BaseCategoryResult
19
+
20
+
21
+ class CategoryResult(BaseCategoryResult):
22
+ """ACMG-classification result for a single assessment category."""
23
+
24
+ classification: str = Field(
25
+ description='ACMG classification: Pathogenic, Likely Pathogenic, VUS, Likely Benign, or Benign.'
26
+ )
27
+ classification_rationale: str = Field(description='Brief explanation of why this classification was selected.')
28
+
29
+
30
+ class AggregationResult(BaseModel):
31
+ """Multi-category aggregation result for ACMG-style variant assessment."""
32
+
33
+ results: list[CategoryResult] = Field(
34
+ description='List of assessment results, one per selected category. '
35
+ 'Each entry carries its own `category` identifier.'
36
+ )
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "flowapy"
3
- version = "0.1.2"
3
+ version = "0.2.0"
4
4
  description = "Variant literature assessment pipeline with AI extraction"
5
5
  readme = "README.md"
6
6
  requires-python = "==3.13.*"
@@ -27,6 +27,7 @@ dependencies = [
27
27
  "s3fs", # S3/MinIO support for fsspec
28
28
  "tenacity",
29
29
  "typer",
30
+ "zstandard",
30
31
  ]
31
32
 
32
33
  [project.optional-dependencies]
@@ -15,7 +15,7 @@ from pydantic_ai import Agent, ModelRetry, NativeOutput, RunContext
15
15
  from flowa.clinvar import format_clinvar_for_prompt, query_clinvar
16
16
  from flowa.models import create_model, get_model_settings
17
17
  from flowa.prompts import load_prompt_and_schema
18
- from flowa.resolve import CitationQuery, resolve_citations
18
+ from flowa.resolve import CitationQuery, load_pdf_index_from_storage, resolve_citations
19
19
  from flowa.schema import AGGREGATION_SCHEMA_VERSION, with_schema_version
20
20
  from flowa.settings import ModelConfig, Settings
21
21
  from flowa.storage import (
@@ -23,9 +23,7 @@ from flowa.storage import (
23
23
  encode_doi,
24
24
  exists,
25
25
  paper_url,
26
- read_bytes,
27
26
  read_json,
28
- read_text,
29
27
  write_bytes,
30
28
  write_json,
31
29
  )
@@ -158,16 +156,14 @@ def create_aggregate_agent(
158
156
  def resolve_aggregate_citations(
159
157
  aggregate_dict: dict[str, Any],
160
158
  paper_id_to_doi: dict[str, str],
161
- pdf_bytes_cache: dict[str, bytes],
162
- markdown_cache: dict[str, str],
159
+ base: str,
163
160
  metadata_cache: dict[str, dict[str, Any]],
164
161
  ) -> None:
165
162
  """Post-process aggregate output: resolve quotes to bboxes on claim citations.
166
163
 
167
- Delegates the actual alignment to `flowa.resolve.resolve_citations` with
168
- cache-backed loaders, then attaches the resulting bboxes onto each claim's
169
- citations in place. Claims are grouped by paper_id so every (paper_id,
170
- quote) pair resolves to exactly one paper.
164
+ Loads each paper's pre-built `pdf_index.pkl.zst` via the same path the
165
+ gateway uses; the convert step that ran earlier in this pipeline wrote
166
+ the artifact, so it's guaranteed to be present.
171
167
  """
172
168
  # Collect all (doi, quote) pairs, grouped by DOI.
173
169
  doi_quotes: dict[str, list[str]] = {}
@@ -180,8 +176,7 @@ def resolve_aggregate_citations(
180
176
  citations_input = [CitationQuery(doi=doi, quotes=quotes) for doi, quotes in doi_quotes.items()]
181
177
  result = resolve_citations(
182
178
  citations_input,
183
- pdf_loader=pdf_bytes_cache.get,
184
- markdown_loader=markdown_cache.get,
179
+ index_provider=lambda doi: load_pdf_index_from_storage(base, doi),
185
180
  )
186
181
 
187
182
  # Attach resolved bboxes onto each claim's citations.
@@ -229,14 +224,11 @@ async def aggregate_evidence_async(
229
224
  clinvar_data = query_clinvar(hgvs_c_full, ncbi_api_key)
230
225
  clinvar_text = format_clinvar_for_prompt(clinvar_data)
231
226
 
232
- # Load extractions and metadata for each paper. PDF bytes and Markdown are
233
- # cached for post-LLM citation resolution: PdfIndex takes both (markdown
234
- # denoises the indexed PDF chars). Both files are produced together by the
235
- # pipeline; a missing markdown.md at this point is a storage corruption and
236
- # surfaces as FileNotFoundError below.
227
+ # Load extractions and metadata for each paper. PDF bytes and markdown
228
+ # are NOT loaded here — the post-LLM citation resolver loads the paper's
229
+ # pre-built `pdf_index.pkl.zst` directly from storage, so this step only
230
+ # needs the LLM inputs.
237
231
  evidence_extractions: list[dict[str, Any]] = []
238
- pdf_bytes_cache: dict[str, bytes] = {}
239
- markdown_cache: dict[str, str] = {}
240
232
  metadata_cache: dict[str, dict[str, Any]] = {}
241
233
 
242
234
  for doi in dois:
@@ -252,8 +244,6 @@ async def aggregate_evidence_async(
252
244
  log.info('Skipping %s: variant not discussed', doi)
253
245
  continue
254
246
 
255
- pdf_bytes_cache[doi] = read_bytes(paper_url(base, doi, 'source.pdf'))
256
- markdown_cache[doi] = read_text(paper_url(base, doi, 'markdown.md'))
257
247
  metadata = read_json(paper_url(base, doi, 'metadata.json'))
258
248
  metadata_cache[doi] = metadata
259
249
 
@@ -332,7 +322,7 @@ async def aggregate_evidence_async(
332
322
  # Post-LLM: resolve quotes to bboxes, replace paper_id with DOI
333
323
  aggregate_dict = output.model_dump()
334
324
  with logfire.span('flowa.resolve_citations', paper_count=len(paper_id_to_doi)):
335
- resolve_aggregate_citations(aggregate_dict, paper_id_to_doi, pdf_bytes_cache, markdown_cache, metadata_cache)
325
+ resolve_aggregate_citations(aggregate_dict, paper_id_to_doi, base, metadata_cache)
336
326
 
337
327
  # Store structured aggregation result
338
328
  write_json(aggregation_url, with_schema_version(aggregate_dict, AGGREGATION_SCHEMA_VERSION))
@@ -0,0 +1,73 @@
1
+ """Shared schema primitives for aggregation prompt sets.
2
+
3
+ `CategoryResult` is the citation-grounded base each deployment derives from
4
+ to add its own classification field(s) — e.g. generic ACMG sets add
5
+ `classification` + `classification_rationale`; private deployments may add
6
+ their own classification dimensions. Each deployment also defines its own
7
+ `AggregationResult` with `results: list[CategoryResult]` pointing at its
8
+ own `CategoryResult` subclass (Pydantic doesn't covariantly substitute the
9
+ parent's parameterised generic).
10
+
11
+ This module is the Python analog of `@flowajs/chat-service`'s
12
+ `artifactFields` / `ArtifactSchema` export: it exists so prompt-set schemas
13
+ share their citation-grounded structure rather than duplicating it.
14
+
15
+ The schema-side descriptions here are deliberately structural — they
16
+ describe what each field IS. Behavioural guidance (how to populate
17
+ `description`, ranking conventions, claim-emission criteria) belongs in the
18
+ aggregation prompt template, not the schema.
19
+ """
20
+
21
+ from pydantic import BaseModel, Field
22
+
23
+
24
+ class AggregateCitation(BaseModel):
25
+ """A citation quoting a specific passage from a source paper."""
26
+
27
+ quote: str = Field(
28
+ description='A short, distinctive verbatim quote from the paper text that identifies this evidence.'
29
+ )
30
+
31
+
32
+ class Claim(BaseModel):
33
+ """A factual statement supporting a synthesis, sourced from one paper."""
34
+
35
+ paper_id: str = Field(description='AuthorYear identifier of the source paper; must appear in papers[].')
36
+ text: str = Field(
37
+ description='The factual statement as the curator reads it in triage. '
38
+ 'May synthesise across citations from the same paper.'
39
+ )
40
+ citations: list[AggregateCitation] = Field(
41
+ description='One or more supporting quotes from paper_id.',
42
+ min_length=1,
43
+ )
44
+
45
+
46
+ class RankedPaper(BaseModel):
47
+ """A paper in the ranked papers list. List position encodes importance."""
48
+
49
+ paper_id: str = Field(description='AuthorYear identifier; must match the key used in paper_id_mapping.')
50
+ rank_rationale: str = Field(description='One sentence explaining why this paper sits at this rank.')
51
+
52
+
53
+ class CategoryResult(BaseModel):
54
+ """Citation-grounded base for an aggregation result.
55
+
56
+ Deployments subclass to add classification-specific fields. The five
57
+ fields here are the contract that `flowa.aggregate` reads at validation
58
+ time.
59
+ """
60
+
61
+ category: str = Field(description='Assessment-category identifier.')
62
+ description: str = Field(description='Short human-readable summary of the synthesis.')
63
+ notes: str = Field(
64
+ description='Long-form synthesis in Markdown. Uses inline citation links: '
65
+ '[text](#cite:paper_id "verbatim quote") to reference specific paper evidence locations.'
66
+ )
67
+ papers: list[RankedPaper] = Field(
68
+ description='Source documents contributing to the synthesis, ordered by importance. '
69
+ 'List position is the rank; paper_id values must be unique.'
70
+ )
71
+ claims: list[Claim] = Field(
72
+ description='Factual claims supporting the synthesis, grouped by paper_id in the same order as papers[].'
73
+ )
@@ -17,9 +17,11 @@ from pydantic_ai import Agent
17
17
  from pydantic_ai.messages import BinaryContent
18
18
 
19
19
  from flowa.models import create_model, get_model_settings
20
+ from flowa.pdf_index_cache import build as build_pdf_index_payload
21
+ from flowa.pdf_index_cache import serialize as serialize_pdf_index_payload
20
22
  from flowa.prompts import load_text_prompt
21
23
  from flowa.settings import ModelConfig, Settings
22
- from flowa.storage import exists, paper_url, read_bytes, write_bytes, write_text
24
+ from flowa.storage import exists, paper_url, read_bytes, read_text, write_bytes, write_text
23
25
 
24
26
  log = logging.getLogger(__name__)
25
27
 
@@ -120,14 +122,22 @@ async def transcribe(
120
122
 
121
123
 
122
124
  async def convert_paper_async(base: str, doi: str, model: ModelConfig, prompt_set: str = 'generic') -> None:
123
- """Convert a single paper's PDF to Markdown.
125
+ """Convert a single paper's PDF to Markdown and persist its `PdfIndex`.
124
126
 
125
- Reads PDF from papers/{encoded_doi}/source.pdf in object storage.
126
- Stores result to papers/{encoded_doi}/markdown.md.
127
+ Reads PDF from papers/{encoded_doi}/source.pdf and writes
128
+ papers/{encoded_doi}/markdown.md plus papers/{encoded_doi}/pdf_index.pkl.zst
129
+ (consumed by the gateway's resolve endpoint and by the aggregate step's citation resolver).
130
+
131
+ Either artifact can be missing independently — if a previous run failed
132
+ or pre-dates the pdf_index step, the next call fills in only what's
133
+ missing without re-transcribing or re-building work that's already done.
127
134
  """
128
135
  md_url = paper_url(base, doi, 'markdown.md')
136
+ index_url = paper_url(base, doi, 'pdf_index.pkl.zst')
137
+ md_needed = not exists(md_url)
138
+ index_needed = not exists(index_url)
129
139
 
130
- if exists(md_url):
140
+ if not md_needed and not index_needed:
131
141
  log.info('Already converted: %s', md_url)
132
142
  return
133
143
 
@@ -138,21 +148,36 @@ async def convert_paper_async(base: str, doi: str, model: ModelConfig, prompt_se
138
148
  log.info('Skipping DOI %s: PDF not available', doi)
139
149
  return
140
150
 
141
- log.info(
142
- 'Converting DOI %s (%d bytes, model: %s, chunk: %d pages)', doi, len(pdf_bytes), model.name, PAGES_PER_CHUNK
143
- )
144
-
145
- prompt = load_text_prompt('transcription', prompt_set)
146
- t0 = time.monotonic()
147
- result = await transcribe(pdf_bytes, model=model, prompt=prompt, page_count=PAGES_PER_CHUNK)
148
- elapsed = time.monotonic() - t0
149
-
150
- write_text(md_url, result.markdown)
151
-
152
- raw_url = paper_url(base, doi, 'convert_raw.json')
153
- write_bytes(raw_url, json.dumps(result.all_messages).encode())
154
-
155
- log.info('Converted DOI %s: %d chars in %.1fs', doi, len(result.markdown), elapsed)
151
+ markdown: str | None = None
152
+ if md_needed:
153
+ log.info(
154
+ 'Converting DOI %s (%d bytes, model: %s, chunk: %d pages)', doi, len(pdf_bytes), model.name, PAGES_PER_CHUNK
155
+ )
156
+ prompt = load_text_prompt('transcription', prompt_set)
157
+ t0 = time.monotonic()
158
+ result = await transcribe(pdf_bytes, model=model, prompt=prompt, page_count=PAGES_PER_CHUNK)
159
+ elapsed = time.monotonic() - t0
160
+
161
+ markdown = result.markdown
162
+ write_text(md_url, markdown)
163
+ write_bytes(paper_url(base, doi, 'convert_raw.json'), json.dumps(result.all_messages).encode())
164
+ log.info('Converted DOI %s: %d chars in %.1fs', doi, len(markdown), elapsed)
165
+
166
+ if index_needed:
167
+ # PdfIndex construction is CPU-bound (~8s on the deployed gateway
168
+ # hardware) and dominates per-call latency at `/api/v1/resolve` if
169
+ # rebuilt on every call. Pay the cost here once per paper and ship
170
+ # the result; see `flowa.pdf_index_cache` for the storage format.
171
+ # `asyncio.to_thread` keeps the rest of the convert pipeline (other
172
+ # papers being transcribed concurrently) unblocked.
173
+ if markdown is None: # index missing but markdown already on disk
174
+ markdown = read_text(md_url)
175
+ t0 = time.monotonic()
176
+ blob = await asyncio.to_thread(
177
+ lambda: serialize_pdf_index_payload(build_pdf_index_payload(pdf_bytes, markdown))
178
+ )
179
+ write_bytes(index_url, blob)
180
+ log.info('Wrote pdf_index for DOI %s: %.1f MB in %.1fs', doi, len(blob) / 1e6, time.monotonic() - t0)
156
181
 
157
182
 
158
183
  def convert_paper(