@oriro/orirocli 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (351) hide show
  1. package/ATTRIBUTION.md +8 -0
  2. package/LICENSE +21 -0
  3. package/dist/cli.js +35 -5
  4. package/package.json +1 -1
  5. package/skills/21stdev/SKILL.md +64 -0
  6. package/skills/graphify/SKILL.md +619 -0
  7. package/skills/graphify/__init__.py +28 -0
  8. package/skills/graphify/__main__.py +4582 -0
  9. package/skills/graphify/affected.py +154 -0
  10. package/skills/graphify/always_on/agents-md.md +12 -0
  11. package/skills/graphify/always_on/antigravity-rules.md +14 -0
  12. package/skills/graphify/always_on/claude-md.md +9 -0
  13. package/skills/graphify/always_on/gemini-md.md +9 -0
  14. package/skills/graphify/always_on/kiro-steering.md +5 -0
  15. package/skills/graphify/always_on/vscode-instructions.md +17 -0
  16. package/skills/graphify/analyze.py +724 -0
  17. package/skills/graphify/benchmark.py +155 -0
  18. package/skills/graphify/build.py +487 -0
  19. package/skills/graphify/cache.py +417 -0
  20. package/skills/graphify/callflow_html.py +2020 -0
  21. package/skills/graphify/cluster.py +272 -0
  22. package/skills/graphify/command-kilo.md +15 -0
  23. package/skills/graphify/dedup.py +429 -0
  24. package/skills/graphify/detect.py +1379 -0
  25. package/skills/graphify/diagnostics.py +390 -0
  26. package/skills/graphify/export.py +1408 -0
  27. package/skills/graphify/extract.py +11570 -0
  28. package/skills/graphify/global_graph.py +159 -0
  29. package/skills/graphify/google_workspace.py +223 -0
  30. package/skills/graphify/hooks.py +457 -0
  31. package/skills/graphify/ingest.py +331 -0
  32. package/skills/graphify/llm.py +1896 -0
  33. package/skills/graphify/manifest.py +4 -0
  34. package/skills/graphify/mcp_ingest.py +392 -0
  35. package/skills/graphify/multigraph_compat.py +212 -0
  36. package/skills/graphify/pg_introspect.py +142 -0
  37. package/skills/graphify/prs.py +748 -0
  38. package/skills/graphify/querylog.py +70 -0
  39. package/skills/graphify/report.py +218 -0
  40. package/skills/graphify/scip_ingest.py +363 -0
  41. package/skills/graphify/security.py +336 -0
  42. package/skills/graphify/semantic_cleanup.py +319 -0
  43. package/skills/graphify/serve.py +1309 -0
  44. package/skills/graphify/skill-aider.md +1246 -0
  45. package/skills/graphify/skill-amp.md +613 -0
  46. package/skills/graphify/skill-claw.md +616 -0
  47. package/skills/graphify/skill-codex.md +613 -0
  48. package/skills/graphify/skill-copilot.md +616 -0
  49. package/skills/graphify/skill-devin.md +1372 -0
  50. package/skills/graphify/skill-droid.md +613 -0
  51. package/skills/graphify/skill-kilo.md +625 -0
  52. package/skills/graphify/skill-kiro.md +615 -0
  53. package/skills/graphify/skill-opencode.md +608 -0
  54. package/skills/graphify/skill-pi.md +615 -0
  55. package/skills/graphify/skill-trae.md +614 -0
  56. package/skills/graphify/skill-vscode.md +612 -0
  57. package/skills/graphify/skill-windows.md +651 -0
  58. package/skills/graphify/skills/amp/references/add-watch.md +56 -0
  59. package/skills/graphify/skills/amp/references/exports.md +71 -0
  60. package/skills/graphify/skills/amp/references/extraction-spec.md +68 -0
  61. package/skills/graphify/skills/amp/references/github-and-merge.md +46 -0
  62. package/skills/graphify/skills/amp/references/hooks.md +33 -0
  63. package/skills/graphify/skills/amp/references/query.md +249 -0
  64. package/skills/graphify/skills/amp/references/transcribe.md +48 -0
  65. package/skills/graphify/skills/amp/references/update.md +179 -0
  66. package/skills/graphify/skills/claude/references/add-watch.md +56 -0
  67. package/skills/graphify/skills/claude/references/exports.md +71 -0
  68. package/skills/graphify/skills/claude/references/extraction-spec.md +68 -0
  69. package/skills/graphify/skills/claude/references/github-and-merge.md +46 -0
  70. package/skills/graphify/skills/claude/references/hooks.md +33 -0
  71. package/skills/graphify/skills/claude/references/query.md +103 -0
  72. package/skills/graphify/skills/claude/references/transcribe.md +48 -0
  73. package/skills/graphify/skills/claude/references/update.md +179 -0
  74. package/skills/graphify/skills/claw/references/add-watch.md +56 -0
  75. package/skills/graphify/skills/claw/references/exports.md +71 -0
  76. package/skills/graphify/skills/claw/references/extraction-spec.md +29 -0
  77. package/skills/graphify/skills/claw/references/github-and-merge.md +46 -0
  78. package/skills/graphify/skills/claw/references/hooks.md +33 -0
  79. package/skills/graphify/skills/claw/references/query.md +249 -0
  80. package/skills/graphify/skills/claw/references/transcribe.md +48 -0
  81. package/skills/graphify/skills/claw/references/update.md +179 -0
  82. package/skills/graphify/skills/codex/references/add-watch.md +56 -0
  83. package/skills/graphify/skills/codex/references/exports.md +71 -0
  84. package/skills/graphify/skills/codex/references/extraction-spec.md +29 -0
  85. package/skills/graphify/skills/codex/references/github-and-merge.md +46 -0
  86. package/skills/graphify/skills/codex/references/hooks.md +33 -0
  87. package/skills/graphify/skills/codex/references/query.md +249 -0
  88. package/skills/graphify/skills/codex/references/transcribe.md +48 -0
  89. package/skills/graphify/skills/codex/references/update.md +179 -0
  90. package/skills/graphify/skills/copilot/references/add-watch.md +56 -0
  91. package/skills/graphify/skills/copilot/references/exports.md +71 -0
  92. package/skills/graphify/skills/copilot/references/extraction-spec.md +68 -0
  93. package/skills/graphify/skills/copilot/references/github-and-merge.md +46 -0
  94. package/skills/graphify/skills/copilot/references/hooks.md +33 -0
  95. package/skills/graphify/skills/copilot/references/query.md +249 -0
  96. package/skills/graphify/skills/copilot/references/transcribe.md +48 -0
  97. package/skills/graphify/skills/copilot/references/update.md +179 -0
  98. package/skills/graphify/skills/droid/references/add-watch.md +56 -0
  99. package/skills/graphify/skills/droid/references/exports.md +71 -0
  100. package/skills/graphify/skills/droid/references/extraction-spec.md +68 -0
  101. package/skills/graphify/skills/droid/references/github-and-merge.md +46 -0
  102. package/skills/graphify/skills/droid/references/hooks.md +33 -0
  103. package/skills/graphify/skills/droid/references/query.md +249 -0
  104. package/skills/graphify/skills/droid/references/transcribe.md +48 -0
  105. package/skills/graphify/skills/droid/references/update.md +179 -0
  106. package/skills/graphify/skills/kilo/references/add-watch.md +56 -0
  107. package/skills/graphify/skills/kilo/references/exports.md +71 -0
  108. package/skills/graphify/skills/kilo/references/extraction-spec.md +68 -0
  109. package/skills/graphify/skills/kilo/references/github-and-merge.md +46 -0
  110. package/skills/graphify/skills/kilo/references/hooks.md +33 -0
  111. package/skills/graphify/skills/kilo/references/query.md +249 -0
  112. package/skills/graphify/skills/kilo/references/transcribe.md +48 -0
  113. package/skills/graphify/skills/kilo/references/update.md +179 -0
  114. package/skills/graphify/skills/kiro/references/add-watch.md +56 -0
  115. package/skills/graphify/skills/kiro/references/exports.md +71 -0
  116. package/skills/graphify/skills/kiro/references/extraction-spec.md +29 -0
  117. package/skills/graphify/skills/kiro/references/github-and-merge.md +46 -0
  118. package/skills/graphify/skills/kiro/references/hooks.md +33 -0
  119. package/skills/graphify/skills/kiro/references/query.md +249 -0
  120. package/skills/graphify/skills/kiro/references/transcribe.md +48 -0
  121. package/skills/graphify/skills/kiro/references/update.md +179 -0
  122. package/skills/graphify/skills/opencode/references/add-watch.md +56 -0
  123. package/skills/graphify/skills/opencode/references/exports.md +71 -0
  124. package/skills/graphify/skills/opencode/references/extraction-spec.md +68 -0
  125. package/skills/graphify/skills/opencode/references/github-and-merge.md +46 -0
  126. package/skills/graphify/skills/opencode/references/hooks.md +33 -0
  127. package/skills/graphify/skills/opencode/references/query.md +249 -0
  128. package/skills/graphify/skills/opencode/references/transcribe.md +48 -0
  129. package/skills/graphify/skills/opencode/references/update.md +179 -0
  130. package/skills/graphify/skills/pi/references/add-watch.md +56 -0
  131. package/skills/graphify/skills/pi/references/exports.md +71 -0
  132. package/skills/graphify/skills/pi/references/extraction-spec.md +29 -0
  133. package/skills/graphify/skills/pi/references/github-and-merge.md +46 -0
  134. package/skills/graphify/skills/pi/references/hooks.md +33 -0
  135. package/skills/graphify/skills/pi/references/query.md +249 -0
  136. package/skills/graphify/skills/pi/references/transcribe.md +48 -0
  137. package/skills/graphify/skills/pi/references/update.md +179 -0
  138. package/skills/graphify/skills/trae/references/add-watch.md +56 -0
  139. package/skills/graphify/skills/trae/references/exports.md +71 -0
  140. package/skills/graphify/skills/trae/references/extraction-spec.md +68 -0
  141. package/skills/graphify/skills/trae/references/github-and-merge.md +46 -0
  142. package/skills/graphify/skills/trae/references/hooks.md +35 -0
  143. package/skills/graphify/skills/trae/references/query.md +249 -0
  144. package/skills/graphify/skills/trae/references/transcribe.md +48 -0
  145. package/skills/graphify/skills/trae/references/update.md +179 -0
  146. package/skills/graphify/skills/vscode/references/add-watch.md +56 -0
  147. package/skills/graphify/skills/vscode/references/exports.md +71 -0
  148. package/skills/graphify/skills/vscode/references/extraction-spec.md +68 -0
  149. package/skills/graphify/skills/vscode/references/github-and-merge.md +46 -0
  150. package/skills/graphify/skills/vscode/references/hooks.md +33 -0
  151. package/skills/graphify/skills/vscode/references/query.md +249 -0
  152. package/skills/graphify/skills/vscode/references/transcribe.md +48 -0
  153. package/skills/graphify/skills/vscode/references/update.md +179 -0
  154. package/skills/graphify/skills/windows/references/add-watch.md +56 -0
  155. package/skills/graphify/skills/windows/references/exports.md +71 -0
  156. package/skills/graphify/skills/windows/references/extraction-spec.md +68 -0
  157. package/skills/graphify/skills/windows/references/github-and-merge.md +46 -0
  158. package/skills/graphify/skills/windows/references/hooks.md +33 -0
  159. package/skills/graphify/skills/windows/references/query.md +249 -0
  160. package/skills/graphify/skills/windows/references/transcribe.md +48 -0
  161. package/skills/graphify/skills/windows/references/update.md +179 -0
  162. package/skills/graphify/symbol_resolution.py +538 -0
  163. package/skills/graphify/transcribe.py +184 -0
  164. package/skills/graphify/tree_html.py +582 -0
  165. package/skills/graphify/validate.py +72 -0
  166. package/skills/graphify/watch.py +898 -0
  167. package/skills/graphify/wiki.py +282 -0
  168. package/skills/impeccable/SKILL.md +186 -0
  169. package/skills/impeccable/agents/impeccable_asset_producer.toml +92 -0
  170. package/skills/impeccable/agents/impeccable_manual_edit_applier.toml +95 -0
  171. package/skills/impeccable/agents/openai.yaml +4 -0
  172. package/skills/impeccable/reference/adapt.md +311 -0
  173. package/skills/impeccable/reference/animate.md +201 -0
  174. package/skills/impeccable/reference/audit.md +133 -0
  175. package/skills/impeccable/reference/bolder.md +113 -0
  176. package/skills/impeccable/reference/brand.md +108 -0
  177. package/skills/impeccable/reference/clarify.md +288 -0
  178. package/skills/impeccable/reference/codex.md +105 -0
  179. package/skills/impeccable/reference/colorize.md +257 -0
  180. package/skills/impeccable/reference/craft.md +123 -0
  181. package/skills/impeccable/reference/critique.md +790 -0
  182. package/skills/impeccable/reference/delight.md +302 -0
  183. package/skills/impeccable/reference/distill.md +111 -0
  184. package/skills/impeccable/reference/document.md +429 -0
  185. package/skills/impeccable/reference/extract.md +69 -0
  186. package/skills/impeccable/reference/harden.md +347 -0
  187. package/skills/impeccable/reference/init.md +172 -0
  188. package/skills/impeccable/reference/interaction-design.md +189 -0
  189. package/skills/impeccable/reference/layout.md +161 -0
  190. package/skills/impeccable/reference/live.md +720 -0
  191. package/skills/impeccable/reference/onboard.md +234 -0
  192. package/skills/impeccable/reference/optimize.md +258 -0
  193. package/skills/impeccable/reference/overdrive.md +130 -0
  194. package/skills/impeccable/reference/polish.md +241 -0
  195. package/skills/impeccable/reference/product.md +60 -0
  196. package/skills/impeccable/reference/quieter.md +99 -0
  197. package/skills/impeccable/reference/shape.md +165 -0
  198. package/skills/impeccable/reference/typeset.md +279 -0
  199. package/skills/impeccable/scripts/cleanup-deprecated.mjs +284 -0
  200. package/skills/impeccable/scripts/command-metadata.json +94 -0
  201. package/skills/impeccable/scripts/context-signals.mjs +225 -0
  202. package/skills/impeccable/scripts/context.mjs +266 -0
  203. package/skills/impeccable/scripts/critique-storage.mjs +242 -0
  204. package/skills/impeccable/scripts/design-parser.mjs +835 -0
  205. package/skills/impeccable/scripts/detect-csp.mjs +198 -0
  206. package/skills/impeccable/scripts/detect.mjs +21 -0
  207. package/skills/impeccable/scripts/detector/browser/injected/index.mjs +1733 -0
  208. package/skills/impeccable/scripts/detector/cli/main.mjs +244 -0
  209. package/skills/impeccable/scripts/detector/detect-antipatterns-browser.js +4618 -0
  210. package/skills/impeccable/scripts/detector/detect-antipatterns.mjs +43 -0
  211. package/skills/impeccable/scripts/detector/engines/browser/detect-url.mjs +252 -0
  212. package/skills/impeccable/scripts/detector/engines/regex/detect-text.mjs +535 -0
  213. package/skills/impeccable/scripts/detector/engines/static-html/css-cascade.mjs +986 -0
  214. package/skills/impeccable/scripts/detector/engines/static-html/detect-html.mjs +208 -0
  215. package/skills/impeccable/scripts/detector/engines/visual/screenshot-contrast.mjs +189 -0
  216. package/skills/impeccable/scripts/detector/findings.mjs +12 -0
  217. package/skills/impeccable/scripts/detector/node/file-system.mjs +198 -0
  218. package/skills/impeccable/scripts/detector/profile/profiler.mjs +166 -0
  219. package/skills/impeccable/scripts/detector/registry/antipatterns.mjs +419 -0
  220. package/skills/impeccable/scripts/detector/rules/checks.mjs +2384 -0
  221. package/skills/impeccable/scripts/detector/shared/color.mjs +124 -0
  222. package/skills/impeccable/scripts/detector/shared/constants.mjs +101 -0
  223. package/skills/impeccable/scripts/detector/shared/page.mjs +7 -0
  224. package/skills/impeccable/scripts/impeccable-paths.mjs +126 -0
  225. package/skills/impeccable/scripts/is-generated.mjs +69 -0
  226. package/skills/impeccable/scripts/live-accept.mjs +812 -0
  227. package/skills/impeccable/scripts/live-browser-session.js +123 -0
  228. package/skills/impeccable/scripts/live-browser.js +10295 -0
  229. package/skills/impeccable/scripts/live-commit-manual-edits.mjs +1241 -0
  230. package/skills/impeccable/scripts/live-complete.mjs +75 -0
  231. package/skills/impeccable/scripts/live-completion.mjs +19 -0
  232. package/skills/impeccable/scripts/live-copy-edit-agent.mjs +683 -0
  233. package/skills/impeccable/scripts/live-discard-manual-edits.mjs +51 -0
  234. package/skills/impeccable/scripts/live-event-validation.mjs +137 -0
  235. package/skills/impeccable/scripts/live-inject.mjs +557 -0
  236. package/skills/impeccable/scripts/live-insert-ui.mjs +458 -0
  237. package/skills/impeccable/scripts/live-insert.mjs +272 -0
  238. package/skills/impeccable/scripts/live-manual-edit-evidence.mjs +363 -0
  239. package/skills/impeccable/scripts/live-manual-edits-buffer.mjs +152 -0
  240. package/skills/impeccable/scripts/live-poll.mjs +379 -0
  241. package/skills/impeccable/scripts/live-resume.mjs +94 -0
  242. package/skills/impeccable/scripts/live-server.mjs +2326 -0
  243. package/skills/impeccable/scripts/live-session-store.mjs +289 -0
  244. package/skills/impeccable/scripts/live-status.mjs +61 -0
  245. package/skills/impeccable/scripts/live-svelte-component.mjs +826 -0
  246. package/skills/impeccable/scripts/live-sveltekit-adapter.mjs +274 -0
  247. package/skills/impeccable/scripts/live-ui-core.mjs +179 -0
  248. package/skills/impeccable/scripts/live-vocabulary.mjs +36 -0
  249. package/skills/impeccable/scripts/live-wrap.mjs +894 -0
  250. package/skills/impeccable/scripts/live.mjs +246 -0
  251. package/skills/impeccable/scripts/modern-screenshot.umd.js +14 -0
  252. package/skills/impeccable/scripts/palette.mjs +633 -0
  253. package/skills/impeccable/scripts/pin.mjs +214 -0
  254. package/skills/uipm-ui-styling/LICENSE.txt +202 -0
  255. package/skills/uipm-ui-styling/SKILL.md +328 -0
  256. package/skills/uipm-ui-styling/canvas-fonts/ArsenalSC-OFL.txt +93 -0
  257. package/skills/uipm-ui-styling/canvas-fonts/ArsenalSC-Regular.ttf +0 -0
  258. package/skills/uipm-ui-styling/canvas-fonts/BigShoulders-Bold.ttf +0 -0
  259. package/skills/uipm-ui-styling/canvas-fonts/BigShoulders-OFL.txt +93 -0
  260. package/skills/uipm-ui-styling/canvas-fonts/BigShoulders-Regular.ttf +0 -0
  261. package/skills/uipm-ui-styling/canvas-fonts/Boldonse-OFL.txt +93 -0
  262. package/skills/uipm-ui-styling/canvas-fonts/Boldonse-Regular.ttf +0 -0
  263. package/skills/uipm-ui-styling/canvas-fonts/BricolageGrotesque-Bold.ttf +0 -0
  264. package/skills/uipm-ui-styling/canvas-fonts/BricolageGrotesque-OFL.txt +93 -0
  265. package/skills/uipm-ui-styling/canvas-fonts/BricolageGrotesque-Regular.ttf +0 -0
  266. package/skills/uipm-ui-styling/canvas-fonts/CrimsonPro-Bold.ttf +0 -0
  267. package/skills/uipm-ui-styling/canvas-fonts/CrimsonPro-Italic.ttf +0 -0
  268. package/skills/uipm-ui-styling/canvas-fonts/CrimsonPro-OFL.txt +93 -0
  269. package/skills/uipm-ui-styling/canvas-fonts/CrimsonPro-Regular.ttf +0 -0
  270. package/skills/uipm-ui-styling/canvas-fonts/DMMono-OFL.txt +93 -0
  271. package/skills/uipm-ui-styling/canvas-fonts/DMMono-Regular.ttf +0 -0
  272. package/skills/uipm-ui-styling/canvas-fonts/EricaOne-OFL.txt +94 -0
  273. package/skills/uipm-ui-styling/canvas-fonts/EricaOne-Regular.ttf +0 -0
  274. package/skills/uipm-ui-styling/canvas-fonts/GeistMono-Bold.ttf +0 -0
  275. package/skills/uipm-ui-styling/canvas-fonts/GeistMono-OFL.txt +93 -0
  276. package/skills/uipm-ui-styling/canvas-fonts/GeistMono-Regular.ttf +0 -0
  277. package/skills/uipm-ui-styling/canvas-fonts/Gloock-OFL.txt +93 -0
  278. package/skills/uipm-ui-styling/canvas-fonts/Gloock-Regular.ttf +0 -0
  279. package/skills/uipm-ui-styling/canvas-fonts/IBMPlexMono-Bold.ttf +0 -0
  280. package/skills/uipm-ui-styling/canvas-fonts/IBMPlexMono-OFL.txt +93 -0
  281. package/skills/uipm-ui-styling/canvas-fonts/IBMPlexMono-Regular.ttf +0 -0
  282. package/skills/uipm-ui-styling/canvas-fonts/IBMPlexSerif-Bold.ttf +0 -0
  283. package/skills/uipm-ui-styling/canvas-fonts/IBMPlexSerif-BoldItalic.ttf +0 -0
  284. package/skills/uipm-ui-styling/canvas-fonts/IBMPlexSerif-Italic.ttf +0 -0
  285. package/skills/uipm-ui-styling/canvas-fonts/IBMPlexSerif-Regular.ttf +0 -0
  286. package/skills/uipm-ui-styling/canvas-fonts/InstrumentSans-Bold.ttf +0 -0
  287. package/skills/uipm-ui-styling/canvas-fonts/InstrumentSans-BoldItalic.ttf +0 -0
  288. package/skills/uipm-ui-styling/canvas-fonts/InstrumentSans-Italic.ttf +0 -0
  289. package/skills/uipm-ui-styling/canvas-fonts/InstrumentSans-OFL.txt +93 -0
  290. package/skills/uipm-ui-styling/canvas-fonts/InstrumentSans-Regular.ttf +0 -0
  291. package/skills/uipm-ui-styling/canvas-fonts/InstrumentSerif-Italic.ttf +0 -0
  292. package/skills/uipm-ui-styling/canvas-fonts/InstrumentSerif-Regular.ttf +0 -0
  293. package/skills/uipm-ui-styling/canvas-fonts/Italiana-OFL.txt +93 -0
  294. package/skills/uipm-ui-styling/canvas-fonts/Italiana-Regular.ttf +0 -0
  295. package/skills/uipm-ui-styling/canvas-fonts/JetBrainsMono-Bold.ttf +0 -0
  296. package/skills/uipm-ui-styling/canvas-fonts/JetBrainsMono-OFL.txt +93 -0
  297. package/skills/uipm-ui-styling/canvas-fonts/JetBrainsMono-Regular.ttf +0 -0
  298. package/skills/uipm-ui-styling/canvas-fonts/Jura-Light.ttf +0 -0
  299. package/skills/uipm-ui-styling/canvas-fonts/Jura-Medium.ttf +0 -0
  300. package/skills/uipm-ui-styling/canvas-fonts/Jura-OFL.txt +93 -0
  301. package/skills/uipm-ui-styling/canvas-fonts/LibreBaskerville-OFL.txt +93 -0
  302. package/skills/uipm-ui-styling/canvas-fonts/LibreBaskerville-Regular.ttf +0 -0
  303. package/skills/uipm-ui-styling/canvas-fonts/Lora-Bold.ttf +0 -0
  304. package/skills/uipm-ui-styling/canvas-fonts/Lora-BoldItalic.ttf +0 -0
  305. package/skills/uipm-ui-styling/canvas-fonts/Lora-Italic.ttf +0 -0
  306. package/skills/uipm-ui-styling/canvas-fonts/Lora-OFL.txt +93 -0
  307. package/skills/uipm-ui-styling/canvas-fonts/Lora-Regular.ttf +0 -0
  308. package/skills/uipm-ui-styling/canvas-fonts/NationalPark-Bold.ttf +0 -0
  309. package/skills/uipm-ui-styling/canvas-fonts/NationalPark-OFL.txt +93 -0
  310. package/skills/uipm-ui-styling/canvas-fonts/NationalPark-Regular.ttf +0 -0
  311. package/skills/uipm-ui-styling/canvas-fonts/NothingYouCouldDo-OFL.txt +93 -0
  312. package/skills/uipm-ui-styling/canvas-fonts/NothingYouCouldDo-Regular.ttf +0 -0
  313. package/skills/uipm-ui-styling/canvas-fonts/Outfit-Bold.ttf +0 -0
  314. package/skills/uipm-ui-styling/canvas-fonts/Outfit-OFL.txt +93 -0
  315. package/skills/uipm-ui-styling/canvas-fonts/Outfit-Regular.ttf +0 -0
  316. package/skills/uipm-ui-styling/canvas-fonts/PixelifySans-Medium.ttf +0 -0
  317. package/skills/uipm-ui-styling/canvas-fonts/PixelifySans-OFL.txt +93 -0
  318. package/skills/uipm-ui-styling/canvas-fonts/PoiretOne-OFL.txt +93 -0
  319. package/skills/uipm-ui-styling/canvas-fonts/PoiretOne-Regular.ttf +0 -0
  320. package/skills/uipm-ui-styling/canvas-fonts/RedHatMono-Bold.ttf +0 -0
  321. package/skills/uipm-ui-styling/canvas-fonts/RedHatMono-OFL.txt +93 -0
  322. package/skills/uipm-ui-styling/canvas-fonts/RedHatMono-Regular.ttf +0 -0
  323. package/skills/uipm-ui-styling/canvas-fonts/Silkscreen-OFL.txt +93 -0
  324. package/skills/uipm-ui-styling/canvas-fonts/Silkscreen-Regular.ttf +0 -0
  325. package/skills/uipm-ui-styling/canvas-fonts/SmoochSans-Medium.ttf +0 -0
  326. package/skills/uipm-ui-styling/canvas-fonts/SmoochSans-OFL.txt +93 -0
  327. package/skills/uipm-ui-styling/canvas-fonts/Tektur-Medium.ttf +0 -0
  328. package/skills/uipm-ui-styling/canvas-fonts/Tektur-OFL.txt +93 -0
  329. package/skills/uipm-ui-styling/canvas-fonts/Tektur-Regular.ttf +0 -0
  330. package/skills/uipm-ui-styling/canvas-fonts/WorkSans-Bold.ttf +0 -0
  331. package/skills/uipm-ui-styling/canvas-fonts/WorkSans-BoldItalic.ttf +0 -0
  332. package/skills/uipm-ui-styling/canvas-fonts/WorkSans-Italic.ttf +0 -0
  333. package/skills/uipm-ui-styling/canvas-fonts/WorkSans-OFL.txt +93 -0
  334. package/skills/uipm-ui-styling/canvas-fonts/WorkSans-Regular.ttf +0 -0
  335. package/skills/uipm-ui-styling/canvas-fonts/YoungSerif-OFL.txt +93 -0
  336. package/skills/uipm-ui-styling/canvas-fonts/YoungSerif-Regular.ttf +0 -0
  337. package/skills/uipm-ui-styling/references/canvas-design-system.md +320 -0
  338. package/skills/uipm-ui-styling/references/shadcn-accessibility.md +471 -0
  339. package/skills/uipm-ui-styling/references/shadcn-components.md +424 -0
  340. package/skills/uipm-ui-styling/references/shadcn-theming.md +373 -0
  341. package/skills/uipm-ui-styling/references/tailwind-customization.md +483 -0
  342. package/skills/uipm-ui-styling/references/tailwind-responsive.md +382 -0
  343. package/skills/uipm-ui-styling/references/tailwind-utilities.md +455 -0
  344. package/skills/uipm-ui-styling/scripts/.coverage +0 -0
  345. package/skills/uipm-ui-styling/scripts/requirements.txt +17 -0
  346. package/skills/uipm-ui-styling/scripts/shadcn_add.py +292 -0
  347. package/skills/uipm-ui-styling/scripts/tailwind_config_gen.py +456 -0
  348. package/skills/uipm-ui-styling/scripts/tests/coverage-ui.json +1 -0
  349. package/skills/uipm-ui-styling/scripts/tests/requirements.txt +3 -0
  350. package/skills/uipm-ui-styling/scripts/tests/test_shadcn_add.py +266 -0
  351. package/skills/uipm-ui-styling/scripts/tests/test_tailwind_config_gen.py +336 -0
@@ -0,0 +1,1896 @@
1
+ # Direct LLM backend for semantic extraction — supports Claude, Kimi K2.6,
2
+ # Ollama, Gemini, OpenAI, DeepSeek, Azure OpenAI, Bedrock, and the Claude CLI.
3
+ # Used by `graphify extract . --backend gemini` and the benchmark scripts.
4
+ # The default graphify pipeline uses Claude Code subagents via skill.md;
5
+ # this module provides a direct API path for non-Claude-Code environments.
6
+ from __future__ import annotations
7
+
8
+ import base64
9
+ import json
10
+ import os
11
+ import re
12
+ import sys
13
+ import time
14
+ from collections.abc import Callable
15
+ from concurrent.futures import ThreadPoolExecutor, as_completed
16
+ from dataclasses import dataclass, replace
17
+ from pathlib import Path
18
+
19
+ # `_read_files` truncates each file at this many characters before joining into
20
+ # the user message. Token estimates use the same cap so packing matches reality.
21
+ _FILE_CHAR_CAP = 20_000
22
+ # `_read_files` also wraps each file in a `=== {rel} ===\n...\n\n` separator;
23
+ # this is roughly the per-file overhead in characters that the prompt adds.
24
+ _PER_FILE_OVERHEAD_CHARS = 80
25
+ # Coarse fallback used only when `tiktoken` is not installed. 1 token ≈ 4 chars
26
+ # is the standard heuristic for English/code on BPE tokenizers.
27
+ _CHARS_PER_TOKEN = 4
28
+
29
+
30
+ def _get_tokenizer():
31
+ """Return a tiktoken encoder for accurate token counts, or None if tiktoken
32
+ is not installed. We use `cl100k_base` (GPT-4 / GPT-3.5-turbo) as a proxy:
33
+ Kimi-K2 ships a tiktoken-based tokenizer with very similar BPE behaviour,
34
+ and Claude's tokenizer has a comparable token-to-char ratio for prose/code.
35
+ Estimates only need to be within ~5%, not exact.
36
+ """
37
+ try:
38
+ import tiktoken
39
+ except ImportError:
40
+ return None
41
+ try:
42
+ return tiktoken.get_encoding("cl100k_base")
43
+ except Exception: # network failure on first-use download, etc.
44
+ return None
45
+
46
+
47
+ # Cached at import time. None if tiktoken is unavailable; consumers must handle.
48
+ _TOKENIZER = _get_tokenizer()
49
+
50
# Registry of built-in LLM backends, keyed by the backend name.
# Each entry carries the default model, pricing (USD per 1M tokens), sampling
# settings, and — where applicable — the endpoint and the environment
# variable(s) that hold the API key or override the model.
BACKENDS: dict[str, dict] = {
    "claude": {
        "base_url": "https://api.anthropic.com",
        "default_model": "claude-sonnet-4-6",
        "env_key": "ANTHROPIC_API_KEY",
        "pricing": {"input": 3.0, "output": 15.0},  # USD per 1M tokens
        "temperature": 0,
        "max_tokens": 16384,
        "vision": True,
    },
    "kimi": {
        "base_url": "https://api.moonshot.ai/v1",
        "default_model": "kimi-k2.6",
        "env_key": "MOONSHOT_API_KEY",
        # kimi-k2.6 is natively multimodal (MoonViT); Moonshot's compat
        # endpoint accepts the same OpenAI image_url data-URI block.
        "vision": True,
        "pricing": {"input": 0.74, "output": 4.66},  # USD per 1M tokens
        # kimi-k2.6 enforces its own fixed temperature; sending any value
        # raises 400, so None means "omit the parameter".
        "temperature": None,
        "max_tokens": 16384,
    },
    "ollama": {
        # Endpoint and model are read from the environment once, at import.
        "base_url": os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434/v1"),
        "default_model": os.environ.get("OLLAMA_MODEL", "qwen2.5-coder:7b"),
        "env_key": "OLLAMA_API_KEY",
        "pricing": {"input": 0.0, "output": 0.0},
        "temperature": 0,
        "max_tokens": 16384,
    },
    "gemini": {
        "base_url": "https://generativelanguage.googleapis.com/v1beta/openai/",
        "default_model": "gemini-3-flash-preview",
        "env_keys": ["GEMINI_API_KEY", "GOOGLE_API_KEY"],
        "model_env_key": "GRAPHIFY_GEMINI_MODEL",
        "pricing": {"input": 0.50, "output": 3.00},  # USD per 1M tokens
        "temperature": 0,
        "reasoning_effort": "low",
        "max_completion_tokens": 16384,
        "vision": True,
    },
    "openai": {
        "base_url": "https://api.openai.com/v1",
        "default_model": "gpt-4.1-mini",
        "env_key": "OPENAI_API_KEY",
        "model_env_key": "GRAPHIFY_OPENAI_MODEL",
        "pricing": {"input": 0.40, "output": 1.60},  # USD per 1M tokens
        "temperature": 0,
        "vision": True,
    },
    "deepseek": {
        "base_url": "https://api.deepseek.com",
        "default_model": "deepseek-v4-flash",
        "env_key": "DEEPSEEK_API_KEY",
        "model_env_key": "GRAPHIFY_DEEPSEEK_MODEL",
        "pricing": {"input": 0.14, "output": 0.28},  # USD per 1M tokens (v4-flash)
        # deepseek-reasoner / thinking-mode models silently ignore temperature;
        # deepseek-chat / v4-flash (non-thinking) accept 0-2. Safe to send 0.
        "temperature": 0,
        "max_tokens": 16384,
    },
    "azure": {
        # Azure OpenAI Service — uses the AzureOpenAI SDK client rather than
        # the standard OpenAI client, so it has its own call path (_call_azure).
        # Required env vars: AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT.
        # Optional: AZURE_OPENAI_API_VERSION (defaults to 2024-12-01-preview),
        # AZURE_OPENAI_DEPLOYMENT or GRAPHIFY_AZURE_MODEL (deployment name).
        # base_url is intentionally absent — prevents accidental routing through
        # _call_openai_compat, which requires it and uses the wrong SDK client class.
        "default_model": os.environ.get(
            "AZURE_OPENAI_DEPLOYMENT",
            os.environ.get("GRAPHIFY_AZURE_MODEL", "gpt-4o"),
        ),
        "env_key": "AZURE_OPENAI_API_KEY",
        "model_env_key": "GRAPHIFY_AZURE_MODEL",
        # USD per 1M tokens (gpt-4o; may mis-estimate other deployments)
        "pricing": {"input": 2.50, "output": 10.00},
        "temperature": 0,
        "max_tokens": 16384,
    },
    "bedrock": {
        "default_model": "anthropic.claude-3-5-sonnet-20241022-v2:0",
        "model_env_key": "GRAPHIFY_BEDROCK_MODEL",
        "pricing": {"input": 3.0, "output": 15.0},  # USD per 1M tokens
        "temperature": 0,
        "max_tokens": 16384,
        "vision": True,
    },
    "claude-cli": {
        # Routes through the locally-installed `claude` CLI (Claude Code) using
        # `-p --output-format json`. Authenticates via the user's existing
        # Pro/Max subscription instead of a separate ANTHROPIC_API_KEY — costs
        # are billed to the plan, not pay-as-you-go API credit.
        "default_model": "claude-code-plan",
        "pricing": {"input": 0.0, "output": 0.0},
        "temperature": 0,
        "max_tokens": 16384,
        # Claude Code is multimodal; images are passed by path and read with the
        # CLI's Read tool rather than as inline base64 (see `_call_claude_cli`).
        "vision": True,
    },
}
147
+
148
+
149
+ def _custom_providers_path(global_: bool = True) -> Path:
150
+ if global_:
151
+ return Path.home() / ".graphify" / "providers.json"
152
+ return Path(".graphify") / "providers.json"
153
+
154
+
155
def provider_base_url_ok(base_url: str, name: str, *, warn: bool = True) -> bool:
    """Structural safety check for a custom-provider base_url.

    A custom provider receives the full corpus plus the user's API key, so its
    base_url is an exfiltration channel. We deliberately do NOT run the ingest
    SSRF guard here: that blocks private/internal IPs, which would wrongly reject
    legitimate on-prem corporate LLM gateways. Instead we reject non-http(s)
    schemes outright and warn loudly when the corpus would leave over plaintext
    http to a non-loopback host. The primary control against trusting injected
    config is the GRAPHIFY_ALLOW_LOCAL_PROVIDERS gate on project-local files.

    Args:
        base_url: The endpoint URL configured for the provider.
        name: Provider name, used only in the warning messages.
        warn: When true, print diagnostics to stderr; when false, stay silent.

    Returns:
        False if the URL cannot be parsed or its scheme is not http/https;
        True otherwise (a plaintext-http warning does not reject the URL).
    """
    import ipaddress
    from urllib.parse import urlparse

    try:
        parsed = urlparse(base_url)
    except Exception:
        if warn:
            print(f"[graphify] WARNING: provider {name!r} has an unparseable base_url; ignoring.", file=sys.stderr)
        return False
    if parsed.scheme not in ("http", "https"):
        if warn:
            print(
                f"[graphify] WARNING: provider {name!r} base_url scheme {parsed.scheme!r} is not "
                "http/https; ignoring.",
                file=sys.stderr,
            )
        return False
    host = (parsed.hostname or "").lower()
    if host == "localhost":
        is_loopback = True
    else:
        # Only a literal IP address can be classified as loopback. A textual
        # prefix test would also match DNS names such as "127.evil.example",
        # which resolve wherever their owner likes and must still be warned on.
        try:
            is_loopback = ipaddress.ip_address(host).is_loopback
        except ValueError:
            is_loopback = False
    if warn and parsed.scheme == "http" and not is_loopback:
        print(
            f"[graphify] WARNING: provider {name!r} sends your corpus to {host!r} over plaintext "
            "http. Use https unless this is a trusted local endpoint.",
            file=sys.stderr,
        )
    return True
190
+
191
+
192
def _load_custom_providers() -> dict[str, dict]:
    """Load user-defined providers from the providers.json config files.

    A project-local ./.graphify/providers.json travels with a cloned or shared
    repo and defines where the corpus + API key are sent, so loading it
    silently is a corpus/key exfiltration vector. Require an explicit opt-in
    (GRAPHIFY_ALLOW_LOCAL_PROVIDERS); the user's own global
    ~/.graphify/providers.json stays trusted.

    Entries are skipped when the name collides with a built-in backend or an
    already-loaded provider, or when `provider_base_url_ok` rejects the
    base_url. Entries without "pricing" get a zero-cost pricing table.

    Returns:
        Mapping of provider name to its config dict (possibly empty).
    """
    local_path = _custom_providers_path(global_=False)
    global_path = _custom_providers_path(global_=True)
    allow_local = os.environ.get("GRAPHIFY_ALLOW_LOCAL_PROVIDERS", "").strip().lower() in ("1", "true", "yes")
    if local_path.is_file() and not allow_local:
        print(
            f"[graphify] WARNING: ignoring project-local {local_path} (custom providers control "
            "where your corpus and API key are sent). Set GRAPHIFY_ALLOW_LOCAL_PROVIDERS=1 to load it.",
            file=sys.stderr,
        )

    providers: dict[str, dict] = {}
    # Local is listed first, so on a name clash the project-local entry wins.
    paths = [local_path, global_path] if allow_local else [global_path]
    for path in paths:
        if not path.is_file():
            continue
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError, RecursionError) as exc:
            # Best-effort load: a broken config must not abort the caller, but
            # it should not vanish silently either.
            print(
                f"[graphify] WARNING: could not load custom providers from {path}: {exc}",
                file=sys.stderr,
            )
            continue
        if not isinstance(data, dict):
            print(
                f"[graphify] WARNING: {path} does not contain a JSON object; ignoring.",
                file=sys.stderr,
            )
            continue
        for name, cfg in data.items():
            if not (isinstance(name, str) and isinstance(cfg, dict)):
                continue
            if name in BACKENDS or name in providers:
                continue
            if not provider_base_url_ok(str(cfg.get("base_url", "")), name):
                continue
            if "pricing" not in cfg:
                cfg = dict(cfg, pricing={"input": 0.0, "output": 0.0})
            providers[name] = cfg
    return providers
227
+
228
+
229
+ BACKENDS.update(_load_custom_providers())
230
+
231
+
232
+ def _resolve_max_tokens(default: int) -> int:
233
+ """Honour GRAPHIFY_MAX_OUTPUT_TOKENS env var override, else use backend default."""
234
+ raw = os.environ.get("GRAPHIFY_MAX_OUTPUT_TOKENS", "").strip()
235
+ if raw:
236
+ try:
237
+ v = int(raw)
238
+ if v > 0:
239
+ return v
240
+ except ValueError:
241
+ pass
242
+ return default
243
+
244
+
245
+ def _resolve_api_timeout(default: float = 600.0) -> float:
246
+ """Honour GRAPHIFY_API_TIMEOUT env var override, else use default (seconds)."""
247
+ raw = os.environ.get("GRAPHIFY_API_TIMEOUT", "").strip()
248
+ if raw:
249
+ try:
250
+ v = float(raw)
251
+ if v > 0:
252
+ return v
253
+ except ValueError:
254
+ pass
255
+ return default
256
+
257
# System prompt for the semantic-extraction pass. It fixes the confidence
# vocabulary, the node-id format and the exact JSON schema the model must emit.
# The text is sent to the model verbatim, so any edit changes model behaviour.
_EXTRACTION_SYSTEM = """\
You are a graphify semantic extraction agent. Extract a knowledge graph fragment from the files provided.
Output ONLY valid JSON — no explanation, no markdown fences, no preamble.

Rules:
- EXTRACTED: relationship explicit in source (import, call, citation, reference)
- INFERRED: reasonable inference (shared data structure, implied dependency)
- AMBIGUOUS: uncertain — flag for review, do not omit

Node ID format: lowercase, only [a-z0-9_], no dots or slashes.
Format: {stem}_{entity} where stem = filename without extension, entity = symbol name (both normalised).

Output exactly this schema:
{"nodes":[{"id":"stem_entity","label":"Human Readable Name","file_type":"code|document|paper|image|rationale|concept","source_file":"relative/path","source_location":null,"source_url":null,"captured_at":null,"author":null,"contributor":null}],"edges":[{"source":"node_id","target":"node_id","relation":"calls|implements|references|cites|conceptually_related_to|shares_data_with|semantically_similar_to","confidence":"EXTRACTED|INFERRED|AMBIGUOUS","confidence_score":1.0,"source_file":"relative/path","source_location":null,"weight":1.0}],"hyperedges":[],"input_tokens":0,"output_tokens":0}
"""

# Appended to `_EXTRACTION_SYSTEM` by `_extraction_system(deep=True)`. Starts
# with a blank line so it reads as a separate paragraph after the base prompt.
_DEEP_EXTRACTION_SUFFIX = """\

DEEP_MODE: include additional INFERRED edges only for concrete architectural
signals (shared data contracts, explicit lifecycle coupling, or multi-step flow
dependencies visible in the sources). Avoid broad conceptual similarity edges.
Mark uncertain ones AMBIGUOUS instead of omitting.
"""
280
+
281
+
282
def _extraction_system(*, deep: bool = False) -> str:
    """Build the system prompt for semantic extraction.

    Returns the base prompt, with the deep-mode suffix appended when `deep`
    is true.
    """
    suffix = _DEEP_EXTRACTION_SUFFIX if deep else ""
    return _EXTRACTION_SYSTEM + suffix
287
+
288
+
289
+ def _file_to_text(path: Path) -> str:
290
+ """Return a text-like file's content for the extraction prompt.
291
+
292
+ Most files are read directly. PDFs are binary, so reading them with
293
+ `read_text` yields garbage (the same failure images had); route them through
294
+ pypdf instead. A scanned PDF with no text layer extracts to an empty string,
295
+ which still produces a reference node rather than noise.
296
+ """
297
+ if path.suffix.lower() == ".pdf":
298
+ from graphify.detect import extract_pdf_text
299
+ return extract_pdf_text(path)
300
+ return path.read_text(encoding="utf-8", errors="replace")
301
+
302
+
303
def _read_files(paths: list[Path], root: Path) -> str:
    """Render files as ``=== <path> ===`` sections for the extraction prompt.

    Each section is headed by the path relative to `root` (or the path as
    given when it is not under `root`) and carries at most the first 20000
    characters of the file. Files that raise OSError while being read are
    left out. Sections are separated by a blank line.
    """
    sections: list[str] = []
    for path in paths:
        try:
            shown = path.relative_to(root)
        except ValueError:
            shown = path
        try:
            text = _file_to_text(path)
        except OSError:
            continue
        sections.append(f"=== {shown} ===\n{text[:20000]}")
    return "\n\n".join(sections)
317
+
318
+
319
# ── Image (vision) handling ───────────────────────────────────────────────────
# Raster image types a vision model can actually look at. `.svg` is intentionally
# excluded: it is XML markup, so `_read_files` reads it as text (the model parses
# the source directly), which is more useful than rasterising it. Before this,
# every image was fed through `path.read_text(errors="replace")`, turning binary
# pixels into garbage text — noise for API backends and an outright `exit 1` for
# the claude-cli backend.
_VISION_IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp"}
# Media type reported for each raster suffix, keyed by lowercase file extension.
_IMAGE_MEDIA_TYPES = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
}
# Per-image byte ceiling. Anthropic caps a request at 32 MB and Bedrock images
# at ~5 MB; 5 MB per image keeps every backend within limits. Oversized images
# fall back to a text reference (the node is still created, just unseen).
_MAX_IMAGE_BYTES = 5 * 1024 * 1024
# Flat token estimate per image for chunk packing. Vision models bill an image
# at a roughly fixed cost regardless of file size, so estimating by byte size
# (as the generic path does) would force every large PNG into its own chunk.
_IMAGE_TOKEN_ESTIMATE = 1_600
# Hard cap on images per chunk, independent of the token budget. A large
# token budget would otherwise pack hundreds of images into one request —
# past provider per-request image limits (Anthropic allows 100), and far too
# many for the claude-cli Read-tool loop to work through. Keeps memory and
# request size bounded on image-dense corpora.
_MAX_IMAGES_PER_CHUNK = 20
# Backends that read an image by file path (claude-cli's Read tool)
# instead of inlining base64. They open the file themselves and downsample as
# needed, so `_MAX_IMAGE_BYTES` does not apply and the bytes never need loading.
_PATH_IMAGE_BACKENDS = {"claude-cli"}
352
+
353
+
354
+ @dataclass
355
+ class _ImageRef:
356
+ """A single image destined for a vision request.
357
+
358
+ `raw` is None when the image is unreadable or exceeds `_MAX_IMAGE_BYTES`, or
359
+ when the target backend has no vision support — in every such case the
360
+ renderers emit a text reference instead of pixels, so the image still
361
+ becomes a graph node.
362
+ """
363
+
364
+ path: Path # absolute path (claude-cli reads it via the Read tool)
365
+ rel: str # path relative to the corpus root (the node's source_file)
366
+ media_type: str # e.g. "image/png"
367
+ raw: bytes | None
368
+
369
+ @property
370
+ def b64(self) -> str:
371
+ return base64.standard_b64encode(self.raw).decode("ascii") if self.raw else ""
372
+
373
+ @property
374
+ def bedrock_format(self) -> str:
375
+ # Converse wants a bare format token, not a media type.
376
+ return self.media_type.split("/", 1)[-1]
377
+
378
+
379
def _is_vision_image(path: Path) -> bool:
    """Return True when `path` has one of the raster-image suffixes (case-insensitive)."""
    suffix = path.suffix.lower()
    return suffix in _VISION_IMAGE_EXTENSIONS
381
+
382
+
383
def _partition_semantic_files(files: list[Path]) -> tuple[list[Path], list[Path]]:
    """Separate a chunk's files into (text-like, raster-image), keeping input order."""
    text_files: list[Path] = []
    image_files: list[Path] = []
    for candidate in files:
        bucket = image_files if _is_vision_image(candidate) else text_files
        bucket.append(candidate)
    return text_files, image_files
388
+
389
+
390
def _build_image_refs(image_files: list[Path], root: Path, *, read_bytes: bool = True) -> list[_ImageRef]:
    """Build `_ImageRef`s for raster images.

    `read_bytes=True` (base64 backends) loads the pixels and drops any image over
    `_MAX_IMAGE_BYTES` to a reference, because a base64 request body has a hard
    size ceiling. `read_bytes=False` (path-based backends — claude-cli)
    skips the read entirely: those backends open the file themselves and
    downsample as needed, so there is no per-image size limit and no reason to
    load (potentially tens of MB of) bytes that would never be used.

    The file size is checked with `stat()` before reading, so an oversized
    image is rejected without first being pulled into memory.

    Args:
        image_files: Image paths to wrap.
        root: Corpus root; each ref's `rel` is the path relative to it, or the
            path as given when it is not under `root`.
        read_bytes: Whether to load pixel data (see above).

    Returns:
        One `_ImageRef` per input path, in input order. `raw` is None for
        unreadable or oversized images and whenever `read_bytes` is False.
    """
    refs: list[_ImageRef] = []
    for p in image_files:
        try:
            rel = str(p.relative_to(root))
        except ValueError:
            rel = str(p)
        media = _IMAGE_MEDIA_TYPES.get(p.suffix.lower(), "image/png")
        raw: bytes | None = None
        if read_bytes:
            size = 0
            try:
                size = p.stat().st_size
                if size <= _MAX_IMAGE_BYTES:
                    raw = p.read_bytes()
                    # Re-measure: the file may have changed since stat().
                    size = len(raw)
            except OSError as exc:
                print(f"[graphify] could not read image {rel}: {exc}", file=sys.stderr)
                raw = None
                size = 0
            if size > _MAX_IMAGE_BYTES:
                print(
                    f"[graphify] image {rel} is {size // 1024} KB, over the "
                    f"{_MAX_IMAGE_BYTES // (1024 * 1024)} MB inline-image limit for this "
                    "backend; sending it as a reference node without inline pixels.",
                    file=sys.stderr,
                )
                raw = None
        try:
            abs_path = p.resolve()
        except OSError:
            abs_path = p
        refs.append(_ImageRef(abs_path, rel, media, raw))
    return refs
428
+
429
+
430
+ def _strip_pixels(refs: list[_ImageRef]) -> list[_ImageRef]:
431
+ """Return refs with pixel data dropped (for non-vision backends)."""
432
+ return [replace(r, raw=None) for r in refs]
433
+
434
+
435
def _backend_supports_vision(backend: str) -> bool:
    """Report whether `backend` is configured to accept image input.

    For every backend except Ollama this is the truthiness of its "vision"
    entry in BACKENDS (unknown backends count as no vision). Ollama's default
    model is text-only, so it is opt-in: vision is on only when
    GRAPHIFY_OLLAMA_VISION is exactly "1" (after stripping whitespace), to be
    set once the user has selected a vision model (e.g. --model llama3.2-vision).
    """
    if backend != "ollama":
        config = BACKENDS.get(backend, {})
        return bool(config.get("vision", False))
    flag = os.environ.get("GRAPHIFY_OLLAMA_VISION", "")
    return flag.strip() == "1"
445
+
446
+
447
+ def _image_notes(refs: list[_ImageRef], *, with_paths: bool = False) -> str:
448
+ """Text block listing the images so the model emits one node per image.
449
+
450
+ Always included alongside the visual payload (and used on its own when the
451
+ backend can't see pixels), so an image becomes a graph node either way.
452
+ `with_paths=True` also lists the absolute path and asks the model to open it
453
+ with the Read tool — used by the claude-cli backend.
454
+ """
455
+ if not refs:
456
+ return ""
457
+ if with_paths:
458
+ header = (
459
+ "Use the Read tool to open and view each image file at the path below, "
460
+ "then emit one node per image"
461
+ )
462
+ else:
463
+ header = (
464
+ "The following image file(s) are attached as visual input. Emit one "
465
+ "node per image"
466
+ )
467
+ lines = [
468
+ "=== IMAGES ===",
469
+ f"{header} with \"file_type\":\"image\" and the listed source_file, a label "
470
+ "describing what it depicts (diagram, screenshot, chart, photo, UI, logo), "
471
+ "and edges to any code/doc nodes the image clearly references.",
472
+ ]
473
+ for i, r in enumerate(refs, 1):
474
+ note = f"[image {i}] source_file: {r.rel}"
475
+ if with_paths:
476
+ note += f" path: {r.path}"
477
+ if r.raw is None and not with_paths:
478
+ note += " (not shown: unreadable or exceeds size limit)"
479
+ lines.append(note)
480
+ return "\n".join(lines)
481
+
482
+
483
def _with_image_notes(user_message: str, refs: list[_ImageRef], *, with_paths: bool = False) -> str:
    """Append the image-notes block to `user_message`.

    With no images the message is returned unchanged. With images but a blank
    message the notes are returned alone. Otherwise the two are joined by a
    blank line.
    """
    notes = _image_notes(refs, with_paths=with_paths)
    if notes and user_message.strip():
        return f"{user_message}\n\n{notes}"
    return notes or user_message
490
+
491
+
492
def _anthropic_content(user_message: str, refs: list[_ImageRef]):
    """Assemble the Anthropic `messages[].content` value.

    Returns a plain string when no ref carries pixel data; otherwise a list
    with one base64 image block per ref that has bytes, followed by a single
    text block. The text always includes the image notes.
    """
    image_blocks = []
    for ref in refs:
        if not ref.raw:
            continue
        source = {"type": "base64", "media_type": ref.media_type, "data": ref.b64}
        image_blocks.append({"type": "image", "source": source})
    text = _with_image_notes(user_message, refs)
    if image_blocks:
        return image_blocks + [{"type": "text", "text": text}]
    return text
503
+
504
+
505
def _openai_content(user_message: str, refs: list[_ImageRef]):
    """Assemble the OpenAI-compatible user `content` value.

    Returns a plain string when no ref carries pixel data; otherwise a list
    with the text part first, followed by one `image_url` part (a base64 data
    URL) per ref that has bytes. The text always includes the image notes.
    """
    image_parts: list[dict] = []
    for ref in refs:
        if not ref.raw:
            continue
        data_url = f"data:{ref.media_type};base64,{ref.b64}"
        image_parts.append(
            {"type": "image_url", "image_url": {"url": data_url, "detail": "auto"}}
        )
    text = _with_image_notes(user_message, refs)
    if image_parts:
        return [{"type": "text", "text": text}] + image_parts
    return text
519
+
520
+
521
def _bedrock_content(user_message: str, refs: list[_ImageRef]) -> list[dict]:
    """Assemble the Bedrock Converse user content list.

    Images are passed as raw bytes (not base64), one block per ref that has
    bytes, followed by one text block that always includes the image notes.
    Unlike the other builders, the result is a list even when there are no
    images.
    """
    content: list[dict] = []
    for ref in refs:
        if not ref.raw:
            continue
        content.append({"image": {"format": ref.bedrock_format, "source": {"bytes": ref.raw}}})
    text_block = {"text": _with_image_notes(user_message, refs)}
    content.append(text_block)
    return content
530
+
531
+
532
+ _LLM_JSON_MAX_BYTES = 10 * 1024 * 1024 # 10 MB hard cap before json.loads (F-016)
533
+
534
+
535
+ def _parse_llm_json(raw: str) -> dict:
536
+ """Strip optional markdown fences and parse JSON. Returns empty fragment on failure.
537
+
538
+ Caps the input at `_LLM_JSON_MAX_BYTES` so a hostile or runaway model
539
+ response cannot exhaust memory inside `json.loads` (F-016).
540
+ """
541
+ if len(raw) > _LLM_JSON_MAX_BYTES:
542
+ print(
543
+ f"[graphify] LLM response exceeds {_LLM_JSON_MAX_BYTES} bytes "
544
+ f"({len(raw)} bytes); refusing to parse and dropping chunk.",
545
+ file=sys.stderr,
546
+ )
547
+ return {"nodes": [], "edges": [], "hyperedges": []}
548
+ # Strategy 1: strip whitespace, then handle markdown fences anywhere in the
549
+ # text (not only at offset 0 — the original code only stripped fences when
550
+ # `raw.startswith("```")`, missing the common case where Claude prepends a
551
+ # preamble like "Here's the extracted entities:\n\n```json\n{...}\n```").
552
+ stripped = raw.strip()
553
+ fence_start = stripped.find("```")
554
+ if fence_start != -1:
555
+ after_fence = stripped[fence_start + 3 :]
556
+ # Optional language tag (json, JSON, javascript, etc.) up to newline.
557
+ nl = after_fence.find("\n")
558
+ if nl != -1 and after_fence[:nl].strip().lower() in {"json", "javascript", "js", ""}:
559
+ after_fence = after_fence[nl + 1 :]
560
+ fence_end = after_fence.rfind("```")
561
+ if fence_end != -1:
562
+ stripped = after_fence[:fence_end].strip()
563
+ else:
564
+ stripped = after_fence.strip()
565
+ try:
566
+ return json.loads(stripped)
567
+ except json.JSONDecodeError:
568
+ pass
569
+ # Strategy 2: extract the first balanced JSON object found anywhere in
570
+ # the text. Handles the case where Claude wraps the JSON in prose without
571
+ # any markdown fence ("The extracted graph is { ... }. Hope this helps!").
572
+ start = stripped.find("{")
573
+ if start != -1:
574
+ depth = 0
575
+ in_string = False
576
+ escape = False
577
+ for i in range(start, len(stripped)):
578
+ ch = stripped[i]
579
+ if escape:
580
+ escape = False
581
+ continue
582
+ if ch == "\\":
583
+ escape = True
584
+ continue
585
+ if ch == '"':
586
+ in_string = not in_string
587
+ continue
588
+ if in_string:
589
+ continue
590
+ if ch == "{":
591
+ depth += 1
592
+ elif ch == "}":
593
+ depth -= 1
594
+ if depth == 0:
595
+ try:
596
+ return json.loads(stripped[start : i + 1])
597
+ except json.JSONDecodeError:
598
+ break
599
+ print(
600
+ f"[graphify] LLM returned invalid JSON, skipping chunk "
601
+ f"(first 200 chars: {raw[:200]!r})",
602
+ file=sys.stderr,
603
+ )
604
+ return {"nodes": [], "edges": [], "hyperedges": []}
605
+
606
+
607
+ def _response_is_hollow(raw_content: str | None, parsed: dict) -> bool:
608
+ """Detect a successful HTTP response that yielded no usable extraction.
609
+
610
+ A local model under load (most often Ollama) can return HTTP 200 with an
611
+ empty / null `message.content`, with whitespace, or with a half-generated
612
+ JSON prefix that fails to parse. All of these collapse to a "successful"
613
+ call producing zero nodes and zero edges. Without this check the chunk
614
+ is silently dropped from the corpus because no exception is raised and
615
+ `finish_reason` is `"stop"` rather than `"length"`. By flagging the
616
+ result as hollow, callers can re-route it through the same bisection
617
+ path used for context-window overflow and `finish_reason="length"`.
618
+ """
619
+ if raw_content is None or not raw_content.strip():
620
+ return True
621
+ nodes = parsed.get("nodes")
622
+ edges = parsed.get("edges")
623
+ hyperedges = parsed.get("hyperedges")
624
+ return not nodes and not edges and not hyperedges
625
+
626
+
627
def _backend_env_keys(backend: str) -> list[str]:
    """List the environment variables that may hold `backend`'s API key.

    A non-empty "env_keys" entry wins (returned as a new list); otherwise a
    single "env_key" is wrapped in a list; otherwise the list is empty.
    Raises KeyError for a backend that is not in BACKENDS.
    """
    config = BACKENDS[backend]
    several = config.get("env_keys")
    if several:
        return list(several)
    single = config.get("env_key")
    return [single] if single else []
637
+
638
+
639
def _get_backend_api_key(backend: str) -> str:
    """Return the first non-empty API key among the backend's env vars, else ""."""
    configured = (os.environ.get(env_name) for env_name in _backend_env_keys(backend))
    return next((value for value in configured if value), "")
646
+
647
+
648
def _format_backend_env_keys(backend: str) -> str:
    """Render the backend's accepted API-key variable names for display.

    Names are joined with " or ". A backend with no key variables yields the
    fixed text "AWS_PROFILE or AWS_REGION".
    """
    names = _backend_env_keys(backend)
    if not names:
        return "AWS_PROFILE or AWS_REGION"
    return " or ".join(names)
652
+
653
+
654
def _default_model_for_backend(backend: str) -> str:
    """Pick the model for `backend`.

    If the backend declares a "model_env_key" and that variable is set to a
    non-empty value, it is used; otherwise the backend's "default_model".
    """
    config = BACKENDS[backend]
    env_name = config.get("model_env_key")
    override = os.environ.get(env_name) if env_name else None
    return override or config["default_model"]
663
+
664
+
665
+ def _backend_pkg_hint(pkg: str, extra: str) -> str:
666
+ """Package-missing message that works for the recommended `uv tool` install.
667
+
668
+ `uv tool install graphifyy` puts graphify in an isolated venv, so a plain
669
+ `pip install <pkg>` never reaches it - the friction a user hits when a
670
+ backend needs anthropic/openai/boto3 and the only advice was "pip install".
671
+ Point at the extra and the uv path first, then the pip/venv fallback.
672
+ """
673
+ return (
674
+ f"the '{pkg}' package is required for this backend but is not installed. "
675
+ f"Install it with: uv tool install \"graphifyy[{extra}]\" --force "
676
+ f"(uv tool), or pip install {pkg} (pip/venv install)."
677
+ )
678
+
679
+
680
def _resolve_ollama_num_ctx(estimated_input: int, max_completion_tokens: int) -> int:
    """Choose the Ollama `num_ctx` for one request.

    The auto-derived value is estimated input + output cap + 2000 tokens of
    headroom, clamped to [8192, 131072]. GRAPHIFY_OLLAMA_NUM_CTX overrides it
    when it parses as an integer; a pinned value smaller than the estimated
    input only triggers a warning.
    """
    auto_num_ctx = min(estimated_input + max_completion_tokens + 2000, 131072)
    auto_num_ctx = max(auto_num_ctx, 8192)
    num_ctx_raw = os.environ.get("GRAPHIFY_OLLAMA_NUM_CTX", "").strip()
    if not num_ctx_raw:
        return auto_num_ctx
    try:
        num_ctx = int(num_ctx_raw)
    except ValueError:
        # Bad env var: fall back to auto-derivation (not 131072 —
        # hardcoding the cap is what causes OOM on constrained VRAM).
        print(
            f"[graphify] GRAPHIFY_OLLAMA_NUM_CTX={num_ctx_raw!r} is not a valid integer; "
            f"using auto-derived value ({auto_num_ctx}).",
            file=sys.stderr,
        )
        return auto_num_ctx
    # Warn when the pinned value is smaller than the estimated input —
    # Ollama silently truncates the prompt and returns empty responses.
    if num_ctx < estimated_input:
        print(
            f"[graphify] warning: GRAPHIFY_OLLAMA_NUM_CTX={num_ctx} is smaller than "
            f"the estimated chunk input (~{estimated_input} tokens). Ollama will "
            f"silently truncate the prompt and return empty responses. "
            f"Try --token-budget {max(1024, num_ctx // 3)} or increase NUM_CTX.",
            file=sys.stderr,
        )
    return num_ctx


def _call_openai_compat(
    base_url: str,
    api_key: str,
    model: str,
    user_message: str,
    temperature: float | None = 0,
    reasoning_effort: str | None = None,
    max_completion_tokens: int = 8192,
    *,
    backend: str = "",
    deep_mode: bool = False,
    images: list[_ImageRef] | None = None,
) -> dict:
    """Call any OpenAI-compatible API (Kimi, OpenAI, etc.) and return parsed JSON.

    Args:
        base_url: Endpoint of the OpenAI-compatible server.
        api_key: Key passed to the OpenAI client.
        model: Model name sent in the request and echoed in the result.
        user_message: The extraction prompt text.
        temperature: Sent only when not None.
        reasoning_effort: Sent only when not None.
        max_completion_tokens: Output cap for the request.
        backend: Backend name; "ollama" enables the num_ctx / keep_alive
            handling and the low-token warning.
        deep_mode: Use the deep-mode system prompt.
        images: Images to attach to the user message, if any.

    Returns:
        The parsed fragment plus "input_tokens", "output_tokens", "model" and
        "finish_reason". `finish_reason` is forced to "length" for a hollow
        response so the adaptive retry layer bisects the chunk.

    Raises:
        ImportError: The `openai` package is not installed.
        ValueError: The response has no choices or no message.
    """
    try:
        from openai import OpenAI
    except ImportError as exc:
        extra = backend if backend in ("kimi", "gemini", "openai", "ollama") else "openai"
        raise ImportError(_backend_pkg_hint("openai", extra)) from exc

    image_refs = images or []

    # Local backends (ollama, llama.cpp, vLLM) routinely take >60s for a
    # single chunk on a large model — far longer than the openai SDK's
    # default. Honour GRAPHIFY_API_TIMEOUT (seconds) for explicit override;
    # default to 600s, which is long enough for a 31B model on a 16k chunk
    # but still bounds runaway connections (issue #792 addendum).
    client = OpenAI(api_key=api_key, base_url=base_url, timeout=_resolve_api_timeout())
    kwargs: dict = {
        "model": model,
        "messages": [
            {"role": "system", "content": _extraction_system(deep=deep_mode)},
            {"role": "user", "content": _openai_content(user_message, image_refs)},
        ],
        "max_completion_tokens": max_completion_tokens,
    }
    if temperature is not None:
        kwargs["temperature"] = temperature
    if reasoning_effort is not None:
        kwargs["reasoning_effort"] = reasoning_effort
    # Kimi-k2.6 is a reasoning model — disable thinking so content isn't empty
    if "moonshot" in base_url:
        kwargs["extra_body"] = {"thinking": {"type": "disabled"}}
    # Ollama defaults num_ctx to 2048 and silently truncates prompts larger
    # than that — the symptom is hollow 200 OK responses after the first few
    # chunks (#798). We derive num_ctx from the actual prompt size so we don't
    # over-allocate KV-cache VRAM. Over-allocation (e.g. 128k slots for an 8k
    # prompt on a 31B model) exhausts VRAM by chunk 4 and produces the same
    # hollow-200 symptom — just from a different direction (#798 follow-up).
    # Formula: actual input tokens + output cap + system prompt headroom.
    # Capped at 131072 (enough for the default 60k token_budget); env var wins.
    if backend == "ollama":
        # Estimate input tokens from what is actually sent: the user text with
        # the image notes appended (chars / _CHARS_PER_TOKEN), 400 for the
        # system prompt, and a flat per-image cost for every inlined image.
        # Counting only `user_message` would undersize num_ctx for image-heavy
        # chunks and bring back the truncation this block exists to prevent.
        prompt_text = _with_image_notes(user_message, image_refs)
        inline_images = sum(1 for ref in image_refs if ref.raw)
        estimated_input = (
            len(prompt_text) // _CHARS_PER_TOKEN
            + 400
            + inline_images * _IMAGE_TOKEN_ESTIMATE
        )
        num_ctx = _resolve_ollama_num_ctx(estimated_input, max_completion_tokens)
        keep_alive = os.environ.get("GRAPHIFY_OLLAMA_KEEP_ALIVE", "30m")
        kwargs["extra_body"] = {"options": {"num_ctx": num_ctx}, "keep_alive": keep_alive}
    resp = client.chat.completions.create(**kwargs)
    if not resp.choices or resp.choices[0].message is None:
        raise ValueError("LLM returned empty or filtered response")
    raw_content = resp.choices[0].message.content
    result = _parse_llm_json(raw_content or "{}")
    result["input_tokens"] = resp.usage.prompt_tokens if resp.usage else 0
    result["output_tokens"] = resp.usage.completion_tokens if resp.usage else 0
    result["model"] = model
    # `finish_reason == "length"` means the model hit max_completion_tokens
    # mid-generation. The JSON we got back is truncated; callers should
    # treat this as a signal to retry with smaller input.
    result["finish_reason"] = resp.choices[0].finish_reason
    # An overwhelmed local model (typically Ollama) can return HTTP 200 with
    # empty / null content or unparseable half-generated JSON. The call looks
    # successful, `finish_reason` is `"stop"`, and the chunk would be silently
    # dropped from the corpus. Re-label as `"length"` so the adaptive retry
    # layer bisects the chunk — same recovery as a true truncation.
    if _response_is_hollow(raw_content, result) and result["finish_reason"] != "length":
        print(
            f"[graphify] {backend or 'backend'} returned a hollow response "
            f"(content={'empty' if not (raw_content or '').strip() else 'no nodes/edges'}, "
            f"output_tokens={result['output_tokens']}); "
            "treating as truncation so adaptive retry can bisect the chunk.",
            file=sys.stderr,
        )
        result["finish_reason"] = "length"
        output_tokens = result["output_tokens"]
        if output_tokens < 50 and backend == "ollama":
            print(
                "[graphify] warning: ollama returned very few tokens — likely causes: "
                "(1) VRAM pressure: check `nvidia-smi` and reduce chunk size with "
                "--token-budget (e.g. --token-budget 4096) or set "
                "GRAPHIFY_OLLAMA_NUM_CTX to a smaller value; "
                "(2) model too small for JSON instruction following — "
                "try a larger model with --model (e.g. --model qwen2.5-coder:14b).",
                file=sys.stderr,
            )
    return result
803
+
804
+
805
def _call_claude(api_key: str, model: str, user_message: str, max_tokens: int = 8192, *, deep_mode: bool = False, images: list[_ImageRef] | None = None) -> dict:
    """Run one extraction request against the Anthropic Messages API.

    This talks to Anthropic directly rather than through the OpenAI-compat
    layer. The returned fragment carries "input_tokens", "output_tokens",
    "model" and a "finish_reason" expressed in the OpenAI vocabulary
    ("length" or "stop"). A hollow reply is relabelled "length" so the
    adaptive retry layer bisects the chunk.

    Raises:
        ImportError: The `anthropic` package is not installed.
    """
    try:
        import anthropic
    except ImportError as exc:
        raise ImportError(_backend_pkg_hint("anthropic", "anthropic")) from exc

    client = anthropic.Anthropic(api_key=api_key, timeout=_resolve_api_timeout())
    user_turn = {"role": "user", "content": _anthropic_content(user_message, images or [])}
    resp = client.messages.create(
        model=model,
        max_tokens=max_tokens,
        system=_extraction_system(deep=deep_mode),
        messages=[user_turn],
    )

    raw_content = resp.content[0].text if resp.content else None
    usage = resp.usage
    # Normalise Anthropic's `stop_reason` to the OpenAI-compat `finish_reason`
    # vocabulary so the adaptive-retry layer doesn't have to know which
    # backend produced the result.
    hit_token_cap = resp.stop_reason == "max_tokens"

    result = _parse_llm_json(raw_content or "{}")
    result["input_tokens"] = usage.input_tokens if usage else 0
    result["output_tokens"] = usage.output_tokens if usage else 0
    result["model"] = model
    result["finish_reason"] = "length" if hit_token_cap else "stop"

    if not hit_token_cap and _response_is_hollow(raw_content, result):
        print(
            "[graphify] claude returned a hollow response; treating as "
            "truncation so adaptive retry can bisect the chunk.",
            file=sys.stderr,
        )
        result["finish_reason"] = "length"
    return result
836
+
837
+
838
+ def _call_claude_cli(user_message: str, max_tokens: int = 8192, *, deep_mode: bool = False, images: list[_ImageRef] | None = None) -> dict:
839
+ """Call Claude via the locally-installed Claude Code CLI (`claude -p`).
840
+
841
+ Routes through the user's Claude Code subscription auth instead of a separate
842
+ ANTHROPIC_API_KEY. Useful for Pro/Max subscribers who don't want to provision
843
+ a pay-as-you-go API key just to run graphify's semantic pass.
844
+
845
+ Images are passed by absolute path rather than inline base64: the prompt asks
846
+ the model to open each one with its Read tool, and each containing directory
847
+ is allowlisted with `--add-dir` so the read is permitted.
848
+ """
849
+ import platform
850
+ import shutil
851
+ import subprocess
852
+
853
+ # On Windows, npm installs `claude` as both `claude.ps1` and `claude.cmd`
854
+ # alongside each other. When PATHEXT lists `.PS1` before `.CMD`,
855
+ # `shutil.which("claude")` returns `claude.ps1`, which `CreateProcess`
856
+ # cannot execute directly — it raises `[WinError 2] The system cannot
857
+ # find the file specified`. `claude.cmd` IS executable by CreateProcess,
858
+ # so prefer it explicitly on Windows. See issue #1072.
859
+ claude_cmd = "claude"
860
+ if platform.system() == "Windows":
861
+ cmd_path = shutil.which("claude.cmd")
862
+ if cmd_path:
863
+ claude_cmd = cmd_path
864
+ elif shutil.which("claude") is None:
865
+ raise RuntimeError(
866
+ "Claude Code CLI not found on $PATH. Install from "
867
+ "https://claude.ai/code and run `claude` once to authenticate."
868
+ )
869
+ elif shutil.which("claude") is None:
870
+ raise RuntimeError(
871
+ "Claude Code CLI not found on $PATH. Install from "
872
+ "https://claude.ai/code and run `claude` once to authenticate."
873
+ )
874
+
875
+ # Use --system-prompt (replaces) instead of --append-system-prompt (adds
876
+ # to Claude Code's default coding-agent prompt). The default prompt
877
+ # pushes the model towards markdown + prose explanations, which conflict
878
+ # with the "raw JSON only" extraction instruction and cause ~30-50% of
879
+ # responses to come back wrapped in ```json fences or prefixed with a
880
+ # preamble — both of which fail the strict json.loads in _parse_llm_json.
881
+ # Replacing the default prompt eliminates the conflict at the source.
882
+ # Side benefit: cache-creation tokens per call drop ~19% in practice.
883
+ # When images are present, append the Read-the-paths instruction and
884
+ # allowlist each containing directory so the CLI's Read tool can open them.
885
+ add_dir_args: list[str] = []
886
+ if images:
887
+ user_message = _with_image_notes(user_message, images, with_paths=True)
888
+ seen_dirs: set[str] = set()
889
+ for r in images:
890
+ d = str(r.path.parent)
891
+ if d not in seen_dirs:
892
+ seen_dirs.add(d)
893
+ add_dir_args.extend(["--add-dir", d])
894
+
895
+ cli_args = [
896
+ claude_cmd, "-p",
897
+ "--output-format", "json",
898
+ "--no-session-persistence",
899
+ *add_dir_args,
900
+ "--system-prompt", _extraction_system(deep=deep_mode),
901
+ ]
902
+ # claude-cli defaults to Opus, which is overkill for the structured-JSON
903
+ # extraction graphify performs. GRAPHIFY_CLAUDE_CLI_MODEL=haiku (or
904
+ # sonnet, or a full model ID like claude-haiku-4-5-20251001) lets users
905
+ # opt into a cheaper / faster model. Default behaviour unchanged when
906
+ # the env var is unset.
907
+ cli_model = os.environ.get("GRAPHIFY_CLAUDE_CLI_MODEL", "").strip()
908
+ if cli_model:
909
+ cli_args.extend(["--model", cli_model])
910
+ proc = subprocess.run(
911
+ cli_args,
912
+ input=user_message,
913
+ capture_output=True,
914
+ text=True,
915
+ encoding="utf-8", # Force UTF-8 — prevents UnicodeEncodeError on Windows cp1252
916
+ timeout=_resolve_api_timeout(),
917
+ check=False,
918
+ )
919
+ if proc.returncode != 0:
920
+ raise RuntimeError(
921
+ f"claude -p exited {proc.returncode}: {proc.stderr.strip()[:500]}"
922
+ )
923
+
924
+ try:
925
+ envelope = json.loads(proc.stdout)
926
+ except json.JSONDecodeError as exc:
927
+ raise RuntimeError(
928
+ f"claude -p produced unparseable JSON envelope: {exc}; "
929
+ f"first 500 chars of stdout: {proc.stdout[:500]!r}"
930
+ ) from exc
931
+
932
+ raw_content = envelope.get("result", "")
933
+ result = _parse_llm_json(raw_content or "{}")
934
+ usage = envelope.get("usage") or {}
935
+ result["input_tokens"] = (
936
+ int(usage.get("input_tokens", 0) or 0)
937
+ + int(usage.get("cache_read_input_tokens", 0) or 0)
938
+ + int(usage.get("cache_creation_input_tokens", 0) or 0)
939
+ )
940
+ result["output_tokens"] = int(usage.get("output_tokens", 0) or 0)
941
+ model_usage = envelope.get("modelUsage") or {}
942
+ result["model"] = next(iter(model_usage), "claude-code-plan")
943
+ stop_reason = envelope.get("stop_reason", "")
944
+ result["finish_reason"] = "length" if stop_reason == "max_tokens" else "stop"
945
+ if _response_is_hollow(raw_content, result) and result["finish_reason"] != "length":
946
+ print(
947
+ "[graphify] claude-cli returned a hollow response; treating as "
948
+ "truncation so adaptive retry can bisect the chunk.",
949
+ file=sys.stderr,
950
+ )
951
+ result["finish_reason"] = "length"
952
+ return result
953
+
954
+
955
+ def _azure_client(api_key: str, endpoint: str):
956
+ """Construct an AzureOpenAI client with env-driven api_version and timeout."""
957
+ try:
958
+ from openai import AzureOpenAI
959
+ except ImportError as exc:
960
+ raise ImportError(
961
+ "Azure OpenAI requires the openai package. Run: pip install openai"
962
+ ) from exc
963
+ api_version = os.environ.get("AZURE_OPENAI_API_VERSION", "2024-12-01-preview").strip()
964
+ timeout_raw = os.environ.get("GRAPHIFY_API_TIMEOUT", "").strip()
965
+ timeout_s: float = 600.0
966
+ if timeout_raw:
967
+ try:
968
+ v = float(timeout_raw)
969
+ if v > 0:
970
+ timeout_s = v
971
+ except ValueError:
972
+ pass
973
+ return AzureOpenAI(api_key=api_key, azure_endpoint=endpoint, api_version=api_version, timeout=timeout_s)
974
+
975
+
976
def _call_azure(
    api_key: str,
    endpoint: str,
    model: str,
    user_message: str,
    temperature: float | None = 0,
    max_tokens: int = 8192,
    *,
    deep_mode: bool = False,
) -> dict:
    """Run one extraction request against Azure OpenAI and return the parsed result.

    The request carries the extraction system prompt (selected by ``deep_mode``)
    plus ``user_message``. ``temperature`` is only sent when it is not ``None``.
    The parsed JSON is annotated with ``input_tokens``, ``output_tokens``,
    ``model`` and ``finish_reason``. A hollow response is re-labelled as
    ``finish_reason="length"`` so the adaptive retry layer bisects the chunk.

    Raises:
        ValueError: if the response has no choices or the first choice has no
            message.
    """
    client = _azure_client(api_key, endpoint)

    request: dict = {
        "model": model,
        "messages": [
            {"role": "system", "content": _extraction_system(deep=deep_mode)},
            {"role": "user", "content": user_message},
        ],
        "max_completion_tokens": max_tokens,
    }
    if temperature is not None:
        request["temperature"] = temperature

    resp = client.chat.completions.create(**request)
    if not resp.choices or resp.choices[0].message is None:
        raise ValueError("Azure OpenAI returned empty or filtered response")

    first_choice = resp.choices[0]
    raw_content = first_choice.message.content
    result = _parse_llm_json(raw_content or "{}")

    if resp.usage:
        result["input_tokens"] = resp.usage.prompt_tokens
    else:
        result["input_tokens"] = 0
    if resp.usage:
        result["output_tokens"] = resp.usage.completion_tokens
    else:
        result["output_tokens"] = 0
    result["model"] = model
    result["finish_reason"] = first_choice.finish_reason

    already_truncated = result["finish_reason"] == "length"
    if _response_is_hollow(raw_content, result) and not already_truncated:
        print(
            "[graphify] azure returned a hollow response; treating as "
            "truncation so adaptive retry can bisect the chunk.",
            file=sys.stderr,
        )
        result["finish_reason"] = "length"
    return result
1015
+
1016
+
1017
def _call_bedrock(model: str, user_message: str, max_tokens: int = 8192, *, deep_mode: bool = False, images: list[_ImageRef] | None = None) -> dict:
    """Call AWS Bedrock via the boto3 Converse API and return the parsed result.

    Region comes from ``AWS_REGION`` / ``AWS_DEFAULT_REGION`` (falling back to
    ``us-east-1``) and the profile from ``AWS_PROFILE``; credentials are left to
    the boto3 session.

    Args:
        model: Bedrock model ID passed as ``modelId``.
        user_message: Text part of the user turn.
        max_tokens: Output token cap sent as ``inferenceConfig.maxTokens``.
        deep_mode: Selects the deep variant of the extraction system prompt.
        images: Optional image references rendered by ``_bedrock_content``.

    Returns:
        The parsed extraction dict, annotated with ``input_tokens``,
        ``output_tokens``, ``model`` and ``finish_reason`` ("length" or "stop").

    Raises:
        ImportError: if boto3 is not installed.
        RuntimeError: if the Converse call fails with a botocore ``ClientError``.
    """
    try:
        import boto3
        import botocore.exceptions
    except ImportError as exc:
        raise ImportError(
            "AWS Bedrock extraction requires boto3. Run: pip install graphifyy[bedrock]"
        ) from exc

    region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION") or "us-east-1"
    profile = os.environ.get("AWS_PROFILE")
    session = boto3.Session(profile_name=profile, region_name=region)
    client = session.client("bedrock-runtime")

    try:
        resp = client.converse(
            modelId=model,
            system=[{"text": _extraction_system(deep=deep_mode)}],
            messages=[{"role": "user", "content": _bedrock_content(user_message, images or [])}],
            inferenceConfig={"maxTokens": max_tokens, "temperature": 0},
        )
    except botocore.exceptions.ClientError as exc:
        # Read the error envelope defensively so a malformed one cannot mask
        # the real failure with a KeyError.
        error = exc.response.get("Error", {})
        code = error.get("Code", "Unknown")
        msg = error.get("Message", str(exc))
        raise RuntimeError(f"Bedrock API error ({code}): {msg}") from exc

    # The content list may be empty, and its first block is not necessarily a
    # text block. Take the first block that carries text instead of indexing
    # [0], which raised IndexError on an empty list.
    content_blocks = ((resp.get("output") or {}).get("message") or {}).get("content") or []
    text = next(
        (
            block["text"]
            for block in content_blocks
            if isinstance(block, dict) and "text" in block
        ),
        "{}",
    )
    result = _parse_llm_json(text)
    usage = resp.get("usage", {})
    result["input_tokens"] = usage.get("inputTokens", 0)
    result["output_tokens"] = usage.get("outputTokens", 0)
    result["model"] = model
    result["finish_reason"] = "length" if resp.get("stopReason") == "max_tokens" else "stop"
    if _response_is_hollow(text, result) and result["finish_reason"] != "length":
        print(
            "[graphify] bedrock returned a hollow response; treating as "
            "truncation so adaptive retry can bisect the chunk.",
            file=sys.stderr,
        )
        result["finish_reason"] = "length"
    return result
1059
+
1060
+
1061
def extract_files_direct(
    files: list[Path],
    backend: str | None = None,
    api_key: str | None = None,
    model: str | None = None,
    root: Path = Path("."),
    *,
    deep_mode: bool = False,
) -> dict:
    """Run semantic extraction over ``files`` with a single backend request.

    When ``backend`` is omitted it is auto-detected. Text-like files are
    concatenated into the user message; raster images are passed as structured
    references, with pixels stripped for backends that lack vision support.

    Returns:
        The backend's result dict (nodes, edges, hyperedges, input_tokens,
        output_tokens).

    Raises:
        ValueError: no backend could be detected, the backend is unknown, a
            required API key is missing, or the Azure endpoint is unset.
        ImportError: the SDK for the chosen backend is not installed.
    """
    if backend is None:
        backend = detect_backend()
        if backend is None:
            raise ValueError(
                "No LLM backend configured. Set one of: GEMINI_API_KEY, ANTHROPIC_API_KEY, "
                "OPENAI_API_KEY, DEEPSEEK_API_KEY, MOONSHOT_API_KEY, "
                "AZURE_OPENAI_API_KEY+AZURE_OPENAI_ENDPOINT, OLLAMA_BASE_URL, "
                "or AWS credentials. Pass backend= explicitly to select a provider."
            )
    if backend not in BACKENDS:
        raise ValueError(f"Unknown backend {backend!r}. Available: {sorted(BACKENDS)}")

    cfg = BACKENDS[backend]
    key = api_key or _get_backend_api_key(backend)
    if not key:
        if backend == "ollama":
            # Ollama ignores auth, but the OpenAI client insists on a non-empty
            # key. Use a placeholder and warn loudly so the corpus is never
            # routed somewhere without the user noticing (F-029).
            ollama_url = os.environ.get("OLLAMA_BASE_URL", cfg.get("base_url", ""))
            _validate_ollama_base_url(ollama_url)
            print(
                "[graphify] WARNING: ollama backend selected with no OLLAMA_API_KEY set; "
                f"sending corpus to {ollama_url}. Set OLLAMA_API_KEY (any non-empty value) "
                "to suppress this warning.",
                file=sys.stderr,
            )
            key = "ollama"
        elif backend not in ("bedrock", "claude-cli"):
            raise ValueError(
                f"No API key for backend '{backend}'. "
                f"Set {_format_backend_env_keys(backend)} or pass api_key=."
            )

    mdl = model or _default_model_for_backend(backend)

    # Text files go through _read_files; raster images become structured refs
    # that vision backends render as pixels and every other backend sees as a
    # plain text reference.
    text_files, image_files = _partition_semantic_files(files)
    user_msg = _read_files(text_files, root)
    vision = _backend_supports_vision(backend)
    # Only inline (base64) vision backends need the image bytes loaded and
    # size-capped; path-based and non-vision backends do not.
    read_bytes = vision and backend not in _PATH_IMAGE_BACKENDS
    if image_files:
        image_refs = _build_image_refs(image_files, root, read_bytes=read_bytes)
    else:
        image_refs = []
    if image_refs and not vision:
        image_refs = _strip_pixels(image_refs)
    max_out = _resolve_max_tokens(cfg.get("max_tokens", 8192))

    if backend == "claude":
        return _call_claude(key, mdl, user_msg, max_tokens=max_out, deep_mode=deep_mode, images=image_refs)
    if backend == "claude-cli":
        return _call_claude_cli(user_msg, max_tokens=max_out, deep_mode=deep_mode, images=image_refs)
    if backend == "bedrock":
        return _call_bedrock(mdl, user_msg, max_tokens=max_out, deep_mode=deep_mode, images=image_refs)
    if backend == "azure":
        endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT", "").strip()
        if not endpoint:
            raise ValueError(
                "Azure OpenAI backend requires AZURE_OPENAI_ENDPOINT to be set "
                "(e.g. https://my-resource.openai.azure.com/)."
            )
        return _call_azure(
            key,
            endpoint,
            mdl,
            user_msg,
            temperature=cfg.get("temperature", 0),
            max_tokens=max_out,
            deep_mode=deep_mode,
        )

    # Every remaining backend speaks the OpenAI-compatible chat API.
    return _call_openai_compat(
        cfg["base_url"],
        key,
        mdl,
        user_msg,
        temperature=cfg.get("temperature", 0),
        reasoning_effort=cfg.get("reasoning_effort"),
        max_completion_tokens=_resolve_max_tokens(cfg.get("max_completion_tokens", 8192)),
        backend=backend,
        deep_mode=deep_mode,
        images=image_refs,
    )
1157
+
1158
+
1159
def _estimate_file_tokens(path: Path) -> int:
    """Estimate the prompt-token cost of one file under `_read_files` rules.

    Raster images are charged a flat `_IMAGE_TOKEN_ESTIMATE`. For text, the
    module tokenizer is used when one is loaded; otherwise the estimate falls
    back to the chars/4-style heuristic on the file size. Both paths cap the
    content at `_FILE_CHAR_CAP` and add a constant for the per-file separator.

    Returns 0 for unreadable paths. If the tokenizer rejects the content with a
    ``ValueError``, the character heuristic is used for that file instead, so
    one odd file cannot abort chunk packing.
    """
    # Images are not read as text; estimate by count, not by binary byte size.
    if _is_vision_image(path):
        return _IMAGE_TOKEN_ESTIMATE

    if _TOKENIZER is None:
        try:
            size = path.stat().st_size
        except OSError:
            return 0
        chars = min(size, _FILE_CHAR_CAP) + _PER_FILE_OVERHEAD_CHARS
        return chars // _CHARS_PER_TOKEN

    try:
        content = path.read_text(encoding="utf-8", errors="replace")[:_FILE_CHAR_CAP]
    except OSError:
        return 0

    try:
        token_count = len(_TOKENIZER.encode(content))
    except ValueError:
        # tiktoken's encode() raises ValueError by default when the text
        # contains a special-token literal such as "<|endoftext|>". Source
        # files that mention those strings are legitimate input, so degrade to
        # the character heuristic rather than propagate the error.
        token_count = len(content) // _CHARS_PER_TOKEN
    return token_count + (_PER_FILE_OVERHEAD_CHARS // _CHARS_PER_TOKEN)
1185
+
1186
+
1187
+ def _pack_chunks_by_tokens(
1188
+ files: list[Path],
1189
+ token_budget: int,
1190
+ ) -> list[list[Path]]:
1191
+ """Greedily pack files into chunks that fit a token budget.
1192
+
1193
+ Files are first grouped by parent directory so related artifacts share a
1194
+ chunk (cross-file edges are more likely to be extracted within a chunk
1195
+ than across chunks). Within each directory, files are added one at a
1196
+ time; a chunk is closed when adding the next file would exceed the
1197
+ budget. A single file larger than the budget gets its own chunk and the
1198
+ caller is expected to handle the API error if it actually overflows the
1199
+ model's context window — packing can't shrink one big file.
1200
+ """
1201
+ if token_budget <= 0:
1202
+ raise ValueError(f"token_budget must be positive, got {token_budget}")
1203
+
1204
+ by_dir: dict[Path, list[Path]] = {}
1205
+ for f in files:
1206
+ by_dir.setdefault(f.parent, []).append(f)
1207
+
1208
+ chunks: list[list[Path]] = []
1209
+ current: list[Path] = []
1210
+ current_tokens = 0
1211
+ current_images = 0
1212
+
1213
+ for directory in sorted(by_dir):
1214
+ for path in by_dir[directory]:
1215
+ cost = _estimate_file_tokens(path)
1216
+ is_image = _is_vision_image(path)
1217
+ over_budget = current_tokens + cost > token_budget
1218
+ over_images = is_image and current_images >= _MAX_IMAGES_PER_CHUNK
1219
+ if current and (over_budget or over_images):
1220
+ chunks.append(current)
1221
+ current = []
1222
+ current_tokens = 0
1223
+ current_images = 0
1224
+ current.append(path)
1225
+ current_tokens += cost
1226
+ current_images += is_image
1227
+
1228
+ if current:
1229
+ chunks.append(current)
1230
+ return chunks
1231
+
1232
+
1233
+ _CONTEXT_EXCEEDED_MARKERS = (
1234
+ "context size",
1235
+ "context length",
1236
+ "context_length",
1237
+ "context window",
1238
+ "n_keep",
1239
+ "exceeds the available",
1240
+ "n_ctx",
1241
+ "maximum context",
1242
+ "too many tokens",
1243
+ "prompt is too long",
1244
+ "context_length_exceeded",
1245
+ )
1246
+
1247
+
1248
+ def _looks_like_context_exceeded(exc: BaseException) -> bool:
1249
+ """Heuristically classify an exception as a context-window overflow.
1250
+
1251
+ Different backends raise different exception types and messages for the
1252
+ same underlying problem ("the prompt + max_completion_tokens did not fit
1253
+ in the model's context window"). We match on substrings of the stringified
1254
+ exception so the retry layer can recover without depending on a specific
1255
+ SDK class. False positives are cheap (we'll re-extract on halves and
1256
+ likely recover); false negatives are expensive (chunk fails entirely).
1257
+ """
1258
+ msg = str(exc).lower()
1259
+ return any(marker in msg for marker in _CONTEXT_EXCEEDED_MARKERS)
1260
+
1261
+
1262
def _extract_with_adaptive_retry(
    chunk: list[Path],
    backend: str,
    api_key: str | None,
    model: str | None,
    root: Path,
    max_depth: int,
    _depth: int = 0,
    *,
    deep_mode: bool = False,
) -> dict:
    """Extract ``chunk``, bisecting and recursing when it proves too large.

    Two outcomes trigger a split into halves:

    - the extraction call raises an error that `_looks_like_context_exceeded`
      recognises (the prompt did not fit the model's context window); any
      other exception is re-raised unchanged;
    - the result comes back with ``finish_reason == "length"``, meaning the
      output was truncated. The backend call helpers in this module re-label
      hollow responses the same way, so those follow this path too.

    Splitting stops when the chunk holds a single file (it cannot be made
    smaller) or when ``_depth`` reaches ``max_depth``. On the overflow path an
    empty result is returned in that case; on the truncation path the
    truncated result is returned as-is. Each stop is reported on stderr. A
    chunk can fan out into at most ``2 ** max_depth`` requests.

    The merged result of a split always reports ``finish_reason="stop"``: each
    half either succeeded or has already printed its own warning.
    """
    size = len(chunk)

    def _warn(message: str) -> None:
        print(message, file=sys.stderr)

    def _dropped() -> dict:
        # Placeholder for a chunk that could not be extracted at all.
        return {"nodes": [], "edges": [], "hyperedges": [], "input_tokens": 0, "output_tokens": 0, "model": model, "finish_reason": "stop"}

    def _bisect(merged_model) -> dict:
        # Left half first, then right; an exception in either propagates.
        mid = size // 2
        left, right = [
            _extract_with_adaptive_retry(
                part, backend, api_key, model, root, max_depth, _depth + 1, deep_mode=deep_mode
            )
            for part in (chunk[:mid], chunk[mid:])
        ]
        return {
            "nodes": left.get("nodes", []) + right.get("nodes", []),
            "edges": left.get("edges", []) + right.get("edges", []),
            "hyperedges": left.get("hyperedges", []) + right.get("hyperedges", []),
            "input_tokens": left.get("input_tokens", 0) + right.get("input_tokens", 0),
            "output_tokens": left.get("output_tokens", 0) + right.get("output_tokens", 0),
            "model": merged_model,
            "finish_reason": "stop",
        }

    try:
        result = extract_files_direct(
            chunk, backend=backend, api_key=api_key, model=model, root=root, deep_mode=deep_mode
        )
    except Exception as exc:  # noqa: BLE001 — re-raise unless it's a known context overflow
        if not _looks_like_context_exceeded(exc):
            raise
        if size <= 1:
            _warn(
                f"[graphify] single-file chunk {chunk[0]} exceeds model context "
                f"and cannot be split further: {exc}"
            )
            return _dropped()
        if _depth >= max_depth:
            _warn(
                f"[graphify] chunk of {size} still overflows context at "
                f"recursion depth {_depth} (max {max_depth}) — dropping"
            )
            return _dropped()
        _warn(
            f"[graphify] chunk of {size} exceeded context at depth "
            f"{_depth} ({type(exc).__name__}); splitting in half and retrying"
        )
        return _bisect(model)

    if result.get("finish_reason") != "length":
        return result

    if size <= 1:
        _warn(
            f"[graphify] single-file chunk {chunk[0]} truncated at "
            f"max_completion_tokens — partial result kept"
        )
        return result

    if _depth >= max_depth:
        _warn(
            f"[graphify] chunk of {size} still truncated at recursion "
            f"depth {_depth} (max {max_depth}) — partial result kept"
        )
        return result

    _warn(
        f"[graphify] chunk of {size} truncated at depth {_depth}, "
        f"splitting into halves of {size // 2} and "
        f"{size - size // 2}"
    )
    return _bisect(result.get("model"))
1392
+
1393
+
1394
def extract_corpus_parallel(
    files: list[Path],
    backend: str = "kimi",
    api_key: str | None = None,
    model: str | None = None,
    root: Path = Path("."),
    chunk_size: int = 20,
    on_chunk_done: Callable | None = None,
    token_budget: int | None = 60_000,
    max_concurrency: int = 4,
    max_retry_depth: int = 3,
    deep_mode: bool = False,
) -> dict:
    """Extract a whole corpus chunk by chunk and merge the results.

    Chunking:
        With ``token_budget`` set (default 60_000) files are packed by
        `_pack_chunks_by_tokens`. With ``token_budget=None`` the legacy
        fixed-count slicing by ``chunk_size`` is used.

    Concurrency:
        Chunks run on a thread pool of at most ``max_concurrency`` workers.
        The ``ollama`` and ``claude-cli`` backends are forced to one worker
        unless ``GRAPHIFY_OLLAMA_PARALLEL=1`` / ``GRAPHIFY_CLAUDE_CLI_PARALLEL=1``
        is set. A single-worker run skips the pool and processes chunks in
        submission order.

    Retry:
        Each chunk goes through `_extract_with_adaptive_retry` with
        ``max_retry_depth`` as its recursion cap.

    Callback:
        ``on_chunk_done(idx, total, chunk_result)`` is called once per
        successful top-level chunk, in completion order; ``idx`` is the
        chunk's submission index.

    Returns:
        A dict with merged ``nodes``, ``edges``, ``hyperedges``, summed
        ``input_tokens`` / ``output_tokens``, and ``failed_chunks``. A chunk
        that raises is reported on stderr and counted, not propagated.
    """
    if token_budget is None:
        chunks = [files[i:i + chunk_size] for i in range(0, len(files), chunk_size)]
    else:
        chunks = _pack_chunks_by_tokens(files, token_budget=token_budget)

    merged: dict = {
        "nodes": [], "edges": [], "hyperedges": [],
        "input_tokens": 0, "output_tokens": 0,
        "failed_chunks": 0,  # count of chunks that raised — loud failure on chunk errors
    }
    total = len(chunks)

    def _run_one(idx: int, chunk: list[Path]) -> tuple[int, dict | None, Exception | None]:
        started = time.time()
        try:
            outcome = _extract_with_adaptive_retry(
                chunk,
                backend=backend,
                api_key=api_key,
                model=model,
                root=root,
                max_depth=max_retry_depth,
                deep_mode=deep_mode,
            )
            outcome["elapsed_seconds"] = round(time.time() - started, 2)
            return idx, outcome, None
        except Exception as exc:  # noqa: BLE001 — caller-facing surface, log + continue
            return idx, None, exc

    def _absorb(idx: int, outcome: dict | None, error: Exception | None) -> None:
        # Shared by the serial and threaded paths: log failures, merge successes.
        if error is not None:
            print(f"[graphify] chunk {idx + 1}/{total} failed: {error}", file=sys.stderr)
            merged["failed_chunks"] += 1
            return
        assert outcome is not None
        _merge_into(merged, outcome)
        if callable(on_chunk_done):
            on_chunk_done(idx, total, outcome)

    # Ollama serves one request at a time per loaded model, and concurrent
    # large requests produce hollow responses (#798). claude-cli subprocesses
    # conflict over session state. Both run serially unless the user opts in.
    for serial_backend, opt_in_var in (
        ("ollama", "GRAPHIFY_OLLAMA_PARALLEL"),
        ("claude-cli", "GRAPHIFY_CLAUDE_CLI_PARALLEL"),
    ):
        if backend == serial_backend and os.environ.get(opt_in_var, "").strip() != "1":
            max_concurrency = 1

    workers = max(1, min(max_concurrency, total))
    if workers == 1:
        # No pool for single-worker runs; callbacks fire in submission order.
        for position, chunk in enumerate(chunks):
            _, outcome, error = _run_one(position, chunk)
            _absorb(position, outcome, error)
    else:
        with ThreadPoolExecutor(max_workers=workers) as pool:
            pending = [pool.submit(_run_one, position, chunk) for position, chunk in enumerate(chunks)]
            for finished in as_completed(pending):
                _absorb(*finished.result())

    # Repeat the failure count at the end so it is never buried mid-log. No
    # exception is raised; callers receive the partial result.
    if merged["failed_chunks"] > 0:
        print(
            f"[graphify] WARNING: {merged['failed_chunks']}/{total} semantic chunk(s) failed"
            " — see errors above. Partial results returned.",
            file=sys.stderr,
        )
    return merged
1520
+
1521
+
1522
+ def _merge_into(merged: dict, result: dict) -> None:
1523
+ """Append a chunk result into the running merged accumulator."""
1524
+ merged["nodes"].extend(result.get("nodes", []))
1525
+ merged["edges"].extend(result.get("edges", []))
1526
+ merged["hyperedges"].extend(result.get("hyperedges", []))
1527
+ merged["input_tokens"] += result.get("input_tokens", 0)
1528
+ merged["output_tokens"] += result.get("output_tokens", 0)
1529
+
1530
+
1531
def _call_llm(prompt: str, *, backend: str, max_tokens: int = 200) -> str:
    """Send a plain-text prompt to ``backend`` and return the model's text reply.

    For lightweight callers (e.g. the `graphify.dedup` LLM tiebreaker) that
    need neither the extraction system prompt nor JSON parsing. The backend
    dispatch mirrors `extract_files_direct`, always using the backend's
    default model.

    Args:
        prompt: The user message, sent as the only message of the request.
        backend: A key of `BACKENDS`.
        max_tokens: Output token cap. The claude-cli backend does not take it.

    Returns:
        The reply text; an empty string when the backend returned no text.

    Raises:
        ValueError: unknown backend, missing API key, missing Azure endpoint,
            or an empty/filtered response from an OpenAI-style backend.
        ImportError: the SDK for the chosen backend is not installed.
        RuntimeError: the Claude Code CLI is missing, exits non-zero, or
            prints an unparseable JSON envelope.
    """
    if backend not in BACKENDS:
        raise ValueError(f"Unknown backend {backend!r}")
    cfg = BACKENDS[backend]
    key = _get_backend_api_key(backend)
    if not key and backend == "ollama":
        ollama_url = os.environ.get("OLLAMA_BASE_URL", cfg.get("base_url", ""))
        _validate_ollama_base_url(ollama_url)
        key = "ollama"
    if not key and backend not in ("bedrock", "claude-cli"):
        raise ValueError(
            f"No API key for backend '{backend}'. Set {_format_backend_env_keys(backend)}."
        )
    mdl = _default_model_for_backend(backend)

    if backend == "claude":
        try:
            import anthropic
        except ImportError as exc:
            raise ImportError(_backend_pkg_hint("anthropic", "anthropic")) from exc
        client = anthropic.Anthropic(api_key=key)
        resp = client.messages.create(
            model=mdl,
            max_tokens=max_tokens,
            messages=[{"role": "user", "content": prompt}],
        )
        return resp.content[0].text if resp.content else ""

    if backend == "claude-cli":
        import platform
        import shutil
        import subprocess

        # On Windows, `shutil.which("claude")` can resolve to a `claude.ps1`
        # shim that CreateProcess cannot execute ([WinError 2]); `claude.cmd`
        # is executable. Prefer it explicitly, matching the extraction path
        # (issue #1072).
        claude_cmd = "claude"
        if platform.system() == "Windows":
            claude_cmd = shutil.which("claude.cmd") or claude_cmd
        if claude_cmd == "claude" and shutil.which("claude") is None:
            raise RuntimeError("Claude Code CLI not found on $PATH")
        proc = subprocess.run(
            [claude_cmd, "-p", "--output-format", "json", "--no-session-persistence"],
            input=prompt,
            capture_output=True,
            text=True,
            encoding="utf-8",  # Force UTF-8 — prevents UnicodeEncodeError on Windows cp1252
            timeout=_resolve_api_timeout(),
            check=False,
        )
        if proc.returncode != 0:
            raise RuntimeError(f"claude -p exited {proc.returncode}: {proc.stderr.strip()[:500]}")
        try:
            envelope = json.loads(proc.stdout)
        except json.JSONDecodeError as exc:
            raise RuntimeError(f"claude -p produced unparseable JSON envelope: {exc}") from exc
        # A null "result" must not leak None out of a function typed -> str.
        return envelope.get("result", "") or ""

    if backend == "bedrock":
        try:
            import boto3
        except ImportError as exc:
            raise ImportError(_backend_pkg_hint("boto3", "bedrock")) from exc
        region = os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION") or "us-east-1"
        profile = os.environ.get("AWS_PROFILE")
        session = boto3.Session(profile_name=profile, region_name=region)
        client = session.client("bedrock-runtime")
        resp = client.converse(
            modelId=mdl,
            messages=[{"role": "user", "content": [{"text": prompt}]}],
            inferenceConfig={"maxTokens": max_tokens, "temperature": 0},
        )
        # The content list may be empty, and its first block need not be text;
        # indexing [0] raised IndexError on an empty list.
        content_blocks = ((resp.get("output") or {}).get("message") or {}).get("content") or []
        return next(
            (
                block["text"]
                for block in content_blocks
                if isinstance(block, dict) and "text" in block
            ),
            "",
        )

    if backend == "azure":
        endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT", "").strip()
        if not endpoint:
            raise ValueError(
                "Azure OpenAI backend requires AZURE_OPENAI_ENDPOINT to be set."
            )
        azure_client = _azure_client(key, endpoint)
        resp = azure_client.chat.completions.create(
            model=mdl,
            messages=[{"role": "user", "content": prompt}],
            max_completion_tokens=max_tokens,
            temperature=cfg.get("temperature", 0),
        )
        if not resp.choices or resp.choices[0].message is None:
            raise ValueError("Azure OpenAI returned empty or filtered response")
        return resp.choices[0].message.content or ""

    # OpenAI-compatible (kimi, openai, gemini, ollama)
    try:
        from openai import OpenAI
    except ImportError as exc:
        raise ImportError(_backend_pkg_hint("openai", "openai")) from exc
    client = OpenAI(api_key=key, base_url=cfg["base_url"])
    kwargs: dict = {
        "model": mdl,
        "messages": [{"role": "user", "content": prompt}],
        "max_completion_tokens": max_tokens,
    }
    temperature = cfg.get("temperature", 0)
    if temperature is not None:
        kwargs["temperature"] = temperature
    if cfg.get("reasoning_effort"):
        kwargs["reasoning_effort"] = cfg["reasoning_effort"]
    if "moonshot" in cfg["base_url"]:
        # Moonshot endpoints get "thinking" disabled via the request body.
        kwargs["extra_body"] = {"thinking": {"type": "disabled"}}
    resp = client.chat.completions.create(**kwargs)
    if not resp.choices or resp.choices[0].message is None:
        raise ValueError("LLM returned empty or filtered response")
    return resp.choices[0].message.content or ""
1647
+
1648
+
1649
def estimate_cost(backend: str, input_tokens: int, output_tokens: int) -> float:
    """Return the estimated USD cost of a token count for ``backend``.

    The backend's ``pricing`` entry holds ``input`` and ``output`` rates
    expressed per million tokens. A backend not present in `BACKENDS` costs
    ``0.0``.
    """
    if backend not in BACKENDS:
        return 0.0
    rates = BACKENDS[backend]["pricing"]
    input_cost = input_tokens * rates["input"]
    output_cost = output_tokens * rates["output"]
    return (input_cost + output_cost) / 1_000_000
1655
+
1656
+
1657
def _ollama_host_is_link_local_or_metadata(host: str) -> bool:
    """True if *host* is, or resolves to, a link-local / cloud-metadata address.

    Checks, in order: a small blocklist of metadata names and unspecified
    addresses, the literal ``169.254.`` prefix, the host parsed as an IP
    literal, and finally every address the name resolves to. Resolving the name
    means an alias pointing at 169.254.169.254 is caught too, not just a
    literal IP. General private/LAN addresses are deliberately NOT treated as
    metadata: people do run Ollama on trusted LAN boxes, so those only warn.

    IPv4-mapped IPv6 addresses (``::ffff:a.b.c.d``) are classified by the
    embedded IPv4 address, so ``::ffff:169.254.169.254`` cannot be used to slip
    past the check. Addresses that resolve to the unspecified address
    (``0.0.0.0`` / ``::``) are blocked just like their literal spellings.

    Returns False for an empty host and for names that fail to resolve.
    """
    import ipaddress
    import socket

    def _is_blocked(ip) -> bool:
        # A mapped address is routed to its embedded IPv4 address, so judge it
        # by that address rather than by the IPv6 wrapper.
        mapped = getattr(ip, "ipv4_mapped", None)
        if mapped is not None:
            ip = mapped
        return ip.is_link_local or ip.is_unspecified

    if not host:
        # Nothing to classify; also avoids a resolver call whose meaning for an
        # empty name is platform-specific.
        return False
    if host in ("metadata.google.internal", "metadata.google.com", "0.0.0.0", "::", "[::]"):  # nosec B104 - blocklist, not a bind
        return True
    if host.startswith("169.254."):  # link-local literal, includes the metadata IP
        return True

    # Classify IP literals directly: deterministic and needs no resolver.
    bare = host[1:-1] if host.startswith("[") and host.endswith("]") else host
    try:
        literal = ipaddress.ip_address(bare)
    except ValueError:
        literal = None
    if literal is not None:
        return _is_blocked(literal)

    try:
        infos = socket.getaddrinfo(host, None, socket.AF_UNSPEC, socket.SOCK_STREAM)
    except (socket.gaierror, UnicodeError, OSError):
        return False
    for info in infos:
        try:
            ip = ipaddress.ip_address(info[4][0])
        except ValueError:
            continue
        if _is_blocked(ip):  # 169.254.0.0/16, fe80::/10, mapped forms, 0.0.0.0 / ::
            return True
    return False
1682
+
1683
+
1684
def _validate_ollama_base_url(url: str, *, warn: bool = True) -> None:
    """Warn if OLLAMA_BASE_URL looks unsafe; hard-block link-local/metadata (F3).

    Sending an entire corpus to a non-loopback http:// endpoint silently leaks
    proprietary code, but some users genuinely run Ollama on a LAN host they
    trust, so a general non-loopback target only warns. A link-local or cloud
    metadata address (169.254.x, metadata.google.*, or any host that resolves to
    one) is never a legitimate Ollama host and is a classic SSRF target, so we
    fail closed with a ValueError there regardless of *warn*. Pass warn=False for
    an early gate that should hard-block but leave the user-facing warning to the
    later in-flow call.

    Loopback is recognised as the name ``localhost`` or an IP *literal* in a
    loopback range. A DNS name that merely starts with ``127.`` (for example
    ``127.0.0.1.evil.example``) is not loopback and does trigger the warning.

    Args:
        url: The OLLAMA_BASE_URL value to check.
        warn: When False, suppress every stderr warning; the hard block still
            applies.

    Raises:
        ValueError: If the URL is http(s) and its host is a link-local/metadata
            address according to ``_ollama_host_is_link_local_or_metadata``.
    """
    import ipaddress
    from urllib.parse import urlparse

    try:
        parsed = urlparse(url)
    except Exception:  # best-effort gate: an unparseable value is reported, never raised
        if warn:
            print(
                f"[graphify] WARNING: OLLAMA_BASE_URL={url!r} is not a parseable URL.",
                file=sys.stderr,
            )
        return
    if parsed.scheme not in ("http", "https"):
        if warn:
            print(
                f"[graphify] WARNING: OLLAMA_BASE_URL has unexpected scheme {parsed.scheme!r}; "
                "expected http or https.",
                file=sys.stderr,
            )
        return
    host = (parsed.hostname or "").lower()
    if _ollama_host_is_link_local_or_metadata(host):
        raise ValueError(
            f"OLLAMA_BASE_URL points at a link-local/metadata address ({host!r}); refusing to "
            "send the corpus there. Set it to a real Ollama host."
        )
    if not warn:
        return

    if host == "localhost":
        is_loopback = True
    else:
        try:
            literal = ipaddress.ip_address(host)
        except ValueError:
            # Not an IP literal, i.e. a DNS name: never assume it is local.
            is_loopback = False
        else:
            mapped = getattr(literal, "ipv4_mapped", None)
            if mapped is not None:
                literal = mapped
            is_loopback = literal.is_loopback  # 127.0.0.0/8 and ::1
    if not is_loopback:
        scheme_note = " (UNENCRYPTED)" if parsed.scheme == "http" else ""
        print(
            f"[graphify] WARNING: OLLAMA_BASE_URL points to non-loopback host {host!r}{scheme_note}. "
            "Your full corpus will be sent to that endpoint. "
            "Set OLLAMA_BASE_URL=http://localhost:11434/v1 to keep extraction local.",
            file=sys.stderr,
        )
1729
+
1730
+
1731
def detect_backend() -> str | None:
    """Return the name of whichever backend has an API key set, or None.

    Priority: gemini → kimi → claude → openai → deepseek → azure → bedrock →
    ollama (opt-in), then any other backend registered in ``BACKENDS`` that has
    an API key (``claude-cli`` is never auto-selected).

    Ollama is intentionally checked after every built-in keyed backend so a paid
    API key (Anthropic/OpenAI/etc.) is never silently shadowed by an incidental
    OLLAMA_BASE_URL in the environment — see security finding F-002/F-029.
    Setting OLLAMA_BASE_URL alongside a paid key keeps you on the paid backend;
    remove the paid key (or pass --backend ollama explicitly) to route to the
    local model.

    Azure requires both an API key and a non-blank AZURE_OPENAI_ENDPOINT; the
    endpoint is stripped here so a whitespace-only value does not select a
    backend that the Azure call path would then reject.

    Raises:
        ValueError: Propagated from ``_validate_ollama_base_url`` when
            OLLAMA_BASE_URL points at a link-local/metadata address.
    """
    builtin_keyed = ("gemini", "kimi", "claude", "openai", "deepseek")
    for backend in builtin_keyed:
        if _get_backend_api_key(backend):
            return backend
    if _get_backend_api_key("azure") and os.environ.get("AZURE_OPENAI_ENDPOINT", "").strip():
        return "azure"
    # Bedrock has no API key of its own; any AWS profile/region hint selects it.
    if os.environ.get("AWS_PROFILE") or os.environ.get("AWS_REGION") or os.environ.get("AWS_DEFAULT_REGION"):
        return "bedrock"
    ollama_url = os.environ.get("OLLAMA_BASE_URL")
    if ollama_url:
        _validate_ollama_base_url(ollama_url)
        return "ollama"
    # Remaining entries in BACKENDS, in registry order.
    already_handled = builtin_keyed + ("azure", "bedrock", "ollama", "claude-cli")
    for name in BACKENDS:
        if name not in already_handled and _get_backend_api_key(name):
            return name
    return None
1758
+
1759
+
1760
# ── Community labeling ────────────────────────────────────────────────────────
# Inside an orchestrating agent (Claude Code / Gemini CLI) the agent itself names
# communities per skill.md Step 5: it reads the analysis file and writes 2-5 word
# names using its own reasoning, with no API call. Run as a bare CLI
# (``graphify extract . --backend X``) there is no agent to perform that step, so
# labels would stay ``Community 0/1/2...``. The helpers below close that gap by
# asking the configured backend to name communities in ONE batched call and
# returning a complete ``{cid: name}`` map (#1097).

# Strips a leading ```/```json fence and a trailing ``` fence from a reply.
_LABEL_FENCE_RE = re.compile(r"^\s*```(?:json)?\s*|\s*```\s*$", re.IGNORECASE)

# Cap on LLM-named communities; the tail keeps its placeholder name.
_LABEL_MAX_COMMUNITIES = 200

# Node labels sampled per community for the prompt.
_LABEL_TOP_K = 12

# Individual labels are truncated to this length to keep the prompt small.
_LABEL_MAXLEN = 60
1773
+
1774
+
1775
def _placeholder_community_labels(communities) -> dict[int, str]:
    """Map every community id to its default ``Community <id>`` name.

    Keys are coerced with ``int``; the name embeds the id exactly as iterated.
    """
    defaults: dict[int, str] = {}
    for cid in communities:
        defaults[int(cid)] = f"Community {cid}"
    return defaults
1777
+
1778
+
1779
def _community_label_lines(G, communities, gods, max_communities, top_k):
    """Build the per-community prompt lines for the labeling request.

    Communities are visited largest first, at most ``max_communities`` of them.
    For each, member labels are sampled with god nodes ahead of the rest,
    trimmed, de-duplicated case-insensitively, and collected until ``top_k``
    is reached. A community that yields no usable label is skipped.

    ``gods`` may hold node-id values or god_nodes() dicts
    (``{"id": ..., "label": ...}``); ``None`` is treated as empty.

    Returns:
        ``(lines, labeled_cids)`` - the prompt lines and, in the same order,
        the integer ids of the communities they describe.
    """
    god_ids = set()
    for entry in gods or []:
        god_ids.add(entry["id"] if isinstance(entry, dict) else entry)

    by_size_desc = sorted(communities.items(), key=lambda kv: -len(kv[1]))

    lines: list[str] = []
    labeled_cids: list[int] = []
    for cid, members in by_size_desc[:max_communities]:
        # Stable sort on "is not a god node" keeps member order within each
        # group while moving god nodes to the front.
        candidates = sorted(members, key=lambda m: m not in god_ids)
        picked: list[str] = []
        seen_lower: set[str] = set()
        for node_id in candidates:
            if node_id in G.nodes:
                raw = str(G.nodes[node_id].get("label", node_id))
            else:
                raw = str(node_id)
            text = raw.strip().strip("()")[:_LABEL_MAXLEN]
            folded = text.lower()
            if text and folded not in seen_lower:
                seen_lower.add(folded)
                picked.append(text)
            if len(picked) >= top_k:
                break
        if not picked:
            continue
        joined = ", ".join(picked)
        lines.append(f"Community {cid}: {joined}")
        labeled_cids.append(int(cid))
    return lines, labeled_cids
1804
+
1805
+
1806
def _parse_label_response(text: str, labeled_cids: list[int]) -> dict[int, str]:
    """Parse the backend's JSON ``{cid: name}`` reply.

    Markdown code fences are stripped, then the reply is trimmed to the span
    from its first ``{`` to its last ``}`` so prose before *or after* the
    object does not break parsing. For each requested cid the name is looked up
    under ``"<cid>"``, then ``<cid>``, then ``"Community <cid>"`` (the form the
    prompt shows, which some models echo back as the key).

    Args:
        text: Raw reply text from the backend.
        labeled_cids: Community ids that were included in the prompt.

    Returns:
        ``{cid: name}`` for every requested cid that has a non-blank string
        name; cids the backend did not name are silently omitted.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        ValueError: If the decoded payload is not a JSON object.
    """
    cleaned = _LABEL_FENCE_RE.sub("", text.strip())
    start, end = cleaned.find("{"), cleaned.rfind("}")
    if start != -1 and end > start:
        cleaned = cleaned[start:end + 1]
    data = json.loads(cleaned)
    if not isinstance(data, dict):
        raise ValueError("label response is not a JSON object")
    out: dict[int, str] = {}
    for cid in labeled_cids:
        for key in (str(cid), cid, f"Community {cid}"):
            name = data.get(key)
            if isinstance(name, str) and name.strip():
                out[cid] = name.strip()
                break
    return out
1825
+
1826
+
1827
def label_communities(
    G,
    communities,
    *,
    backend: str,
    gods=None,
    max_communities: int = _LABEL_MAX_COMMUNITIES,
    top_k: int = _LABEL_TOP_K,
) -> dict[int, str]:
    """Name communities with ``backend`` and return a full ``{cid: name}`` map.

    Every community starts with its ``Community N`` placeholder; names returned
    by the backend overwrite the placeholders they cover. When no community
    yields a prompt line, the placeholders are returned without calling the
    backend. Backend or parse errors propagate - use
    :func:`generate_community_labels` for graceful degradation.
    """
    names = _placeholder_community_labels(communities)
    prompt_lines, named_cids = _community_label_lines(G, communities, gods, max_communities, top_k)
    if prompt_lines:
        instructions = (
            "You are naming clusters in a knowledge graph. For each community below, "
            "return a concise 2-5 word plain-language name describing what it is about "
            "(e.g. \"Order Management\", \"Payment Flow\", \"Auth Middleware\"). "
            "Respond ONLY with a JSON object mapping the community id (as a string) to "
            "its name - no prose, no markdown fences.\n\n"
        )
        # Token budget grows with the number of communities, capped at 4096.
        budget = min(40 + 16 * len(named_cids), 4096)
        reply = _call_llm(instructions + "\n".join(prompt_lines), backend=backend, max_tokens=budget)
        names.update(_parse_label_response(reply, named_cids))
    return names
1859
+
1860
+
1861
def generate_community_labels(
    G,
    communities,
    *,
    backend: str | None = None,
    gods=None,
    quiet: bool = False,
) -> tuple[dict[int, str], str]:
    """CLI entry point: resolve a backend, name communities, and degrade to
    ``Community N`` placeholders on any failure (no backend, API error, malformed
    reply).

    Args:
        G: Graph whose ``nodes`` supply the labels sampled for the prompt.
        communities: Mapping of community id to its member node ids.
        backend: Backend name; when None it is auto-detected.
        gods: Optional god nodes, passed through to ``label_communities``.
        quiet: Suppress the stderr diagnostics.

    Returns:
        ``(labels, source)`` where source is ``"llm"`` or ``"placeholder"``.

    Backend and parsing errors are caught and reported rather than raised.
    """
    if backend is None:
        try:
            backend = detect_backend()
        except Exception as exc:
            backend = None
            # Report why detection failed (e.g. a blocked OLLAMA_BASE_URL)
            # instead of presenting it as "nothing configured".
            if not quiet:
                print(
                    f"[graphify label] warning: backend detection failed ({exc}).",
                    file=sys.stderr,
                )
    if not backend:
        if not quiet:
            print(
                "[graphify label] no LLM backend configured; keeping Community N "
                "placeholders. Set an API key (e.g. GOOGLE_API_KEY) or pass --backend.",
                file=sys.stderr,
            )
        return _placeholder_community_labels(communities), "placeholder"
    try:
        labels = label_communities(G, communities, backend=backend, gods=gods)
    except Exception as exc:
        if not quiet:
            print(
                f"[graphify label] warning: community labeling failed ({exc}); "
                "using Community N placeholders.",
                file=sys.stderr,
            )
        return _placeholder_community_labels(communities), "placeholder"
    return labels, "llm"