wegho-agentes 7.0.3 → 7.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1483)
  1. package/.agent/.shared/ui-ux-pro-max/data/charts.csv +26 -0
  2. package/.agent/.shared/ui-ux-pro-max/data/colors.csv +97 -0
  3. package/.agent/.shared/ui-ux-pro-max/data/icons.csv +101 -0
  4. package/.agent/.shared/ui-ux-pro-max/data/landing.csv +31 -0
  5. package/.agent/.shared/ui-ux-pro-max/data/products.csv +97 -0
  6. package/.agent/.shared/ui-ux-pro-max/data/prompts.csv +24 -0
  7. package/.agent/.shared/ui-ux-pro-max/data/react-performance.csv +45 -0
  8. package/.agent/.shared/ui-ux-pro-max/data/stacks/flutter.csv +53 -0
  9. package/.agent/.shared/ui-ux-pro-max/data/stacks/html-tailwind.csv +56 -0
  10. package/.agent/.shared/ui-ux-pro-max/data/stacks/jetpack-compose.csv +53 -0
  11. package/.agent/.shared/ui-ux-pro-max/data/stacks/nextjs.csv +53 -0
  12. package/.agent/.shared/ui-ux-pro-max/data/stacks/nuxt-ui.csv +51 -0
  13. package/.agent/.shared/ui-ux-pro-max/data/stacks/nuxtjs.csv +59 -0
  14. package/.agent/.shared/ui-ux-pro-max/data/stacks/react-native.csv +52 -0
  15. package/.agent/.shared/ui-ux-pro-max/data/stacks/react.csv +54 -0
  16. package/.agent/.shared/ui-ux-pro-max/data/stacks/shadcn.csv +61 -0
  17. package/.agent/.shared/ui-ux-pro-max/data/stacks/svelte.csv +54 -0
  18. package/.agent/.shared/ui-ux-pro-max/data/stacks/swiftui.csv +51 -0
  19. package/.agent/.shared/ui-ux-pro-max/data/stacks/vue.csv +50 -0
  20. package/.agent/.shared/ui-ux-pro-max/data/styles.csv +59 -0
  21. package/.agent/.shared/ui-ux-pro-max/data/typography.csv +58 -0
  22. package/.agent/.shared/ui-ux-pro-max/data/ui-reasoning.csv +101 -0
  23. package/.agent/.shared/ui-ux-pro-max/data/ux-guidelines.csv +100 -0
  24. package/.agent/.shared/ui-ux-pro-max/data/web-interface.csv +31 -0
  25. package/.agent/.shared/ui-ux-pro-max/scripts/core.py +258 -0
  26. package/.agent/.shared/ui-ux-pro-max/scripts/design_system.py +1067 -0
  27. package/.agent/.shared/ui-ux-pro-max/scripts/search.py +106 -0
  28. package/.agent/ARCHITECTURE.md +288 -0
  29. package/.agent/agents/backend-specialist.md +263 -0
  30. package/.agent/agents/code-archaeologist.md +106 -0
  31. package/.agent/agents/database-architect.md +226 -0
  32. package/.agent/agents/debugger.md +225 -0
  33. package/.agent/agents/devops-engineer.md +242 -0
  34. package/.agent/agents/documentation-writer.md +104 -0
  35. package/.agent/agents/explorer-agent.md +73 -0
  36. package/.agent/agents/frontend-specialist.md +593 -0
  37. package/.agent/agents/game-developer.md +162 -0
  38. package/.agent/agents/mobile-developer.md +377 -0
  39. package/.agent/agents/orchestrator.md +416 -0
  40. package/.agent/agents/penetration-tester.md +188 -0
  41. package/.agent/agents/performance-optimizer.md +187 -0
  42. package/.agent/agents/product-manager.md +112 -0
  43. package/.agent/agents/product-owner.md +95 -0
  44. package/.agent/agents/project-planner.md +406 -0
  45. package/.agent/agents/qa-automation-engineer.md +103 -0
  46. package/.agent/agents/security-auditor.md +170 -0
  47. package/.agent/agents/seo-specialist.md +111 -0
  48. package/.agent/agents/test-engineer.md +158 -0
  49. package/.agent/mcp_config.json +24 -0
  50. package/.agent/mcp_config.md +30 -0
  51. package/.agent/rules/GEMINI.md +308 -0
  52. package/.agent/scripts/auto_preview.py +148 -0
  53. package/.agent/scripts/checklist.py +217 -0
  54. package/.agent/scripts/session_manager.py +120 -0
  55. package/.agent/scripts/verify_all.py +327 -0
  56. package/.agent/skills/api-patterns/SKILL.md +81 -0
  57. package/.agent/skills/api-patterns/api-style.md +42 -0
  58. package/.agent/skills/api-patterns/auth.md +24 -0
  59. package/.agent/skills/api-patterns/documentation.md +26 -0
  60. package/.agent/skills/api-patterns/graphql.md +41 -0
  61. package/.agent/skills/api-patterns/rate-limiting.md +31 -0
  62. package/.agent/skills/api-patterns/response.md +37 -0
  63. package/.agent/skills/api-patterns/rest.md +40 -0
  64. package/.agent/skills/api-patterns/scripts/api_validator.py +211 -0
  65. package/.agent/skills/api-patterns/security-testing.md +122 -0
  66. package/.agent/skills/api-patterns/trpc.md +41 -0
  67. package/.agent/skills/api-patterns/versioning.md +22 -0
  68. package/.agent/skills/app-builder/SKILL.md +75 -0
  69. package/.agent/skills/app-builder/agent-coordination.md +71 -0
  70. package/.agent/skills/app-builder/feature-building.md +53 -0
  71. package/.agent/skills/app-builder/project-detection.md +34 -0
  72. package/.agent/skills/app-builder/scaffolding.md +118 -0
  73. package/.agent/skills/app-builder/tech-stack.md +41 -0
  74. package/.agent/skills/app-builder/templates/SKILL.md +39 -0
  75. package/.agent/skills/app-builder/templates/astro-static/TEMPLATE.md +76 -0
  76. package/.agent/skills/app-builder/templates/chrome-extension/TEMPLATE.md +92 -0
  77. package/.agent/skills/app-builder/templates/cli-tool/TEMPLATE.md +88 -0
  78. package/.agent/skills/app-builder/templates/electron-desktop/TEMPLATE.md +88 -0
  79. package/.agent/skills/app-builder/templates/express-api/TEMPLATE.md +83 -0
  80. package/.agent/skills/app-builder/templates/flutter-app/TEMPLATE.md +90 -0
  81. package/.agent/skills/app-builder/templates/monorepo-turborepo/TEMPLATE.md +90 -0
  82. package/.agent/skills/app-builder/templates/nextjs-fullstack/TEMPLATE.md +122 -0
  83. package/.agent/skills/app-builder/templates/nextjs-saas/TEMPLATE.md +122 -0
  84. package/.agent/skills/app-builder/templates/nextjs-static/TEMPLATE.md +169 -0
  85. package/.agent/skills/app-builder/templates/nuxt-app/TEMPLATE.md +134 -0
  86. package/.agent/skills/app-builder/templates/python-fastapi/TEMPLATE.md +83 -0
  87. package/.agent/skills/app-builder/templates/react-native-app/TEMPLATE.md +119 -0
  88. package/.agent/skills/architecture/SKILL.md +55 -0
  89. package/.agent/skills/architecture/context-discovery.md +43 -0
  90. package/.agent/skills/architecture/examples.md +94 -0
  91. package/.agent/skills/architecture/pattern-selection.md +68 -0
  92. package/.agent/skills/architecture/patterns-reference.md +50 -0
  93. package/.agent/skills/architecture/trade-off-analysis.md +77 -0
  94. package/.agent/skills/bash-linux/SKILL.md +199 -0
  95. package/.agent/skills/behavioral-modes/SKILL.md +242 -0
  96. package/.agent/skills/brainstorming/SKILL.md +163 -0
  97. package/.agent/skills/brainstorming/dynamic-questioning.md +350 -0
  98. package/.agent/skills/clean-code/SKILL.md +201 -0
  99. package/.agent/skills/code-review-checklist/SKILL.md +109 -0
  100. package/.agent/skills/database-design/SKILL.md +103 -0
  101. package/.agent/skills/database-design/database-selection.md +43 -0
  102. package/.agent/skills/database-design/github-benchmarks.md +35 -0
  103. package/.agent/skills/database-design/indexing.md +39 -0
  104. package/.agent/skills/database-design/migrations.md +48 -0
  105. package/.agent/skills/database-design/optimization.md +36 -0
  106. package/.agent/skills/database-design/orm-selection.md +30 -0
  107. package/.agent/skills/database-design/schema-design.md +56 -0
  108. package/.agent/skills/database-design/scripts/schema_validator.py +172 -0
  109. package/.agent/skills/database-design/supabase-security-egress.md +42 -0
  110. package/.agent/skills/deployment-procedures/SKILL.md +241 -0
  111. package/.agent/skills/doc.md +177 -0
  112. package/.agent/skills/documentation-templates/SKILL.md +194 -0
  113. package/.agent/skills/file-doc-sync/SKILL.md +44 -0
  114. package/.agent/skills/file-doc-sync/references/document-structure.md +20 -0
  115. package/.agent/skills/file-doc-sync/scripts/sync-file-docs.md +45 -0
  116. package/.agent/skills/file-doc-sync/scripts/sync-file-docs.ts +349 -0
  117. package/.agent/skills/frontend-design/SKILL.md +452 -0
  118. package/.agent/skills/frontend-design/animation-guide.md +331 -0
  119. package/.agent/skills/frontend-design/color-system.md +311 -0
  120. package/.agent/skills/frontend-design/decision-trees.md +418 -0
  121. package/.agent/skills/frontend-design/motion-graphics.md +306 -0
  122. package/.agent/skills/frontend-design/scripts/accessibility_checker.py +183 -0
  123. package/.agent/skills/frontend-design/scripts/ux_audit.py +722 -0
  124. package/.agent/skills/frontend-design/typography-system.md +345 -0
  125. package/.agent/skills/frontend-design/ux-psychology.md +1116 -0
  126. package/.agent/skills/frontend-design/visual-effects.md +383 -0
  127. package/.agent/skills/game-development/2d-games/SKILL.md +119 -0
  128. package/.agent/skills/game-development/3d-games/SKILL.md +135 -0
  129. package/.agent/skills/game-development/SKILL.md +167 -0
  130. package/.agent/skills/game-development/game-art/SKILL.md +185 -0
  131. package/.agent/skills/game-development/game-audio/SKILL.md +190 -0
  132. package/.agent/skills/game-development/game-design/SKILL.md +129 -0
  133. package/.agent/skills/game-development/mobile-games/SKILL.md +108 -0
  134. package/.agent/skills/game-development/multiplayer/SKILL.md +132 -0
  135. package/.agent/skills/game-development/pc-games/SKILL.md +144 -0
  136. package/.agent/skills/game-development/vr-ar/SKILL.md +123 -0
  137. package/.agent/skills/game-development/web-games/SKILL.md +150 -0
  138. package/.agent/skills/geo-fundamentals/SKILL.md +156 -0
  139. package/.agent/skills/geo-fundamentals/scripts/geo_checker.py +289 -0
  140. package/.agent/skills/i18n-localization/SKILL.md +154 -0
  141. package/.agent/skills/i18n-localization/scripts/i18n_checker.py +241 -0
  142. package/.agent/skills/intelligent-routing/SKILL.md +335 -0
  143. package/.agent/skills/lint-and-validate/SKILL.md +45 -0
  144. package/.agent/skills/lint-and-validate/scripts/lint_runner.py +184 -0
  145. package/.agent/skills/lint-and-validate/scripts/type_coverage.py +173 -0
  146. package/.agent/skills/mcp-builder/SKILL.md +176 -0
  147. package/.agent/skills/mobile-design/SKILL.md +394 -0
  148. package/.agent/skills/mobile-design/decision-trees.md +516 -0
  149. package/.agent/skills/mobile-design/mobile-backend.md +491 -0
  150. package/.agent/skills/mobile-design/mobile-color-system.md +420 -0
  151. package/.agent/skills/mobile-design/mobile-debugging.md +122 -0
  152. package/.agent/skills/mobile-design/mobile-design-thinking.md +357 -0
  153. package/.agent/skills/mobile-design/mobile-navigation.md +458 -0
  154. package/.agent/skills/mobile-design/mobile-performance.md +767 -0
  155. package/.agent/skills/mobile-design/mobile-testing.md +356 -0
  156. package/.agent/skills/mobile-design/mobile-typography.md +433 -0
  157. package/.agent/skills/mobile-design/platform-android.md +666 -0
  158. package/.agent/skills/mobile-design/platform-ios.md +561 -0
  159. package/.agent/skills/mobile-design/scripts/mobile_audit.py +670 -0
  160. package/.agent/skills/mobile-design/touch-psychology.md +537 -0
  161. package/.agent/skills/nextjs-react-expert/1-async-eliminating-waterfalls.md +351 -0
  162. package/.agent/skills/nextjs-react-expert/2-bundle-bundle-size-optimization.md +240 -0
  163. package/.agent/skills/nextjs-react-expert/3-server-server-side-performance.md +490 -0
  164. package/.agent/skills/nextjs-react-expert/4-client-client-side-data-fetching.md +264 -0
  165. package/.agent/skills/nextjs-react-expert/5-rerender-re-render-optimization.md +581 -0
  166. package/.agent/skills/nextjs-react-expert/6-rendering-rendering-performance.md +432 -0
  167. package/.agent/skills/nextjs-react-expert/7-js-javascript-performance.md +684 -0
  168. package/.agent/skills/nextjs-react-expert/8-advanced-advanced-patterns.md +150 -0
  169. package/.agent/skills/nextjs-react-expert/9-cache-components.md +103 -0
  170. package/.agent/skills/nextjs-react-expert/SKILL.md +293 -0
  171. package/.agent/skills/nextjs-react-expert/scripts/convert_rules.py +222 -0
  172. package/.agent/skills/nextjs-react-expert/scripts/react_performance_checker.py +252 -0
  173. package/.agent/skills/nodejs-best-practices/SKILL.md +333 -0
  174. package/.agent/skills/parallel-agents/SKILL.md +175 -0
  175. package/.agent/skills/performance-profiling/SKILL.md +143 -0
  176. package/.agent/skills/performance-profiling/scripts/lighthouse_audit.py +76 -0
  177. package/.agent/skills/plan-writing/SKILL.md +152 -0
  178. package/.agent/skills/powershell-windows/SKILL.md +167 -0
  179. package/.agent/skills/python-patterns/SKILL.md +441 -0
  180. package/.agent/skills/red-team-tactics/SKILL.md +199 -0
  181. package/.agent/skills/rust-pro/SKILL.md +176 -0
  182. package/.agent/skills/seo-fundamentals/SKILL.md +129 -0
  183. package/.agent/skills/seo-fundamentals/scripts/seo_checker.py +219 -0
  184. package/.agent/skills/server-management/SKILL.md +161 -0
  185. package/.agent/skills/systematic-debugging/SKILL.md +109 -0
  186. package/.agent/skills/tailwind-patterns/SKILL.md +269 -0
  187. package/.agent/skills/tdd-workflow/SKILL.md +149 -0
  188. package/.agent/skills/testing-patterns/SKILL.md +178 -0
  189. package/.agent/skills/testing-patterns/scripts/test_runner.py +219 -0
  190. package/.agent/skills/vulnerability-scanner/SKILL.md +322 -0
  191. package/.agent/skills/vulnerability-scanner/checklists.md +121 -0
  192. package/.agent/skills/vulnerability-scanner/github-benchmarks.md +32 -0
  193. package/.agent/skills/vulnerability-scanner/scripts/security_scan.py +458 -0
  194. package/.agent/skills/web-design-guidelines/SKILL.md +57 -0
  195. package/.agent/skills/webapp-testing/SKILL.md +187 -0
  196. package/.agent/skills/webapp-testing/scripts/playwright_runner.py +173 -0
  197. package/.agent/skills/wegho-global-rules/SKILL.md +36 -6
  198. package/.agent/workflows/PROMPT_GUIDE.md +16 -203
  199. package/.agent/workflows/auto-run-orchestrator.md +3 -3
  200. package/.agent/workflows/brainstorm.md +113 -0
  201. package/.agent/workflows/create.md +59 -0
  202. package/.agent/workflows/debug.md +103 -0
  203. package/.agent/workflows/deploy.md +176 -0
  204. package/.agent/workflows/enhance.md +63 -0
  205. package/.agent/workflows/orchestrate.md +237 -0
  206. package/.agent/workflows/plan.md +89 -0
  207. package/.agent/workflows/preview.md +81 -0
  208. package/.agent/workflows/run-agents-workflow.md +15 -186
  209. package/.agent/workflows/status.md +86 -0
  210. package/.agent/workflows/test.md +144 -0
  211. package/.agent/workflows/ui-ux-pro-max.md +296 -0
  212. package/.agents/AGENT_WORKFLOW.md +36 -528
  213. package/.agents/CLI.md +42 -222
  214. package/.agents/README.md +18 -130
  215. package/.agents/antigravity-preflight.md +36 -0
  216. package/.agents/antigravity-preflight.ts +21 -80
  217. package/.agents/auto-runner.md +36 -0
  218. package/.agents/auto-runner.ts +2 -2
  219. package/.agents/cli.ts +2 -2
  220. package/.agents/code-auditor-agent.md +38 -0
  221. package/.agents/config.md +34 -0
  222. package/.agents/config.ts +15 -29
  223. package/.agents/context-loader.md +37 -0
  224. package/.agents/core/agent-parallelizer.md +33 -0
  225. package/.agents/core/ai-agents-agent.md +35 -0
  226. package/.agents/core/architecture-agent.md +39 -0
  227. package/.agents/core/architecture-agent.ts +1 -1
  228. package/.agents/core/automation-agent.md +33 -0
  229. package/.agents/core/backend-agent.md +35 -0
  230. package/.agents/core/base-agent.md +36 -0
  231. package/.agents/core/base-agent.ts +132 -337
  232. package/.agents/core/build-manager.md +35 -0
  233. package/.agents/core/cache-manager.md +32 -0
  234. package/.agents/core/checkpoint-manager.md +36 -0
  235. package/.agents/core/cloud-agent.md +33 -0
  236. package/.agents/core/cro-agent.md +33 -0
  237. package/.agents/core/database-agent.md +35 -0
  238. package/.agents/core/devops-agent.md +33 -0
  239. package/.agents/core/documentation-agent.md +36 -0
  240. package/.agents/core/documentation-agent.ts +1 -1
  241. package/.agents/core/file-generator.md +42 -0
  242. package/.agents/core/frontend-agent.md +36 -0
  243. package/.agents/core/frontend-agent.ts +1 -1
  244. package/.agents/core/nextjs-agent.md +35 -0
  245. package/.agents/core/pentest-agent.md +35 -0
  246. package/.agents/core/performance-tracker.md +33 -0
  247. package/.agents/core/planning-agent.md +33 -0
  248. package/.agents/core/planning-agent.ts +77 -388
  249. package/.agents/core/quality-agent.md +36 -0
  250. package/.agents/core/quality-agent.ts +1 -1
  251. package/.agents/core/rag-agent.md +36 -0
  252. package/.agents/core/report-generator.md +35 -0
  253. package/.agents/core/retry-utility.md +34 -0
  254. package/.agents/core/security-agent.md +39 -0
  255. package/.agents/core/security-agent.ts +1 -1
  256. package/.agents/core/skill-manager.md +36 -0
  257. package/.agents/core/stack-boundary-agent.md +38 -0
  258. package/.agents/core/testing-agent.md +33 -0
  259. package/.agents/core/ui-data-map-agent.md +38 -0
  260. package/.agents/core/uiux-agent.md +35 -0
  261. package/.agents/core/workflow-validator.md +33 -0
  262. package/.agents/core/workflow-validator.ts +98 -158
  263. package/.agents/domains/README.md +10 -53
  264. package/.agents/domains/logistics/route-agent.md +34 -0
  265. package/.agents/domains/logistics/route-agent.ts +1 -1
  266. package/.agents/domains/news/cms-agent.md +35 -0
  267. package/.agents/domains/news/cms-agent.ts +1 -1
  268. package/.agents/domains/news/seo-agent.md +34 -0
  269. package/.agents/domains/news/seo-agent.ts +1 -1
  270. package/.agents/domains/production/production-control-agent.md +34 -0
  271. package/.agents/domains/production/production-control-agent.ts +1 -1
  272. package/.agents/init.md +40 -0
  273. package/.agents/init.ts +81 -168
  274. package/.agents/install.md +34 -0
  275. package/.agents/install.ts +115 -138
  276. package/.agents/orchestrator.md +43 -0
  277. package/.agents/orchestrator.ts +322 -764
  278. package/.agents/project-discovery-agent.md +40 -0
  279. package/.agents/reference-repositories.json +5 -0
  280. package/.agents/reference-repositories.md +30 -0
  281. package/.agents/security/vulnerability-db.md +42 -0
  282. package/.agents/sync-docs.md +32 -0
  283. package/.agents/sync-docs.ts +15 -0
  284. package/.agents/task-analyzer-agent.md +36 -0
  285. package/.agents/task-analyzer-agent.ts +122 -478
  286. package/.agents/validate.md +36 -0
  287. package/.agents/validate.ts +1 -1
  288. package/INSTALL.md +18 -300
  289. package/README.md +20 -332
  290. package/package.json +19 -3
  291. package/skills/algorithmic-art/templates/generator_template.md +31 -0
  292. package/skills/algorithmic-art/templates/viewer.md +31 -0
  293. package/skills/app-store-optimization/expected_output.md +30 -0
  294. package/skills/app-store-optimization/sample_input.md +30 -0
  295. package/skills/cc-skill-continuous-learning/config.md +30 -0
  296. package/skills/claude-d3js-skill/assets/chart-template.md +32 -0
  297. package/skills/claude-d3js-skill/assets/interactive-template.md +32 -0
  298. package/skills/claude-d3js-skill/assets/sample-data.md +30 -0
  299. package/skills/loki-mode/scripts/take-screenshots.md +33 -0
  300. package/skills/playwright-skill/lib/helpers.md +34 -0
  301. package/skills/playwright-skill/package.md +30 -0
  302. package/skills/playwright-skill/run.md +36 -0
  303. package/skills/postgres-best-practices/metadata.md +31 -0
  304. package/skills/pptx-official/scripts/html2pptx.md +38 -0
  305. package/skills/react-best-practices/metadata.md +30 -0
  306. package/skills/remotion-best-practices/rules/assets/charts-bar-chart.md +33 -0
  307. package/skills/remotion-best-practices/rules/assets/text-animations-typewriter.md +32 -0
  308. package/skills/remotion-best-practices/rules/assets/text-animations-word-highlight.md +34 -0
  309. package/skills/systematic-debugging/condition-based-waiting-example.md +34 -0
  310. package/skills/typescript-expert/references/tsconfig-strict.md +30 -0
  311. package/skills/typescript-expert/references/utility-types.md +32 -0
  312. package/skills/writing-skills/render-graphs.md +32 -0
  313. package/.agents/AI_COMPATIBILITY.md +0 -333
  314. package/.agents/core/feedback-collector.ts +0 -207
  315. package/.agents/core/inventory-agent.ts +0 -757
  316. package/.agents/core/memory-system.ts +0 -429
  317. package/.agents/memory/ai-agents-agent/failures.json +0 -1
  318. package/.agents/memory/ai-agents-agent/learnings.json +0 -1
  319. package/.agents/memory/ai-agents-agent/specialty.md +0 -3
  320. package/.agents/memory/ai-agents-agent/successes.json +0 -1
  321. package/.agents/memory/architecture-agent/failures.json +0 -1
  322. package/.agents/memory/architecture-agent/learnings.json +0 -1
  323. package/.agents/memory/architecture-agent/specialty.md +0 -31
  324. package/.agents/memory/architecture-agent/successes.json +0 -1
  325. package/.agents/memory/automation-agent/failures.json +0 -1
  326. package/.agents/memory/automation-agent/learnings.json +0 -1
  327. package/.agents/memory/automation-agent/specialty.md +0 -3
  328. package/.agents/memory/automation-agent/successes.json +0 -1
  329. package/.agents/memory/backend-agent/failures.json +0 -1
  330. package/.agents/memory/backend-agent/learnings.json +0 -1
  331. package/.agents/memory/backend-agent/specialty.md +0 -3
  332. package/.agents/memory/backend-agent/successes.json +0 -1
  333. package/.agents/memory/cloud-agent/failures.json +0 -1
  334. package/.agents/memory/cloud-agent/learnings.json +0 -1
  335. package/.agents/memory/cloud-agent/specialty.md +0 -3
  336. package/.agents/memory/cloud-agent/successes.json +0 -1
  337. package/.agents/memory/cms-agent/failures.json +0 -1
  338. package/.agents/memory/cms-agent/learnings.json +0 -1
  339. package/.agents/memory/cms-agent/specialty.md +0 -30
  340. package/.agents/memory/cms-agent/successes.json +0 -1
  341. package/.agents/memory/cro-agent/failures.json +0 -1
  342. package/.agents/memory/cro-agent/learnings.json +0 -1
  343. package/.agents/memory/cro-agent/specialty.md +0 -3
  344. package/.agents/memory/cro-agent/successes.json +0 -1
  345. package/.agents/memory/database-agent/failures.json +0 -1
  346. package/.agents/memory/database-agent/learnings.json +0 -1
  347. package/.agents/memory/database-agent/specialty.md +0 -3
  348. package/.agents/memory/database-agent/successes.json +0 -1
  349. package/.agents/memory/devops-agent/failures.json +0 -1
  350. package/.agents/memory/devops-agent/learnings.json +0 -1
  351. package/.agents/memory/devops-agent/specialty.md +0 -3
  352. package/.agents/memory/devops-agent/successes.json +0 -1
  353. package/.agents/memory/documentation-agent/failures.json +0 -1
  354. package/.agents/memory/documentation-agent/learnings.json +0 -1
  355. package/.agents/memory/documentation-agent/specialty.md +0 -33
  356. package/.agents/memory/documentation-agent/successes.json +0 -1
  357. package/.agents/memory/frontend-agent/failures.json +0 -1
  358. package/.agents/memory/frontend-agent/learnings.json +0 -1
  359. package/.agents/memory/frontend-agent/specialty.md +0 -30
  360. package/.agents/memory/frontend-agent/successes.json +0 -1
  361. package/.agents/memory/inventory-agent/failures.json +0 -1
  362. package/.agents/memory/inventory-agent/inventory/index.json +0 -1
  363. package/.agents/memory/inventory-agent/inventory/types.json +0 -1
  364. package/.agents/memory/inventory-agent/inventory/variables.json +0 -1
  365. package/.agents/memory/inventory-agent/learnings.json +0 -1
  366. package/.agents/memory/inventory-agent/specialty.md +0 -129
  367. package/.agents/memory/inventory-agent/successes.json +0 -1
  368. package/.agents/memory/nextjs-agent/failures.json +0 -1
  369. package/.agents/memory/nextjs-agent/learnings.json +0 -1
  370. package/.agents/memory/nextjs-agent/specialty.md +0 -3
  371. package/.agents/memory/nextjs-agent/successes.json +0 -1
  372. package/.agents/memory/pentest-agent/failures.json +0 -1
  373. package/.agents/memory/pentest-agent/learnings.json +0 -1
  374. package/.agents/memory/pentest-agent/specialty.md +0 -3
  375. package/.agents/memory/pentest-agent/successes.json +0 -1
  376. package/.agents/memory/planning-agent/specialty.md +0 -13
  377. package/.agents/memory/production-control-agent/failures.json +0 -1
  378. package/.agents/memory/production-control-agent/learnings.json +0 -1
  379. package/.agents/memory/production-control-agent/specialty.md +0 -29
  380. package/.agents/memory/production-control-agent/successes.json +0 -1
  381. package/.agents/memory/quality-agent/failures.json +0 -1
  382. package/.agents/memory/quality-agent/learnings.json +0 -1
  383. package/.agents/memory/quality-agent/specialty.md +0 -31
  384. package/.agents/memory/quality-agent/successes.json +0 -1
  385. package/.agents/memory/rag-agent/failures.json +0 -1
  386. package/.agents/memory/rag-agent/learnings.json +0 -1
  387. package/.agents/memory/rag-agent/specialty.md +0 -3
  388. package/.agents/memory/rag-agent/successes.json +0 -1
  389. package/.agents/memory/reference-repositories.json +0 -271
  390. package/.agents/memory/route-agent/failures.json +0 -1
  391. package/.agents/memory/route-agent/learnings.json +0 -1
  392. package/.agents/memory/route-agent/specialty.md +0 -29
  393. package/.agents/memory/route-agent/successes.json +0 -1
  394. package/.agents/memory/security-agent/failures.json +0 -1
  395. package/.agents/memory/security-agent/learnings.json +0 -1
  396. package/.agents/memory/security-agent/specialty.md +0 -31
  397. package/.agents/memory/security-agent/successes.json +0 -1
  398. package/.agents/memory/seo-agent/failures.json +0 -1
  399. package/.agents/memory/seo-agent/learnings.json +0 -1
  400. package/.agents/memory/seo-agent/specialty.md +0 -31
  401. package/.agents/memory/seo-agent/successes.json +0 -1
  402. package/.agents/memory/stack-boundary-agent/failures.json +0 -1
  403. package/.agents/memory/stack-boundary-agent/learnings.json +0 -1
  404. package/.agents/memory/stack-boundary-agent/specialty.md +0 -3
  405. package/.agents/memory/stack-boundary-agent/successes.json +0 -1
  406. package/.agents/memory/testing-agent/failures.json +0 -1
  407. package/.agents/memory/testing-agent/learnings.json +0 -1
  408. package/.agents/memory/testing-agent/specialty.md +0 -3
  409. package/.agents/memory/testing-agent/successes.json +0 -1
  410. package/.agents/memory/ui-data-map-agent/failures.json +0 -1
  411. package/.agents/memory/ui-data-map-agent/learnings.json +0 -1
  412. package/.agents/memory/ui-data-map-agent/specialty.md +0 -3
  413. package/.agents/memory/ui-data-map-agent/successes.json +0 -1
  414. package/.agents/memory/uiux-agent/failures.json +0 -1
  415. package/.agents/memory/uiux-agent/learnings.json +0 -1
  416. package/.agents/memory/uiux-agent/specialty.md +0 -3
  417. package/.agents/memory/uiux-agent/successes.json +0 -1
  418. package/docs/LEARNING_SYSTEM.md +0 -326
  419. package/docs/SYSTEM_FLOW_AUDIT.md +0 -115
  420. package/skills/loki-mode/.github/workflows/claude-code-review.yml +0 -57
  421. package/skills/loki-mode/.github/workflows/claude.yml +0 -50
  422. package/skills/loki-mode/.github/workflows/release.yml +0 -128
  423. package/skills/loki-mode/autonomy/.loki/dashboard/index.html +0 -497
  424. package/skills/loki-mode/benchmarks/datasets/humaneval.jsonl +0 -164
  425. package/skills/loki-mode/benchmarks/datasets/swebench-lite.json +0 -10
  426. package/skills/loki-mode/benchmarks/prepare-submission.sh +0 -215
  427. package/skills/loki-mode/benchmarks/results/2026-01-05-00-23-56/SUMMARY.md +0 -48
  428. package/skills/loki-mode/benchmarks/results/2026-01-05-00-23-56/humaneval-results.json +0 -15
  429. package/skills/loki-mode/benchmarks/results/2026-01-05-00-23-56/swebench-results.json +0 -10
  430. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/SUMMARY.md +0 -50
  431. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-results.json +0 -1000
  432. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/0.py +0 -16
  433. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/1.py +0 -28
  434. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/10.py +0 -25
  435. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/100.py +0 -20
  436. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/101.py +0 -15
  437. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/102.py +0 -16
  438. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/103.py +0 -15
  439. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/104.py +0 -22
  440. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/105.py +0 -39
  441. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/106.py +0 -21
  442. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/107.py +0 -35
  443. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/108.py +0 -22
  444. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/109.py +0 -41
  445. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/11.py +0 -16
  446. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/110.py +0 -20
  447. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/111.py +0 -28
  448. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/112.py +0 -14
  449. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/113.py +0 -19
  450. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/114.py +0 -16
  451. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/115.py +0 -41
  452. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/116.py +0 -12
  453. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/117.py +0 -25
  454. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/118.py +0 -26
  455. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/119.py +0 -30
  456. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/12.py +0 -21
  457. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/120.py +0 -28
  458. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/121.py +0 -10
  459. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/122.py +0 -19
  460. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/123.py +0 -31
  461. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/124.py +0 -56
  462. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/125.py +0 -20
  463. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/126.py +0 -29
  464. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/127.py +0 -47
  465. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/128.py +0 -25
  466. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/129.py +0 -61
  467. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/13.py +0 -10
  468. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/130.py +0 -29
  469. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/131.py +0 -18
  470. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/132.py +0 -36
  471. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/133.py +0 -16
  472. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/134.py +0 -27
  473. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/135.py +0 -15
  474. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/136.py +0 -19
  475. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/137.py +0 -26
  476. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/138.py +0 -8
  477. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/139.py +0 -18
  478. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/14.py +0 -9
  479. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/140.py +0 -30
  480. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/141.py +0 -35
  481. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/142.py +0 -20
  482. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/143.py +0 -35
  483. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/144.py +0 -19
  484. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/145.py +0 -22
  485. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/146.py +0 -21
  486. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/147.py +0 -22
  487. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/148.py +0 -28
  488. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/149.py +0 -17
  489. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/15.py +0 -8
  490. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/150.py +0 -15
  491. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/151.py +0 -20
  492. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/152.py +0 -16
  493. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/153.py +0 -30
  494. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/154.py +0 -15
  495. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/155.py +0 -15
  496. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/156.py +0 -32
  497. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/157.py +0 -12
  498. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/158.py +0 -11
  499. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/159.py +0 -33
  500. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/16.py +0 -8
  501. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/160.py +0 -29
  502. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/161.py +0 -24
  503. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/162.py +0 -11
  504. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/163.py +0 -14
  505. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/17.py +0 -27
  506. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/18.py +0 -23
  507. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/19.py +0 -21
  508. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/2.py +0 -10
  509. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/20.py +0 -22
  510. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/21.py +0 -13
  511. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/22.py +0 -11
  512. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/23.py +0 -8
  513. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/24.py +0 -9
  514. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/25.py +0 -24
  515. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/26.py +0 -12
  516. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/27.py +0 -6
  517. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/28.py +0 -11
  518. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/29.py +0 -11
  519. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/3.py +0 -18
  520. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/30.py +0 -8
  521. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/31.py +0 -27
  522. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/32.py +0 -50
  523. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/33.py +0 -20
  524. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/34.py +0 -6
  525. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/35.py +0 -8
  526. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/36.py +0 -14
  527. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/37.py +0 -14
  528. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/38.py +0 -11
  529. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/39.py +0 -35
  530. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/4.py +0 -14
  531. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/40.py +0 -24
  532. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/41.py +0 -13
  533. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/42.py +0 -8
  534. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/43.py +0 -22
  535. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/44.py +0 -18
  536. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/45.py +0 -6
  537. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/46.py +0 -23
  538. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/47.py +0 -14
  539. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/48.py +0 -13
  540. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/49.py +0 -14
  541. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/5.py +0 -19
  542. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/50.py +0 -12
  543. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/51.py +0 -18
  544. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/52.py +0 -8
  545. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/53.py +0 -8
  546. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/54.py +0 -17
  547. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/55.py +0 -18
  548. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/56.py +0 -22
  549. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/57.py +0 -16
  550. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/58.py +0 -9
  551. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/59.py +0 -22
  552. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/6.py +0 -26
  553. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/60.py +0 -14
  554. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/61.py +0 -22
  555. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/62.py +0 -10
  556. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/63.py +0 -25
  557. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/64.py +0 -20
  558. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/65.py +0 -14
  559. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/66.py +0 -14
  560. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/67.py +0 -16
  561. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/68.py +0 -50
  562. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/69.py +0 -21
  563. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/7.py +0 -11
  564. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/70.py +0 -26
  565. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/71.py +0 -17
  566. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/72.py +0 -21
  567. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/73.py +0 -17
  568. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/74.py +0 -21
  569. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/75.py +0 -30
  570. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/76.py +0 -20
  571. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/77.py +0 -17
  572. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/78.py +0 -24
  573. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/79.py +0 -13
  574. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/8.py +0 -17
  575. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/80.py +0 -18
  576. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/81.py +0 -54
  577. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/82.py +0 -20
  578. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/83.py +0 -21
  579. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/84.py +0 -16
  580. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/85.py +0 -12
  581. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/86.py +0 -16
  582. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/87.py +0 -29
  583. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/88.py +0 -24
  584. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/89.py +0 -23
  585. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/9.py +0 -20
  586. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/90.py +0 -20
  587. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/91.py +0 -20
  588. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/92.py +0 -25
  589. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/93.py +0 -34
  590. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/94.py +0 -33
  591. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/95.py +0 -27
  592. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/96.py +0 -28
  593. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/97.py +0 -13
  594. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/98.py +0 -15
  595. package/skills/loki-mode/benchmarks/results/2026-01-05-00-49-17/humaneval-solutions/99.py +0 -30
  596. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/SUMMARY.md +0 -48
  597. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/astropy__astropy-12907.patch +0 -16
  598. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/astropy__astropy-14182.patch +0 -59
  599. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/astropy__astropy-14365.patch +0 -21
  600. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/astropy__astropy-14995.patch +0 -15
  601. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/astropy__astropy-6938.patch +0 -20
  602. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/astropy__astropy-7746.patch +0 -31
  603. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-10914.patch +0 -72
  604. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-10924.patch +0 -41
  605. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11001.patch +0 -80
  606. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11019.patch +0 -489
  607. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11039.patch +0 -87
  608. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11049.patch +0 -24
  609. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11099.patch +0 -28
  610. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11133.patch +0 -20
  611. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11179.patch +0 -49
  612. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11283.patch +0 -47
  613. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11422.patch +0 -25
  614. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11564.patch +0 -285
  615. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11583.patch +0 -22
  616. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11620.patch +0 -21
  617. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11630.patch +0 -45
  618. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11742.patch +0 -32
  619. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11797.patch +0 -38
  620. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11815.patch +0 -20
  621. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11848.patch +0 -21
  622. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11905.patch +0 -16
  623. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11910.patch +0 -29
  624. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11964.patch +0 -54
  625. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-11999.patch +0 -15
  626. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-12113.patch +0 -31
  627. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-12125.patch +0 -27
  628. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-12184.patch +0 -28
  629. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-12284.patch +0 -31
  630. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-12286.patch +0 -29
  631. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-12308.patch +0 -22
  632. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-12453.patch +0 -31
  633. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-12470.patch +0 -53
  634. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-12497.patch +0 -26
  635. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-12589.patch +0 -157
  636. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-12700.patch +0 -27
  637. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-12708.patch +0 -64
  638. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-12747.patch +0 -15
  639. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-12856.patch +0 -16
  640. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-12908.patch +0 -15
  641. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-12915.patch +0 -82
  642. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-12983.patch +0 -21
  643. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13028.patch +0 -13
  644. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13033.patch +0 -17
  645. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13158.patch +0 -14
  646. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13220.patch +0 -57
  647. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13230.patch +0 -14
  648. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13265.patch +0 -44
  649. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13315.patch +0 -53
  650. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13321.patch +0 -33
  651. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13401.patch +0 -30
  652. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13447.patch +0 -40
  653. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13448.patch +0 -203
  654. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13551.patch +0 -17
  655. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13590.patch +0 -17
  656. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13658.patch +0 -19
  657. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13660.patch +0 -27
  658. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13710.patch +0 -23
  659. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13757.patch +0 -51
  660. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13768.patch +0 -32
  661. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13925.patch +0 -116
  662. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13933.patch +0 -62
  663. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-13964.patch +0 -17
  664. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-14016.patch +0 -138
  665. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-14017.patch +0 -88
  666. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-14155.patch +0 -74
  667. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-14238.patch +0 -132
  668. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-14382.patch +0 -17
  669. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-14411.patch +0 -16
  670. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-14534.patch +0 -29
  671. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-14580.patch +0 -15
  672. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-14608.patch +0 -32
  673. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-14667.patch +0 -33
  674. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-14672.patch +0 -59
  675. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-14730.patch +0 -80
  676. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-14752.patch +0 -35
  677. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-14787.patch +0 -24
  678. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-14855.patch +0 -23
  679. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-14915.patch +0 -19
  680. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-14997.patch +0 -40
  681. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-14999.patch +0 -26
  682. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15061.patch +0 -24
  683. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15202.patch +0 -18
  684. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15213.patch +0 -70
  685. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15252.patch +0 -63
  686. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15320.patch +0 -36
  687. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15347.patch +0 -36
  688. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15388.patch +0 -26
  689. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15400.patch +0 -14
  690. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15498.patch +0 -14
  691. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15695.patch +0 -32
  692. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15738.patch +0 -251
  693. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15781.patch +0 -18
  694. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15789.patch +0 -50
  695. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15790.patch +0 -34
  696. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15814.patch +0 -18
  697. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15819.patch +0 -127
  698. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15851.patch +0 -16
  699. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15902.patch +0 -15
  700. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-15996.patch +0 -33
  701. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-16041.patch +0 -30
  702. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-16046.patch +0 -16
  703. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-16139.patch +0 -19
  704. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-16229.patch +0 -142
  705. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-16255.patch +0 -21
  706. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-16379.patch +0 -32
  707. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-16400.patch +0 -34
  708. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-16408.patch +0 -27
  709. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-16527.patch +0 -14
  710. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-16595.patch +0 -14
  711. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-16816.patch +0 -30
  712. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-16820.patch +0 -188
  713. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-16873.patch +0 -37
  714. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-17051.patch +0 -51
  715. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/django__django-17087.patch +0 -21
  716. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-18869.patch +0 -75
  717. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-22711.patch +0 -42
  718. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-22835.patch +0 -58
  719. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-23299.patch +0 -92
  720. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-23314.patch +0 -16
  721. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-23476.patch +0 -73
  722. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-23562.patch +0 -33
  723. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-23563.patch +0 -22
  724. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-23913.patch +0 -207
  725. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-23964.patch +0 -29
  726. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-23987.patch +0 -43
  727. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-24149.patch +0 -27
  728. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-24265.patch +0 -76
  729. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-24334.patch +0 -17
  730. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-24970.patch +0 -24
  731. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-25079.patch +0 -15
  732. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-25311.patch +0 -29
  733. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-25332.patch +0 -28
  734. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-25433.patch +0 -240
  735. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-25442.patch +0 -28
  736. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-25498.patch +0 -79
  737. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-26011.patch +0 -90
  738. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/matplotlib__matplotlib-26020.patch +0 -35
  739. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/mwaskom__seaborn-2848.patch +0 -55
  740. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/mwaskom__seaborn-3010.patch +0 -60
  741. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/mwaskom__seaborn-3190.patch +0 -18
  742. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/mwaskom__seaborn-3407.patch +0 -28
  743. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pallets__flask-4045.patch +0 -23
  744. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pallets__flask-4992.patch +0 -33
  745. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pallets__flask-5063.patch +0 -99
  746. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/psf__requests-1963.patch +0 -117
  747. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/psf__requests-2148.patch +0 -37
  748. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/psf__requests-2317.patch +0 -54
  749. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/psf__requests-2674.patch +0 -157
  750. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/psf__requests-3362.patch +0 -19
  751. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/psf__requests-863.patch +0 -35
  752. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pydata__xarray-3364.patch +0 -392
  753. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pydata__xarray-4094.patch +0 -40
  754. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pydata__xarray-4248.patch +0 -124
  755. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pydata__xarray-4493.patch +0 -39
  756. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pydata__xarray-5131.patch +0 -45
  757. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pylint-dev__pylint-5859.patch +0 -13
  758. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pylint-dev__pylint-6506.patch +0 -25
  759. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pylint-dev__pylint-7080.patch +0 -33
  760. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pylint-dev__pylint-7114.patch +0 -161
  761. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pylint-dev__pylint-7228.patch +0 -34
  762. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pylint-dev__pylint-7993.patch +0 -21
  763. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pytest-dev__pytest-11143.patch +0 -25
  764. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pytest-dev__pytest-11148.patch +0 -57
  765. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pytest-dev__pytest-5103.patch +0 -345
  766. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pytest-dev__pytest-5221.patch +0 -28
  767. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pytest-dev__pytest-5227.patch +0 -16
  768. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pytest-dev__pytest-5413.patch +0 -35
  769. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pytest-dev__pytest-5495.patch +0 -24
  770. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pytest-dev__pytest-5692.patch +0 -33
  771. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pytest-dev__pytest-6116.patch +0 -12
  772. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pytest-dev__pytest-7168.patch +0 -28
  773. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pytest-dev__pytest-7220.patch +0 -535
  774. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pytest-dev__pytest-7373.patch +0 -47
  775. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pytest-dev__pytest-7432.patch +0 -76
  776. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pytest-dev__pytest-7490.patch +0 -61
  777. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pytest-dev__pytest-8365.patch +0 -123
  778. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pytest-dev__pytest-8906.patch +0 -38
  779. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/pytest-dev__pytest-9359.patch +0 -22
  780. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-10297.patch +0 -53
  781. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-10508.patch +0 -17
  782. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-10949.patch +0 -94
  783. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-11040.patch +0 -39
  784. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-11281.patch +0 -65
  785. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-12471.patch +0 -54
  786. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-13142.patch +0 -35
  787. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-13241.patch +0 -29
  788. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-13439.patch +0 -34
  789. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-13496.patch +0 -59
  790. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-13497.patch +0 -15
  791. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-13584.patch +0 -25
  792. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-13779.patch +0 -16
  793. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-14087.patch +0 -33
  794. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-14092.patch +0 -35
  795. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-14894.patch +0 -24
  796. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-14983.patch +0 -23
  797. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-15512.patch +0 -77
  798. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-15535.patch +0 -44
  799. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-25500.patch +0 -64
  800. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-25570.patch +0 -96
  801. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-25638.patch +0 -52
  802. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/scikit-learn__scikit-learn-25747.patch +0 -29
  803. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sphinx-doc__sphinx-10325.patch +0 -185
  804. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sphinx-doc__sphinx-10451.patch +0 -129
  805. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sphinx-doc__sphinx-11445.patch +0 -119
  806. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sphinx-doc__sphinx-7686.patch +0 -19
  807. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sphinx-doc__sphinx-7738.patch +0 -37
  808. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sphinx-doc__sphinx-7975.patch +0 -89
  809. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sphinx-doc__sphinx-8273.patch +0 -38
  810. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sphinx-doc__sphinx-8282.patch +0 -95
  811. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sphinx-doc__sphinx-8435.patch +0 -104
  812. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sphinx-doc__sphinx-8474.patch +0 -73
  813. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sphinx-doc__sphinx-8506.patch +0 -49
  814. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sphinx-doc__sphinx-8595.patch +0 -15
  815. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sphinx-doc__sphinx-8627.patch +0 -50
  816. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sphinx-doc__sphinx-8713.patch +0 -41
  817. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sphinx-doc__sphinx-8721.patch +0 -25
  818. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sphinx-doc__sphinx-8801.patch +0 -73
  819. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-11400.patch +0 -27
  820. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-11870.patch +0 -96
  821. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-11897.patch +0 -134
  822. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-12171.patch +0 -17
  823. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-12236.patch +0 -20
  824. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-12419.patch +0 -35
  825. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-12454.patch +0 -55
  826. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-12481.patch +0 -68
  827. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-13031.patch +0 -33
  828. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-13043.patch +0 -35
  829. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-13146.patch +0 -32
  830. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-13177.patch +0 -36
  831. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-13437.patch +0 -15
  832. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-13471.patch +0 -26
  833. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-13480.patch +0 -13
  834. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-13647.patch +0 -41
  835. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-13773.patch +0 -22
  836. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-13895.patch +0 -28
  837. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-13915.patch +0 -97
  838. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-13971.patch +0 -19
  839. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-14024.patch +0 -27
  840. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-14308.patch +0 -20
  841. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-14317.patch +0 -42
  842. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-14396.patch +0 -38
  843. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-14774.patch +0 -13
  844. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-14817.patch +0 -54
  845. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-15011.patch +0 -38
  846. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-15308.patch +0 -21
  847. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-15345.patch +0 -27
  848. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-15346.patch +0 -26
  849. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-15609.patch +0 -29
  850. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-15678.patch +0 -92
  851. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-16106.patch +0 -78
  852. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-16281.patch +0 -141
  853. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-16503.patch +0 -69
  854. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-16792.patch +0 -16
  855. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-16988.patch +0 -16
  856. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-17022.patch +0 -65
  857. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-17139.patch +0 -33
  858. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-17630.patch +0 -90
  859. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-17655.patch +0 -23
  860. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-18057.patch +0 -28
  861. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-18087.patch +0 -81
  862. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-18189.patch +0 -13
  863. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-18199.patch +0 -23
  864. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-18532.patch +0 -130
  865. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-18621.patch +0 -15
  866. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-18698.patch +0 -105
  867. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-18835.patch +0 -30
  868. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-19007.patch +0 -66
  869. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-19254.patch +0 -72
  870. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-19487.patch +0 -23
  871. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-20049.patch +0 -125
  872. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-20154.patch +0 -46
  873. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-20212.patch +0 -17
  874. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-20322.patch +0 -24
  875. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-20442.patch +0 -27
  876. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-20590.patch +0 -23
  877. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-20639.patch +0 -66
  878. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-21055.patch +0 -56
  879. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-21171.patch +0 -36
  880. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-21379.patch +0 -36
  881. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-21612.patch +0 -26
  882. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-21614.patch +0 -37
  883. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-21627.patch +0 -26
  884. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-21847.patch +0 -24
  885. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-22005.patch +0 -36
  886. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-22714.patch +0 -486
  887. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-22840.patch +0 -76
  888. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-23117.patch +0 -24
  889. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-23191.patch +0 -302
  890. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-23262.patch +0 -13
  891. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-24066.patch +0 -66
  892. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-24102.patch +0 -12
  893. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-24152.patch +0 -25
  894. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-24213.patch +0 -25
  895. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-patches/sympy__sympy-24909.patch +0 -19
  896. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-predictions.json +0 -1502
  897. package/skills/loki-mode/benchmarks/results/2026-01-05-01-24-17/swebench-results.json +0 -1516
  898. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/SUMMARY.md +0 -48
  899. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/astropy__astropy-12907.patch +0 -31
  900. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/astropy__astropy-14182.patch +0 -42
  901. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/astropy__astropy-14365.patch +0 -50
  902. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/astropy__astropy-14995.patch +0 -17
  903. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/astropy__astropy-6938.patch +0 -13
  904. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/astropy__astropy-7746.patch +0 -21
  905. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-10914.patch +0 -76
  906. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-10924.patch +0 -33
  907. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11001.patch +0 -35
  908. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11019.patch +0 -90
  909. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11039.patch +0 -37
  910. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11049.patch +0 -10
  911. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11099.patch +0 -24
  912. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11133.patch +0 -21
  913. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11179.patch +0 -47
  914. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11283.patch +0 -35
  915. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11422.patch +0 -18
  916. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11564.patch +0 -62
  917. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11583.patch +0 -25
  918. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11620.patch +0 -19
  919. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11630.patch +0 -79
  920. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11742.patch +0 -38
  921. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11797.patch +0 -15
  922. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11815.patch +0 -20
  923. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11848.patch +0 -19
  924. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11905.patch +0 -18
  925. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11910.patch +0 -17
  926. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11964.patch +0 -31
  927. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-11999.patch +0 -26
  928. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-12113.patch +0 -27
  929. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-12125.patch +0 -17
  930. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-12184.patch +0 -83
  931. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-12284.patch +0 -21
  932. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-12286.patch +0 -21
  933. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-12308.patch +0 -24
  934. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-12453.patch +0 -31
  935. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-12470.patch +0 -137
  936. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-12497.patch +0 -21
  937. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-12589.patch +0 -28
  938. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-12700.patch +0 -17
  939. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-12708.patch +0 -22
  940. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-12747.patch +0 -43
  941. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-12856.patch +0 -61
  942. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-12908.patch +0 -19
  943. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-12915.patch +0 -39
  944. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-12983.patch +0 -21
  945. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-13028.patch +0 -13
  946. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-13033.patch +0 -26
  947. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-13158.patch +0 -12
  948. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-patches/django__django-13220.patch +0 -55
  949. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-predictions.json +0 -252
  950. package/skills/loki-mode/benchmarks/results/2026-01-05-01-35-39/swebench-results.json +0 -266
  951. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/SUMMARY.md +0 -32
  952. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/astropy__astropy-12907.patch +0 -23
  953. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/astropy__astropy-14182.patch +0 -19
  954. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/astropy__astropy-14365.patch +0 -44
  955. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/astropy__astropy-14995.patch +0 -19
  956. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/astropy__astropy-6938.patch +0 -14
  957. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/astropy__astropy-7746.patch +0 -28
  958. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-10914.patch +0 -76
  959. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-10924.patch +0 -25
  960. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11001.patch +0 -20
  961. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11019.patch +0 -158
  962. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11039.patch +0 -38
  963. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11049.patch +0 -18
  964. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11099.patch +0 -24
  965. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11133.patch +0 -15
  966. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11179.patch +0 -14
  967. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11283.patch +0 -29
  968. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11422.patch +0 -21
  969. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11564.patch +0 -140
  970. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11583.patch +0 -24
  971. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11620.patch +0 -18
  972. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11630.patch +0 -38
  973. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11742.patch +0 -49
  974. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11797.patch +0 -18
  975. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11815.patch +0 -22
  976. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11848.patch +0 -24
  977. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11905.patch +0 -32
  978. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11910.patch +0 -100
  979. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11964.patch +0 -62
  980. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-11999.patch +0 -20
  981. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-12113.patch +0 -24
  982. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-12125.patch +0 -17
  983. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-12184.patch +0 -82
  984. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-12284.patch +0 -15
  985. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-12286.patch +0 -41
  986. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-12308.patch +0 -31
  987. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-12453.patch +0 -32
  988. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-12470.patch +0 -21
  989. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-12497.patch +0 -17
  990. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-12589.patch +0 -56
  991. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-12700.patch +0 -28
  992. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-12708.patch +0 -61
  993. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-12747.patch +0 -15
  994. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-12856.patch +0 -62
  995. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-12908.patch +0 -21
  996. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-12915.patch +0 -18
  997. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-12983.patch +0 -21
  998. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13028.patch +0 -18
  999. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13033.patch +0 -15
  1000. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13158.patch +0 -15
  1001. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13220.patch +0 -126
  1002. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13230.patch +0 -24
  1003. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13265.patch +0 -87
  1004. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13315.patch +0 -16
  1005. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13321.patch +0 -29
  1006. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13401.patch +0 -31
  1007. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13447.patch +0 -42
  1008. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13448.patch +0 -94
  1009. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13551.patch +0 -16
  1010. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13590.patch +0 -15
  1011. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13658.patch +0 -22
  1012. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13660.patch +0 -22
  1013. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13710.patch +0 -42
  1014. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13757.patch +0 -32
  1015. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13768.patch +0 -32
  1016. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13925.patch +0 -20
  1017. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13933.patch +0 -31
  1018. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-13964.patch +0 -15
  1019. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-14016.patch +0 -103
  1020. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-14017.patch +0 -20
  1021. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-14155.patch +0 -121
  1022. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-14238.patch +0 -14
  1023. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-14382.patch +0 -15
  1024. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-14411.patch +0 -16
  1025. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-14534.patch +0 -15
  1026. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-14580.patch +0 -15
  1027. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-14608.patch +0 -34
  1028. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-14667.patch +0 -29
  1029. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-14672.patch +0 -12
  1030. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-14730.patch +0 -43
  1031. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-14752.patch +0 -32
  1032. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-14787.patch +0 -35
  1033. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-14855.patch +0 -23
  1034. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-14915.patch +0 -21
  1035. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-14997.patch +0 -15
  1036. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-14999.patch +0 -28
  1037. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15061.patch +0 -19
  1038. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15202.patch +0 -22
  1039. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15213.patch +0 -16
  1040. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15252.patch +0 -59
  1041. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15320.patch +0 -13
  1042. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15347.patch +0 -15
  1043. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15388.patch +0 -25
  1044. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15400.patch +0 -19
  1045. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15498.patch +0 -14
  1046. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15695.patch +0 -39
  1047. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15738.patch +0 -185
  1048. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15781.patch +0 -20
  1049. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15789.patch +0 -69
  1050. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15790.patch +0 -38
  1051. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15814.patch +0 -14
  1052. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15819.patch +0 -101
  1053. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15851.patch +0 -40
  1054. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15902.patch +0 -17
  1055. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-15996.patch +0 -28
  1056. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-16041.patch +0 -27
  1057. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-16046.patch +0 -17
  1058. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-16139.patch +0 -15
  1059. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-16229.patch +0 -19
  1060. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-16255.patch +0 -17
  1061. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-16379.patch +0 -21
  1062. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-16400.patch +0 -29
  1063. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-16408.patch +0 -39
  1064. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-16527.patch +0 -14
  1065. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-16595.patch +0 -39
  1066. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-16816.patch +0 -27
  1067. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-16820.patch +0 -98
  1068. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-16873.patch +0 -23
  1069. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-16910.patch +0 -39
  1070. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-17051.patch +0 -15
  1071. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/django__django-17087.patch +0 -19
  1072. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-18869.patch +0 -29
  1073. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-22711.patch +0 -38
  1074. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-22835.patch +0 -27
  1075. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-23299.patch +0 -65
  1076. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-23314.patch +0 -15
  1077. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-23476.patch +0 -19
  1078. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-23562.patch +0 -23
  1079. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-23563.patch +0 -14
  1080. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-23913.patch +0 -95
  1081. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-23964.patch +0 -14
  1082. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-23987.patch +0 -55
  1083. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-24149.patch +0 -20
  1084. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-24265.patch +0 -52
  1085. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-24334.patch +0 -23
  1086. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-24970.patch +0 -24
  1087. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-25079.patch +0 -38
  1088. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-25311.patch +0 -62
  1089. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-25332.patch +0 -42
  1090. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-25433.patch +0 -126
  1091. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-25442.patch +0 -27
  1092. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-25498.patch +0 -67
  1093. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-26011.patch +0 -55
  1094. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/matplotlib__matplotlib-26020.patch +0 -42
  1095. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/mwaskom__seaborn-2848.patch +0 -95
  1096. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/mwaskom__seaborn-3010.patch +0 -17
  1097. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/mwaskom__seaborn-3190.patch +0 -73
  1098. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/mwaskom__seaborn-3407.patch +0 -31
  1099. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pallets__flask-4045.patch +0 -21
  1100. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pallets__flask-4992.patch +0 -47
  1101. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pallets__flask-5063.patch +0 -90
  1102. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/psf__requests-1963.patch +0 -29
  1103. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/psf__requests-2148.patch +0 -79
  1104. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/psf__requests-2317.patch +0 -38
  1105. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/psf__requests-2674.patch +0 -58
  1106. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/psf__requests-3362.patch +0 -17
  1107. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/psf__requests-863.patch +0 -20
  1108. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pydata__xarray-3364.patch +0 -159
  1109. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pydata__xarray-4094.patch +0 -17
  1110. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pydata__xarray-4248.patch +0 -134
  1111. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pydata__xarray-4493.patch +0 -20
  1112. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pydata__xarray-5131.patch +0 -23
  1113. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pylint-dev__pylint-5859.patch +0 -15
  1114. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pylint-dev__pylint-6506.patch +0 -36
  1115. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pylint-dev__pylint-7080.patch +0 -31
  1116. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pylint-dev__pylint-7114.patch +0 -51
  1117. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pylint-dev__pylint-7228.patch +0 -80
  1118. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pylint-dev__pylint-7993.patch +0 -54
  1119. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pytest-dev__pytest-11143.patch +0 -17
  1120. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pytest-dev__pytest-11148.patch +0 -27
  1121. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pytest-dev__pytest-5103.patch +0 -350
  1122. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pytest-dev__pytest-5221.patch +0 -18
  1123. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pytest-dev__pytest-5227.patch +0 -15
  1124. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pytest-dev__pytest-5413.patch +0 -20
  1125. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pytest-dev__pytest-5495.patch +0 -44
  1126. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pytest-dev__pytest-5692.patch +0 -69
  1127. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pytest-dev__pytest-6116.patch +0 -17
  1128. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pytest-dev__pytest-7168.patch +0 -14
  1129. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pytest-dev__pytest-7220.patch +0 -391
  1130. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pytest-dev__pytest-7373.patch +0 -48
  1131. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pytest-dev__pytest-7432.patch +0 -99
  1132. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pytest-dev__pytest-7490.patch +0 -4
  1133. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pytest-dev__pytest-8365.patch +0 -27
  1134. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pytest-dev__pytest-8906.patch +0 -23
  1135. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/pytest-dev__pytest-9359.patch +0 -89
  1136. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-10297.patch +0 -22
  1137. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-10508.patch +0 -37
  1138. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-10949.patch +0 -66
  1139. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-11040.patch +0 -147
  1140. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-11281.patch +0 -107
  1141. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-12471.patch +0 -22
  1142. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-13142.patch +0 -19
  1143. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-13241.patch +0 -26
  1144. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-13439.patch +0 -29
  1145. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-13496.patch +0 -62
  1146. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-13497.patch +0 -23
  1147. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-13584.patch +0 -38
  1148. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-13779.patch +0 -16
  1149. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-14087.patch +0 -17
  1150. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-14092.patch +0 -61
  1151. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-14894.patch +0 -45
  1152. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-14983.patch +0 -36
  1153. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-15512.patch +0 -143
  1154. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-15535.patch +0 -17
  1155. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-25500.patch +0 -79
  1156. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-25570.patch +0 -71
  1157. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-25638.patch +0 -70
  1158. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/scikit-learn__scikit-learn-25747.patch +0 -14
  1159. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sphinx-doc__sphinx-10325.patch +0 -153
  1160. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sphinx-doc__sphinx-10451.patch +0 -99
  1161. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sphinx-doc__sphinx-11445.patch +0 -67
  1162. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sphinx-doc__sphinx-7686.patch +0 -50
  1163. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sphinx-doc__sphinx-7738.patch +0 -22
  1164. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sphinx-doc__sphinx-7975.patch +0 -92
  1165. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sphinx-doc__sphinx-8273.patch +0 -103
  1166. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sphinx-doc__sphinx-8282.patch +0 -45
  1167. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sphinx-doc__sphinx-8435.patch +0 -56
  1168. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sphinx-doc__sphinx-8474.patch +0 -21
  1169. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sphinx-doc__sphinx-8506.patch +0 -33
  1170. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sphinx-doc__sphinx-8595.patch +0 -45
  1171. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sphinx-doc__sphinx-8627.patch +0 -62
  1172. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sphinx-doc__sphinx-8713.patch +0 -19
  1173. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sphinx-doc__sphinx-8721.patch +0 -17
  1174. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sphinx-doc__sphinx-8801.patch +0 -101
  1175. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-11400.patch +0 -55
  1176. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-11870.patch +0 -47
  1177. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-11897.patch +0 -152
  1178. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-12171.patch +0 -20
  1179. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-12236.patch +0 -17
  1180. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-12419.patch +0 -37
  1181. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-12454.patch +0 -22
  1182. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-12481.patch +0 -44
  1183. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-13031.patch +0 -71
  1184. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-13043.patch +0 -25
  1185. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-13146.patch +0 -54
  1186. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-13177.patch +0 -14
  1187. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-13437.patch +0 -17
  1188. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-13471.patch +0 -15
  1189. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-13480.patch +0 -19
  1190. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-13647.patch +0 -22
  1191. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-13773.patch +0 -32
  1192. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-13895.patch +0 -55
  1193. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-13915.patch +0 -70
  1194. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-13971.patch +0 -20
  1195. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-14024.patch +0 -56
  1196. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-14308.patch +0 -189
  1197. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-14317.patch +0 -41
  1198. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-14396.patch +0 -32
  1199. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-14774.patch +0 -15
  1200. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-14817.patch +0 -19
  1201. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-15011.patch +0 -31
  1202. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-15308.patch +0 -21
  1203. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-15345.patch +0 -13
  1204. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-15346.patch +0 -21
  1205. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-15609.patch +0 -11
  1206. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-15678.patch +0 -87
  1207. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-16106.patch +0 -66
  1208. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-16281.patch +0 -88
  1209. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-16503.patch +0 -18
  1210. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-16792.patch +0 -20
  1211. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-16988.patch +0 -22
  1212. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-17022.patch +0 -38
  1213. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-17139.patch +0 -48
  1214. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-17630.patch +0 -116
  1215. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-17655.patch +0 -27
  1216. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-18057.patch +0 -31
  1217. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-18087.patch +0 -55
  1218. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-18189.patch +0 -15
  1219. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-18199.patch +0 -25
  1220. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-18532.patch +0 -84
  1221. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-18621.patch +0 -21
  1222. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-18698.patch +0 -60
  1223. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-18835.patch +0 -39
  1224. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-19007.patch +0 -143
  1225. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-19254.patch +0 -79
  1226. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-19487.patch +0 -37
  1227. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-20049.patch +0 -37
  1228. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-20154.patch +0 -27
  1229. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-20212.patch +0 -15
  1230. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-20322.patch +0 -23
  1231. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-20442.patch +0 -73
  1232. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-20590.patch +0 -16
  1233. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-20639.patch +0 -20
  1234. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-21055.patch +0 -47
  1235. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-21171.patch +0 -27
  1236. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-21379.patch +0 -27
  1237. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-21612.patch +0 -46
  1238. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-21614.patch +0 -23
  1239. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-21627.patch +0 -28
  1240. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-21847.patch +0 -24
  1241. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-22005.patch +0 -36
  1242. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-22714.patch +0 -19
  1243. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-22840.patch +0 -19
  1244. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-23117.patch +0 -18
  1245. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-23191.patch +0 -42
  1246. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-23262.patch +0 -18
  1247. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-24066.patch +0 -26
  1248. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-24102.patch +0 -17
  1249. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-24152.patch +0 -23
  1250. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-24213.patch +0 -18
  1251. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-patches/sympy__sympy-24909.patch +0 -39
  1252. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-predictions.json +0 -1802
  1253. package/skills/loki-mode/benchmarks/results/2026-01-05-10-37-54/swebench-loki-results.json +0 -1816
  1254. package/skills/loki-mode/benchmarks/results/SUMMARY.md +0 -32
  1255. package/skills/loki-mode/benchmarks/results/humaneval-loki-results.json +0 -1001
  1256. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/0.py +0 -21
  1257. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/1.py +0 -36
  1258. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/10.py +0 -30
  1259. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/100.py +0 -23
  1260. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/101.py +0 -17
  1261. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/102.py +0 -21
  1262. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/103.py +0 -22
  1263. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/104.py +0 -23
  1264. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/105.py +0 -34
  1265. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/106.py +0 -26
  1266. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/107.py +0 -40
  1267. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/108.py +0 -27
  1268. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/109.py +0 -53
  1269. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/11.py +0 -21
  1270. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/110.py +0 -25
  1271. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/111.py +0 -34
  1272. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/112.py +0 -20
  1273. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/113.py +0 -25
  1274. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/114.py +0 -24
  1275. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/115.py +0 -41
  1276. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/116.py +0 -17
  1277. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/117.py +0 -30
  1278. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/118.py +0 -31
  1279. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/119.py +0 -35
  1280. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/12.py +0 -27
  1281. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/120.py +0 -33
  1282. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/121.py +0 -15
  1283. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/122.py +0 -24
  1284. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/123.py +0 -35
  1285. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/124.py +0 -58
  1286. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/125.py +0 -25
  1287. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/126.py +0 -34
  1288. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/127.py +0 -41
  1289. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/128.py +0 -31
  1290. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/129.py +0 -62
  1291. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/13.py +0 -17
  1292. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/130.py +0 -35
  1293. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/131.py +0 -24
  1294. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/132.py +0 -32
  1295. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/133.py +0 -21
  1296. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/134.py +0 -23
  1297. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/135.py +0 -20
  1298. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/136.py +0 -24
  1299. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/137.py +0 -31
  1300. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/138.py +0 -13
  1301. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/139.py +0 -23
  1302. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/14.py +0 -14
  1303. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/140.py +0 -26
  1304. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/141.py +0 -42
  1305. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/142.py +0 -25
  1306. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/143.py +0 -40
  1307. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/144.py +0 -24
  1308. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/145.py +0 -24
  1309. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/146.py +0 -21
  1310. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/147.py +0 -32
  1311. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/148.py +0 -33
  1312. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/149.py +0 -22
  1313. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/15.py +0 -13
  1314. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/150.py +0 -26
  1315. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/151.py +0 -22
  1316. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/152.py +0 -21
  1317. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/153.py +0 -32
  1318. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/154.py +0 -25
  1319. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/155.py +0 -20
  1320. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/156.py +0 -39
  1321. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/157.py +0 -28
  1322. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/158.py +0 -16
  1323. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/159.py +0 -36
  1324. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/16.py +0 -13
  1325. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/160.py +0 -34
  1326. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/161.py +0 -29
  1327. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/162.py +0 -16
  1328. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/163.py +0 -18
  1329. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/17.py +0 -27
  1330. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/18.py +0 -23
  1331. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/19.py +0 -34
  1332. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/2.py +0 -15
  1333. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/20.py +0 -27
  1334. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/21.py +0 -18
  1335. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/22.py +0 -16
  1336. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/23.py +0 -13
  1337. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/24.py +0 -14
  1338. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/25.py +0 -29
  1339. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/26.py +0 -17
  1340. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/27.py +0 -11
  1341. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/28.py +0 -16
  1342. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/29.py +0 -16
  1343. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/3.py +0 -23
  1344. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/30.py +0 -13
  1345. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/31.py +0 -34
  1346. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/32.py +0 -37
  1347. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/33.py +0 -19
  1348. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/34.py +0 -11
  1349. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/35.py +0 -13
  1350. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/36.py +0 -19
  1351. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/37.py +0 -19
  1352. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/38.py +0 -25
  1353. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/39.py +0 -40
  1354. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/4.py +0 -23
  1355. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/40.py +0 -43
  1356. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/41.py +0 -18
  1357. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/42.py +0 -13
  1358. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/43.py +0 -27
  1359. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/44.py +0 -25
  1360. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/45.py +0 -11
  1361. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/46.py +0 -35
  1362. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/47.py +0 -19
  1363. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/48.py +0 -18
  1364. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/49.py +0 -26
  1365. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/5.py +0 -25
  1366. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/50.py +0 -10
  1367. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/51.py +0 -23
  1368. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/52.py +0 -13
  1369. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/53.py +0 -13
  1370. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/54.py +0 -22
  1371. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/55.py +0 -20
  1372. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/56.py +0 -27
  1373. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/57.py +0 -27
  1374. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/58.py +0 -14
  1375. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/59.py +0 -32
  1376. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/6.py +0 -33
  1377. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/60.py +0 -19
  1378. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/61.py +0 -27
  1379. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/62.py +0 -15
  1380. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/63.py +0 -30
  1381. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/64.py +0 -29
  1382. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/65.py +0 -18
  1383. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/66.py +0 -23
  1384. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/67.py +0 -22
  1385. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/68.py +0 -55
  1386. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/69.py +0 -26
  1387. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/7.py +0 -16
  1388. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/70.py +0 -35
  1389. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/71.py +0 -29
  1390. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/72.py +0 -24
  1391. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/73.py +0 -22
  1392. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/74.py +0 -26
  1393. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/75.py +0 -42
  1394. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/76.py +0 -25
  1395. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/77.py +0 -25
  1396. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/78.py +0 -29
  1397. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/79.py +0 -18
  1398. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/8.py +0 -24
  1399. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/80.py +0 -26
  1400. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/81.py +0 -61
  1401. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/82.py +0 -21
  1402. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/83.py +0 -13
  1403. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/84.py +0 -21
  1404. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/85.py +0 -17
  1405. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/86.py +0 -21
  1406. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/87.py +0 -31
  1407. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/88.py +0 -29
  1408. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/89.py +0 -25
  1409. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/9.py +0 -25
  1410. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/90.py +0 -21
  1411. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/91.py +0 -31
  1412. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/92.py +0 -29
  1413. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/93.py +0 -30
  1414. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/94.py +0 -32
  1415. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/95.py +0 -32
  1416. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/96.py +0 -38
  1417. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/97.py +0 -16
  1418. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/98.py +0 -20
  1419. package/skills/loki-mode/benchmarks/results/humaneval-loki-solutions/99.py +0 -36
  1420. package/skills/loki-mode/benchmarks/run-benchmarks.sh +0 -1948
  1421. package/skills/loki-mode/benchmarks/submission-template/README.md +0 -111
  1422. package/skills/loki-mode/benchmarks/submission-template/metadata.yaml +0 -76
  1423. package/skills/loki-mode/demo/README.md +0 -137
  1424. package/skills/loki-mode/demo/loki-demo.gif +0 -0
  1425. package/skills/loki-mode/demo/record-demo.sh +0 -69
  1426. package/skills/loki-mode/demo/record-full-demo.sh +0 -208
  1427. package/skills/loki-mode/demo/recordings/loki-demo.cast +0 -93
  1428. package/skills/loki-mode/demo/run-demo-auto.sh +0 -293
  1429. package/skills/loki-mode/demo/run-demo.sh +0 -323
  1430. package/skills/loki-mode/demo/vhs-tape.tape +0 -223
  1431. package/skills/loki-mode/demo/voice-over-script.md +0 -246
  1432. package/skills/loki-mode/examples/api-only.md +0 -79
  1433. package/skills/loki-mode/examples/full-stack-demo.md +0 -123
  1434. package/skills/loki-mode/examples/simple-todo-app.md +0 -60
  1435. package/skills/loki-mode/examples/static-landing-page.md +0 -73
  1436. package/skills/loki-mode/examples/todo-app-generated/.loki/CONTINUITY.md +0 -59
  1437. package/skills/loki-mode/examples/todo-app-generated/.loki/queue/completed.json +0 -1
  1438. package/skills/loki-mode/examples/todo-app-generated/.loki/queue/dead-letter.json +0 -1
  1439. package/skills/loki-mode/examples/todo-app-generated/.loki/queue/failed.json +0 -1
  1440. package/skills/loki-mode/examples/todo-app-generated/.loki/queue/in-progress.json +0 -1
  1441. package/skills/loki-mode/examples/todo-app-generated/.loki/queue/pending.json +0 -382
  1442. package/skills/loki-mode/examples/todo-app-generated/.loki/state/orchestrator.json +0 -41
  1443. package/skills/loki-mode/examples/todo-app-generated/E2E_VERIFICATION_REPORT.md +0 -668
  1444. package/skills/loki-mode/examples/todo-app-generated/PRD.md +0 -60
  1445. package/skills/loki-mode/examples/todo-app-generated/TASK_018_COMPLETION.md +0 -229
  1446. package/skills/loki-mode/examples/todo-app-generated/TESTING_DOCUMENTATION.md +0 -327
  1447. package/skills/loki-mode/examples/todo-app-generated/TEST_REPORT.md +0 -201
  1448. package/skills/loki-mode/examples/todo-app-generated/VERIFICATION_SUMMARY.txt +0 -362
  1449. package/skills/loki-mode/examples/todo-app-generated/backend/package-lock.json +0 -2698
  1450. package/skills/loki-mode/examples/todo-app-generated/backend/package.json +0 -26
  1451. package/skills/loki-mode/examples/todo-app-generated/backend/src/db/database.ts +0 -24
  1452. package/skills/loki-mode/examples/todo-app-generated/backend/src/db/db.ts +0 -35
  1453. package/skills/loki-mode/examples/todo-app-generated/backend/src/db/index.ts +0 -2
  1454. package/skills/loki-mode/examples/todo-app-generated/backend/src/db/migrations.ts +0 -31
  1455. package/skills/loki-mode/examples/todo-app-generated/backend/src/db/schema.sql +0 -8
  1456. package/skills/loki-mode/examples/todo-app-generated/backend/src/index.ts +0 -44
  1457. package/skills/loki-mode/examples/todo-app-generated/backend/src/routes/todos.ts +0 -155
  1458. package/skills/loki-mode/examples/todo-app-generated/backend/src/types/index.ts +0 -35
  1459. package/skills/loki-mode/examples/todo-app-generated/backend/todos.db-shm +0 -0
  1460. package/skills/loki-mode/examples/todo-app-generated/backend/todos.db-wal +0 -0
  1461. package/skills/loki-mode/examples/todo-app-generated/backend/tsconfig.json +0 -30
  1462. package/skills/loki-mode/examples/todo-app-generated/frontend/index.html +0 -13
  1463. package/skills/loki-mode/examples/todo-app-generated/frontend/package-lock.json +0 -2014
  1464. package/skills/loki-mode/examples/todo-app-generated/frontend/package.json +0 -26
  1465. package/skills/loki-mode/examples/todo-app-generated/frontend/src/App.css +0 -384
  1466. package/skills/loki-mode/examples/todo-app-generated/frontend/src/App.tsx +0 -81
  1467. package/skills/loki-mode/examples/todo-app-generated/frontend/src/api/todos.ts +0 -57
  1468. package/skills/loki-mode/examples/todo-app-generated/frontend/src/components/ConfirmDialog.tsx +0 -26
  1469. package/skills/loki-mode/examples/todo-app-generated/frontend/src/components/EmptyState.tsx +0 -8
  1470. package/skills/loki-mode/examples/todo-app-generated/frontend/src/components/TodoForm.tsx +0 -43
  1471. package/skills/loki-mode/examples/todo-app-generated/frontend/src/components/TodoItem.tsx +0 -36
  1472. package/skills/loki-mode/examples/todo-app-generated/frontend/src/components/TodoList.tsx +0 -27
  1473. package/skills/loki-mode/examples/todo-app-generated/frontend/src/hooks/useTodos.ts +0 -81
  1474. package/skills/loki-mode/examples/todo-app-generated/frontend/src/index.css +0 -48
  1475. package/skills/loki-mode/examples/todo-app-generated/frontend/src/main.tsx +0 -10
  1476. package/skills/loki-mode/examples/todo-app-generated/frontend/src/vite-env.d.ts +0 -1
  1477. package/skills/loki-mode/examples/todo-app-generated/frontend/tsconfig.json +0 -23
  1478. package/skills/loki-mode/examples/todo-app-generated/frontend/tsconfig.node.json +0 -10
  1479. package/skills/loki-mode/examples/todo-app-generated/frontend/vite.config.ts +0 -15
  1480. package/skills/theme-factory/theme-showcase.pdf +0 -0
  1481. package/skills/ui-ux-pro-max/scripts/__pycache__/core.cpython-314.pyc +0 -0
  1482. package/skills/ui-ux-pro-max/scripts/__pycache__/design_system.cpython-314.pyc +0 -0
  1483. package/skills/web-artifacts-builder/scripts/shadcn-components.tar.gz +0 -0
@@ -1,1948 +0,0 @@
1
- #!/bin/bash
2
- #===============================================================================
3
- # Loki Mode Benchmark Runner
4
- # Run HumanEval and SWE-bench benchmarks to validate multi-agent performance
5
- #
6
- # Usage:
7
- # ./benchmarks/run-benchmarks.sh [benchmark] [options]
8
- # ./benchmarks/run-benchmarks.sh humaneval # Setup only
9
- # ./benchmarks/run-benchmarks.sh humaneval --execute # Direct Claude (baseline)
10
- # ./benchmarks/run-benchmarks.sh humaneval --execute --loki # Multi-agent Loki Mode
11
- # ./benchmarks/run-benchmarks.sh humaneval --execute --limit 10 # First 10 problems
12
- # ./benchmarks/run-benchmarks.sh swebench --execute # Run SWE-bench
13
- # ./benchmarks/run-benchmarks.sh all --execute # Run all benchmarks
14
- #
15
- # Options:
16
- # --execute Actually run problems through Claude (vs just setup)
17
- # --loki Use Loki Mode multi-agent system (Architect->Engineer->QA->Reviewer)
18
- # --limit N Only run first N problems (useful for testing)
19
- # --parallel N Run N problems in parallel (default: 1)
20
- # --model MODEL Claude model to use (default: sonnet)
21
- # --timeout N Timeout per problem in seconds (default: 120)
22
- # --retries N Max RARV retry attempts for --loki mode (default: 3)
23
- #
24
- # Prerequisites:
25
- # - Python 3.8+
26
- # - Claude Code CLI
27
- # - Git
28
- #
29
- # Results are saved to:
30
- # ./benchmarks/results/YYYY-MM-DD-HH-MM-SS/
31
- #===============================================================================
32
-
33
# Shell options: treat unset variables as errors and make a pipeline fail when
# any stage fails. errexit (-e) is not enabled, so individual command failures
# must be checked explicitly where they matter.
set -o nounset
set -o pipefail

# --- Locations ----------------------------------------------------------------
# Directory containing this script, its parent, and a per-run results directory
# stamped with the start time of this invocation.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
RESULTS_DIR="$SCRIPT_DIR/results/$(date +%Y-%m-%d-%H-%M-%S)"

# --- Run-time configuration (defaults; overridden by parse_args) --------------
EXECUTE_MODE=false      # --execute : run problems through Claude instead of setup only
LOKI_MODE=false         # --loki    : use multi-agent Loki Mode vs direct Claude
PROBLEM_LIMIT=0         # --limit   : 0 = all problems
PARALLEL_COUNT=1        # --parallel
CLAUDE_MODEL="sonnet"   # --model
PROBLEM_TIMEOUT=120     # --timeout : seconds per problem
MAX_RETRIES=3           # --retries : RARV retry attempts

# --- ANSI color escapes (expanded later by the logging helpers) ---------------
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
MAGENTA='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m'            # reset / "no color"
56
-
57
# Logging helpers. Each prints its first argument to stdout behind a colored
# level tag. Backslash escapes in both the tag colors and the message are
# expanded (printf %b expands them the same way `echo -e` does).
log_info() {
    printf '%b\n' "${CYAN}[INFO]${NC} $1"
}

log_success() {
    printf '%b\n' "${GREEN}[PASS]${NC} $1"
}

log_warning() {
    printf '%b\n' "${YELLOW}[WARN]${NC} $1"
}

log_error() {
    printf '%b\n' "${RED}[FAIL]${NC} $1"
}

log_progress() {
    printf '%b\n' "${BLUE}[PROG]${NC} $1"
}
62
-
63
- #===============================================================================
64
- # Argument Parsing
65
- #===============================================================================
66
-
67
#######################################
# Parse command-line arguments into the global configuration variables.
#
# Globals written:
#   EXECUTE_MODE, LOKI_MODE, PROBLEM_LIMIT, PARALLEL_COUNT, CLAUDE_MODEL,
#   PROBLEM_TIMEOUT, MAX_RETRIES, BENCHMARK
# Arguments:
#   "$@" - the script's command line. The first non-option word selects the
#          benchmark; BENCHMARK defaults to "all" when none is given.
# Exits:
#   1 on an unknown option, on a value-taking option with no value, or on a
#   non-integer value for --limit/--parallel/--timeout/--retries.
#######################################
parse_args() {
    local positional=()
    local opt

    while [[ $# -gt 0 ]]; do
        case "$1" in
            --execute)
                EXECUTE_MODE=true
                shift
                ;;
            --loki)
                LOKI_MODE=true
                shift
                ;;
            --limit | --parallel | --model | --timeout | --retries)
                opt="$1"
                # Without this guard, a trailing "--limit" would trip nounset
                # on "$2" and abort with an unhelpful "unbound variable".
                if [[ $# -lt 2 ]]; then
                    log_error "Option $opt requires a value"
                    exit 1
                fi
                # Everything except --model is documented as an integer N.
                if [[ "$opt" != "--model" && ! "$2" =~ ^[0-9]+$ ]]; then
                    log_error "Option $opt expects a non-negative integer, got: $2"
                    exit 1
                fi
                case "$opt" in
                    --limit) PROBLEM_LIMIT="$2" ;;
                    --parallel) PARALLEL_COUNT="$2" ;;
                    --model) CLAUDE_MODEL="$2" ;;
                    --timeout) PROBLEM_TIMEOUT="$2" ;;
                    --retries) MAX_RETRIES="$2" ;;
                esac
                shift 2
                ;;
            -*)
                log_error "Unknown option: $1"
                exit 1
                ;;
            *)
                positional+=("$1")
                shift
                ;;
        esac
    done

    # Read the first positional word directly. Expanding "${positional[@]}"
    # on an empty array is an "unbound variable" error under `set -u` on
    # bash < 4.4, which broke invocations that pass only flags.
    BENCHMARK="${positional[0]:-all}"
}
115
-
116
- #===============================================================================
117
- # Setup
118
- #===============================================================================
119
-
120
#######################################
# Prepare the benchmark environment: create the working directories, verify
# the required tools are on PATH, and create/activate the Python virtualenv.
#
# Globals read:
#   RESULTS_DIR, SCRIPT_DIR
# Side effects:
#   Activates "$SCRIPT_DIR/venv" in the current shell.
# Exits:
#   1 if the directories cannot be created, python3 or claude is missing, or
#   the virtual environment cannot be created or activated.
#######################################
setup_environment() {
    local venv_dir="$SCRIPT_DIR/venv"

    log_info "Setting up benchmark environment..."

    # The script does not run with errexit, so each critical step is checked
    # here rather than reporting "Environment ready" after a silent failure.
    if ! mkdir -p "$RESULTS_DIR" "$SCRIPT_DIR/datasets" "$SCRIPT_DIR/workspaces"; then
        log_error "Could not create benchmark directories under $SCRIPT_DIR"
        exit 1
    fi

    # Check prerequisites
    if ! command -v python3 &> /dev/null; then
        log_error "Python 3 is required"
        exit 1
    fi

    if ! command -v claude &> /dev/null; then
        log_error "Claude Code CLI is required"
        exit 1
    fi

    # Test for the activate script rather than just the directory, so a venv
    # left half-created by an earlier failed run is rebuilt instead of reused.
    if [ ! -f "$venv_dir/bin/activate" ]; then
        log_info "Creating virtual environment..."
        if ! python3 -m venv "$venv_dir"; then
            log_error "Failed to create virtual environment at $venv_dir"
            exit 1
        fi
    fi

    # shellcheck disable=SC1091 -- path only exists at run time
    if ! source "$venv_dir/bin/activate"; then
        log_error "Failed to activate virtual environment at $venv_dir"
        exit 1
    fi

    # Dependency installation stays best-effort (the run continues, as it did
    # before), but a failure is now reported instead of being swallowed.
    if ! pip install -q requests tqdm; then
        log_warning "Could not install benchmark dependencies (requests, tqdm)"
    fi

    log_success "Environment ready"
}
149
-
150
- #===============================================================================
151
- # HumanEval Benchmark
152
- #===============================================================================
153
-
154
#######################################
# Fetch the HumanEval dataset into "$SCRIPT_DIR/datasets/humaneval.jsonl"
# unless a non-empty copy is already there.
#
# Globals read:
#   SCRIPT_DIR
# Returns:
#   0 when the dataset is present (already downloaded or freshly fetched).
# Exits:
#   1 when the download or decompression fails; no partial dataset file is
#   left behind in that case.
#######################################
download_humaneval() {
    local dataset_file="$SCRIPT_DIR/datasets/humaneval.jsonl"
    local url="https://github.com/openai/human-eval/raw/master/data/HumanEval.jsonl.gz"
    local tmp_file
    local -a stage_status

    # -s (exists and non-empty) instead of -f: an empty file left by an
    # earlier interrupted download must not count as "already downloaded".
    if [ -s "$dataset_file" ]; then
        log_info "HumanEval dataset already downloaded"
        return
    fi

    log_info "Downloading HumanEval dataset..."

    # Download next to the target and rename on success, so the final path
    # only ever holds a complete dataset.
    if ! tmp_file="$(mktemp "${dataset_file}.XXXXXX")"; then
        log_error "Could not create a temporary file next to $dataset_file"
        exit 1
    fi

    # -f makes curl fail on HTTP errors instead of piping an error page into
    # gunzip; -S still prints the error although -s silences progress output.
    curl -fsSL "$url" | gunzip > "$tmp_file"
    stage_status=("${PIPESTATUS[@]}")

    if (( stage_status[0] != 0 || stage_status[1] != 0 )) || [ ! -s "$tmp_file" ]; then
        rm -f -- "$tmp_file"
        log_error "Failed to download HumanEval dataset from $url"
        exit 1
    fi

    if ! mv -f -- "$tmp_file" "$dataset_file"; then
        rm -f -- "$tmp_file"
        log_error "Could not write $dataset_file"
        exit 1
    fi

    log_success "HumanEval dataset downloaded (164 problems)"
}
168
-
169
#######################################
# Entry point for the HumanEval benchmark: make sure the dataset is present,
# then hand off to the runner selected by the mode flags.
#
# Globals read:
#   EXECUTE_MODE - "true" to run problems, anything else for setup only
#   LOKI_MODE    - "true" to use the Loki runner (only consulted when
#                  EXECUTE_MODE is "true")
#######################################
run_humaneval() {
    local runner=run_humaneval_setup

    log_info "Running HumanEval benchmark..."

    download_humaneval

    # Pick the runner first, then invoke it once at the end.
    if [[ "$EXECUTE_MODE" == true ]]; then
        if [[ "$LOKI_MODE" == true ]]; then
            runner=run_humaneval_loki
        else
            runner=run_humaneval_execute
        fi
    fi

    "$runner"
}
184
-
185
#######################################
# Setup-only mode for HumanEval: load the dataset, count the problems, and
# write an INFRASTRUCTURE_READY summary to
# "$RESULTS_DIR/humaneval-results.json" without running any problem.
#
# Globals read:
#   SCRIPT_DIR, RESULTS_DIR
# Returns:
#   0 on success, 1 if the embedded Python step fails (e.g. dataset missing).
#######################################
run_humaneval_setup() {
    local py_status

    # The paths are passed to Python explicitly in its environment, so the
    # heredoc does not depend on SCRIPT_DIR/RESULTS_DIR having been exported
    # elsewhere (its fallbacks are '.' and './results').
    SCRIPT_DIR="$SCRIPT_DIR" RESULTS_DIR="$RESULTS_DIR" python3 << 'HUMANEVAL_SETUP'
import json
import os
from datetime import datetime

SCRIPT_DIR = os.environ.get('SCRIPT_DIR', '.')
RESULTS_DIR = os.environ.get('RESULTS_DIR', './results')

dataset_file = f"{SCRIPT_DIR}/datasets/humaneval.jsonl"
results_file = f"{RESULTS_DIR}/humaneval-results.json"

# One JSON object per line; blank lines are skipped rather than crashing the parser.
problems = []
with open(dataset_file, 'r') as f:
    for line in f:
        if line.strip():
            problems.append(json.loads(line))

print(f"Loaded {len(problems)} HumanEval problems")

results = {
    "benchmark": "HumanEval",
    "version": "1.0",
    "timestamp": datetime.now().isoformat(),
    "total_problems": len(problems),
    "status": "INFRASTRUCTURE_READY",
    "note": "Run with --execute to run actual tests.",
    "sample_problems": [p["task_id"] for p in problems[:5]]
}

os.makedirs(RESULTS_DIR, exist_ok=True)
with open(results_file, 'w') as f:
    json.dump(results, f, indent=2)

print(f"Results saved to {results_file}")
print("\nTo run actual benchmarks:")
print("  ./benchmarks/run-benchmarks.sh humaneval --execute")
print("  ./benchmarks/run-benchmarks.sh humaneval --execute --limit 10")
HUMANEVAL_SETUP
    py_status=$?

    # Only report success when the Python step actually succeeded.
    if (( py_status != 0 )); then
        log_error "HumanEval setup failed (python3 exited with status $py_status)"
        return 1
    fi

    log_success "HumanEval benchmark infrastructure ready"
    log_info "Results: $RESULTS_DIR/humaneval-results.json"
}
229
-
230
- run_humaneval_execute() {
231
- local dataset_file="$SCRIPT_DIR/datasets/humaneval.jsonl"
232
- local results_file="$RESULTS_DIR/humaneval-results.json"
233
- local solutions_dir="$RESULTS_DIR/humaneval-solutions"
234
-
235
- mkdir -p "$solutions_dir"
236
-
237
- log_info "Executing HumanEval benchmark with Claude..."
238
- log_info "Model: $CLAUDE_MODEL | Timeout: ${PROBLEM_TIMEOUT}s | Limit: ${PROBLEM_LIMIT:-all}"
239
-
240
- # Export variables for Python
241
- export PROBLEM_LIMIT PROBLEM_TIMEOUT CLAUDE_MODEL
242
-
243
- python3 << 'HUMANEVAL_EXECUTE'
244
- import json
245
- import subprocess
246
- import os
247
- import sys
248
- import time
249
- import tempfile
250
- import traceback
251
- from datetime import datetime
252
- from concurrent.futures import ThreadPoolExecutor, as_completed
253
-
254
- SCRIPT_DIR = os.environ.get('SCRIPT_DIR', '.')
255
- RESULTS_DIR = os.environ.get('RESULTS_DIR', './results')
256
- PROBLEM_LIMIT = int(os.environ.get('PROBLEM_LIMIT', '0'))
257
- PROBLEM_TIMEOUT = int(os.environ.get('PROBLEM_TIMEOUT', '120'))
258
- CLAUDE_MODEL = os.environ.get('CLAUDE_MODEL', 'sonnet')
259
-
260
- dataset_file = f"{SCRIPT_DIR}/datasets/humaneval.jsonl"
261
- results_file = f"{RESULTS_DIR}/humaneval-results.json"
262
- solutions_dir = f"{RESULTS_DIR}/humaneval-solutions"
263
-
264
- # Load problems
265
- problems = []
266
- with open(dataset_file, 'r') as f:
267
- for line in f:
268
- problems.append(json.loads(line))
269
-
270
- if PROBLEM_LIMIT > 0:
271
- problems = problems[:PROBLEM_LIMIT]
272
-
273
- print(f"\n{'='*60}")
274
- print(f" HumanEval Benchmark Execution")
275
- print(f" Problems: {len(problems)} | Model: {CLAUDE_MODEL}")
276
- print(f"{'='*60}\n")
277
-
278
- def solve_problem(problem):
279
- """Send a HumanEval problem to Claude and get solution."""
280
- task_id = problem["task_id"]
281
- prompt = problem["prompt"]
282
- entry_point = problem["entry_point"]
283
- test = problem["test"]
284
- canonical = problem.get("canonical_solution", "")
285
-
286
- # Create prompt for Claude - ask for COMPLETE function to avoid indentation issues
287
- claude_prompt = f'''You are solving a HumanEval coding problem. Complete the Python function below.
288
-
289
- {prompt}
290
-
291
- INSTRUCTIONS:
292
- 1. Output the COMPLETE function including the signature and docstring shown above
293
- 2. Fill in the implementation after the docstring
294
- 3. Use proper 4-space indentation for the function body
295
- 4. Output ONLY the Python code - no markdown, no explanation, no ```python blocks
296
- 5. The function must be syntactically valid Python
297
-
298
- Output the complete function now:'''
299
-
300
- try:
301
- # Call Claude
302
- result = subprocess.run(
303
- ['claude', '-p', claude_prompt, '--model', CLAUDE_MODEL],
304
- capture_output=True,
305
- text=True,
306
- timeout=PROBLEM_TIMEOUT
307
- )
308
-
309
- solution = result.stdout.strip()
310
-
311
- # Clean up solution - remove markdown code blocks if present
312
- if solution.startswith("```python"):
313
- solution = solution[9:]
314
- if solution.startswith("```"):
315
- solution = solution[3:]
316
- if solution.endswith("```"):
317
- solution = solution[:-3]
318
- solution = solution.strip()
319
-
320
- # Verify solution contains the function definition
321
- if f"def {entry_point}" not in solution:
322
- # Claude didn't include function signature, prepend it
323
- # Indent the body properly
324
- lines = solution.split('\n')
325
- indented_lines = [' ' + line if line.strip() and not line.startswith(' ') else line for line in lines]
326
- solution = prompt + '\n'.join(indented_lines)
327
-
328
- return {
329
- "task_id": task_id,
330
- "solution": solution,
331
- "solution_body": solution,
332
- "error": None
333
- }
334
- except subprocess.TimeoutExpired:
335
- return {
336
- "task_id": task_id,
337
- "solution": None,
338
- "solution_body": None,
339
- "error": "TIMEOUT"
340
- }
341
- except Exception as e:
342
- return {
343
- "task_id": task_id,
344
- "solution": None,
345
- "solution_body": None,
346
- "error": str(e)
347
- }
348
-
349
- def test_solution(problem, solution):
350
- """Execute the solution against HumanEval test cases."""
351
- task_id = problem["task_id"]
352
- test = problem["test"]
353
- entry_point = problem["entry_point"]
354
-
355
- if solution is None:
356
- return {"task_id": task_id, "passed": False, "error": "No solution"}
357
-
358
- # Create test file
359
- test_code = f'''
360
- {solution}
361
-
362
- {test}
363
-
364
- # Run the check function
365
- check({entry_point})
366
- print("PASSED")
367
- '''
368
-
369
- try:
370
- with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
371
- f.write(test_code)
372
- test_file = f.name
373
-
374
- result = subprocess.run(
375
- ['python3', test_file],
376
- capture_output=True,
377
- text=True,
378
- timeout=30
379
- )
380
-
381
- os.unlink(test_file)
382
-
383
- passed = "PASSED" in result.stdout
384
- return {
385
- "task_id": task_id,
386
- "passed": passed,
387
- "stdout": result.stdout[:500],
388
- "stderr": result.stderr[:500] if not passed else "",
389
- "error": None
390
- }
391
- except subprocess.TimeoutExpired:
392
- return {"task_id": task_id, "passed": False, "error": "TEST_TIMEOUT"}
393
- except Exception as e:
394
- return {"task_id": task_id, "passed": False, "error": str(e)}
395
-
396
- # Run benchmark
397
- results = {
398
- "benchmark": "HumanEval",
399
- "version": "1.0",
400
- "timestamp": datetime.now().isoformat(),
401
- "model": CLAUDE_MODEL,
402
- "timeout_per_problem": PROBLEM_TIMEOUT,
403
- "total_problems": len(problems),
404
- "status": "RUNNING",
405
- "problems": []
406
- }
407
-
408
- passed_count = 0
409
- failed_count = 0
410
- error_count = 0
411
- start_time = time.time()
412
-
413
- for i, problem in enumerate(problems):
414
- task_id = problem["task_id"]
415
- task_num = task_id.split("/")[1]
416
-
417
- print(f"[{i+1}/{len(problems)}] {task_id}...", end=" ", flush=True)
418
-
419
- # Get solution from Claude
420
- solution_result = solve_problem(problem)
421
-
422
- if solution_result["error"]:
423
- print(f"\033[0;31mERROR: {solution_result['error']}\033[0m")
424
- error_count += 1
425
- problem_result = {
426
- "task_id": task_id,
427
- "passed": False,
428
- "error": solution_result["error"],
429
- "solution": None
430
- }
431
- else:
432
- # Save solution
433
- solution_file = f"{solutions_dir}/{task_num}.py"
434
- with open(solution_file, 'w') as f:
435
- f.write(solution_result["solution"])
436
-
437
- # Test solution
438
- test_result = test_solution(problem, solution_result["solution"])
439
-
440
- if test_result["passed"]:
441
- print(f"\033[0;32mPASSED\033[0m")
442
- passed_count += 1
443
- else:
444
- print(f"\033[0;31mFAILED\033[0m")
445
- failed_count += 1
446
-
447
- problem_result = {
448
- "task_id": task_id,
449
- "passed": test_result["passed"],
450
- "error": test_result.get("error"),
451
- "solution_file": solution_file
452
- }
453
-
454
- results["problems"].append(problem_result)
455
-
456
- # Save intermediate results
457
- with open(results_file, 'w') as f:
458
- json.dump(results, f, indent=2)
459
-
460
- # Final results
461
- elapsed_time = time.time() - start_time
462
- pass_rate = (passed_count / len(problems)) * 100 if problems else 0
463
-
464
- results["status"] = "COMPLETED"
465
- results["passed"] = passed_count
466
- results["failed"] = failed_count
467
- results["errors"] = error_count
468
- results["pass_rate"] = round(pass_rate, 2)
469
- results["elapsed_seconds"] = round(elapsed_time, 2)
470
-
471
- with open(results_file, 'w') as f:
472
- json.dump(results, f, indent=2)
473
-
474
- print(f"\n{'='*60}")
475
- print(f" RESULTS")
476
- print(f"{'='*60}")
477
- print(f" Passed: {passed_count}/{len(problems)}")
478
- print(f" Failed: {failed_count}/{len(problems)}")
479
- print(f" Errors: {error_count}/{len(problems)}")
480
- print(f" Pass Rate: {pass_rate:.1f}%")
481
- print(f" Time: {elapsed_time:.1f}s")
482
- print(f"{'='*60}\n")
483
-
484
- # Compare to competitors
485
- print(" Competitor Comparison:")
486
- print(f" - MetaGPT: 85.9-87.7%")
487
- print(f" - Loki Mode: {pass_rate:.1f}%")
488
- if pass_rate >= 85:
489
- print(f" Status: \033[0;32mCOMPETITIVE\033[0m")
490
- elif pass_rate >= 70:
491
- print(f" Status: \033[0;33mGOOD\033[0m")
492
- else:
493
- print(f" Status: \033[0;31mNEEDS IMPROVEMENT\033[0m")
494
- print(f"{'='*60}\n")
495
- HUMANEVAL_EXECUTE
496
-
497
- log_success "HumanEval benchmark execution complete"
498
- log_info "Results: $results_file"
499
- log_info "Solutions: $solutions_dir/"
500
- }
501
-
502
- #===============================================================================
503
- # Loki Mode Multi-Agent HumanEval Benchmark
504
- # Uses: Architect -> Engineer -> QA -> Reviewer with RARV cycle
505
- #===============================================================================
506
-
507
- run_humaneval_loki() {
508
- local dataset_file="$SCRIPT_DIR/datasets/humaneval.jsonl"
509
- local results_file="$RESULTS_DIR/humaneval-loki-results.json"
510
- local solutions_dir="$RESULTS_DIR/humaneval-loki-solutions"
511
-
512
- mkdir -p "$solutions_dir"
513
-
514
- log_info "Executing HumanEval with Loki Mode Multi-Agent System..."
515
- log_info "Model: $CLAUDE_MODEL | Retries: $MAX_RETRIES | Limit: ${PROBLEM_LIMIT:-all}"
516
- log_info "Agents: Architect -> Engineer -> QA -> Reviewer (RARV cycle)"
517
-
518
- # Export variables for Python
519
- export PROBLEM_LIMIT PROBLEM_TIMEOUT CLAUDE_MODEL MAX_RETRIES
520
-
521
- python3 << 'HUMANEVAL_LOKI'
522
- import json
523
- import subprocess
524
- import os
525
- import sys
526
- import time
527
- import tempfile
528
- import traceback
529
- from datetime import datetime
530
-
531
- SCRIPT_DIR = os.environ.get('SCRIPT_DIR', '.')
532
- RESULTS_DIR = os.environ.get('RESULTS_DIR', './results')
533
- PROBLEM_LIMIT = int(os.environ.get('PROBLEM_LIMIT', '0'))
534
- PROBLEM_TIMEOUT = int(os.environ.get('PROBLEM_TIMEOUT', '120'))
535
- CLAUDE_MODEL = os.environ.get('CLAUDE_MODEL', 'sonnet')
536
- MAX_RETRIES = int(os.environ.get('MAX_RETRIES', '3'))
537
-
538
- dataset_file = f"{SCRIPT_DIR}/datasets/humaneval.jsonl"
539
- results_file = f"{RESULTS_DIR}/humaneval-loki-results.json"
540
- solutions_dir = f"{RESULTS_DIR}/humaneval-loki-solutions"
541
-
542
- # Load problems
543
- problems = []
544
- with open(dataset_file, 'r') as f:
545
- for line in f:
546
- problems.append(json.loads(line))
547
-
548
- if PROBLEM_LIMIT > 0:
549
- problems = problems[:PROBLEM_LIMIT]
550
-
551
- print(f"\n{'='*70}")
552
- print(f" LOKI MODE Multi-Agent HumanEval Benchmark")
553
- print(f" Problems: {len(problems)} | Model: {CLAUDE_MODEL} | Max Retries: {MAX_RETRIES}")
554
- print(f" Agent Pipeline: Architect -> Engineer -> QA -> Reviewer")
555
- print(f"{'='*70}\n")
556
-
557
- def call_agent(agent_name, prompt, timeout=PROBLEM_TIMEOUT):
558
- """Call a Loki Mode agent with a specific role."""
559
- try:
560
- result = subprocess.run(
561
- ['claude', '-p', prompt, '--model', CLAUDE_MODEL],
562
- capture_output=True,
563
- text=True,
564
- timeout=timeout
565
- )
566
- return result.stdout.strip(), None
567
- except subprocess.TimeoutExpired:
568
- return None, "TIMEOUT"
569
- except Exception as e:
570
- return None, str(e)
571
-
572
- def architect_agent(problem):
573
- """Architect: Analyze problem and design approach."""
574
- prompt = f'''You are the ARCHITECT AGENT in a multi-agent coding system.
575
-
576
- TASK: Analyze this HumanEval problem and design the solution approach.
577
-
578
- PROBLEM:
579
- {problem["prompt"]}
580
-
581
- Your job:
582
- 1. Understand what the function should do
583
- 2. Identify edge cases and constraints
584
- 3. Design the algorithm/approach
585
- 4. Note any potential pitfalls
586
-
587
- Output a brief analysis (3-5 lines) with:
588
- - What the function does
589
- - Key algorithm/approach
590
- - Edge cases to handle
591
-
592
- Keep it concise - the Engineer agent will implement based on your analysis.'''
593
-
594
- return call_agent("Architect", prompt, timeout=30)
595
-
596
- def engineer_agent(problem, architect_analysis):
597
- """Engineer: Implement the solution based on architect's design."""
598
- prompt = f'''You are the ENGINEER AGENT in a multi-agent coding system.
599
-
600
- TASK: Implement the solution based on the Architect's analysis.
601
-
602
- PROBLEM:
603
- {problem["prompt"]}
604
-
605
- ARCHITECT'S ANALYSIS:
606
- {architect_analysis}
607
-
608
- INSTRUCTIONS:
609
- 1. Output the COMPLETE function including signature and docstring
610
- 2. Implement based on the architect's approach
611
- 3. Use proper 4-space indentation
612
- 4. Handle the edge cases identified
613
- 5. Output ONLY Python code - no markdown, no explanation
614
-
615
- Output the complete function now:'''
616
-
617
- return call_agent("Engineer", prompt)
618
-
619
- def qa_agent(problem, solution):
620
- """QA: Test the solution and identify issues."""
621
- test = problem["test"]
622
- entry_point = problem["entry_point"]
623
-
624
- # First, actually run the tests
625
- test_code = f'''
626
- {solution}
627
-
628
- {test}
629
-
630
- check({entry_point})
631
- print("ALL_TESTS_PASSED")
632
- '''
633
-
634
- try:
635
- with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
636
- f.write(test_code)
637
- temp_file = f.name
638
-
639
- result = subprocess.run(
640
- ['python3', temp_file],
641
- capture_output=True,
642
- text=True,
643
- timeout=10
644
- )
645
-
646
- os.unlink(temp_file)
647
-
648
- if "ALL_TESTS_PASSED" in result.stdout:
649
- return {"passed": True, "output": "All tests passed", "error": None}
650
- else:
651
- error_msg = result.stderr or result.stdout or "Unknown error"
652
- return {"passed": False, "output": error_msg, "error": error_msg}
653
- except subprocess.TimeoutExpired:
654
- os.unlink(temp_file)
655
- return {"passed": False, "output": "Test timeout", "error": "TIMEOUT"}
656
- except Exception as e:
657
- return {"passed": False, "output": str(e), "error": str(e)}
658
-
659
- def reviewer_agent(problem, solution, qa_result):
660
- """Reviewer: Review solution quality and suggest improvements if tests failed."""
661
- if qa_result["passed"]:
662
- return {"approved": True, "feedback": "Solution passes all tests"}
663
-
664
- prompt = f'''You are the CODE REVIEWER AGENT in a multi-agent coding system.
665
-
666
- The QA agent found issues with this solution. Analyze and suggest fixes.
667
-
668
- PROBLEM:
669
- {problem["prompt"]}
670
-
671
- CURRENT SOLUTION:
672
- {solution}
673
-
674
- TEST ERROR:
675
- {qa_result["error"]}
676
-
677
- Analyze the error and provide:
678
- 1. What went wrong (1 line)
679
- 2. How to fix it (1-2 lines)
680
-
681
- Keep feedback concise - the Engineer will use it to fix the code.'''
682
-
683
- feedback, error = call_agent("Reviewer", prompt, timeout=30)
684
- return {"approved": False, "feedback": feedback or "No feedback", "error": error}
685
-
686
- def engineer_fix_agent(problem, solution, feedback, attempt):
687
- """Engineer: Fix the solution based on reviewer feedback."""
688
- prompt = f'''You are the ENGINEER AGENT. Your previous solution failed tests.
689
-
690
- PROBLEM:
691
- {problem["prompt"]}
692
-
693
- PREVIOUS SOLUTION:
694
- {solution}
695
-
696
- REVIEWER FEEDBACK:
697
- {feedback}
698
-
699
- ATTEMPT: {attempt}/{MAX_RETRIES}
700
-
701
- Fix the solution based on the feedback.
702
- Output the COMPLETE corrected function - no explanations, just code.'''
703
-
704
- return call_agent("Engineer-Fix", prompt)
705
-
706
- def solve_with_loki_mode(problem):
707
- """
708
- Solve a HumanEval problem using Loki Mode multi-agent system.
709
-
710
- Pipeline: Architect -> Engineer -> QA -> [Reviewer -> Engineer-Fix]* -> Pass/Fail
711
- """
712
- task_id = problem["task_id"]
713
- entry_point = problem["entry_point"]
714
-
715
- agent_trace = []
716
-
717
- # Step 1: Architect analyzes the problem
718
- architect_analysis, error = architect_agent(problem)
719
- agent_trace.append({"agent": "Architect", "output": architect_analysis, "error": error})
720
-
721
- if error:
722
- return {
723
- "task_id": task_id,
724
- "solution": None,
725
- "passed": False,
726
- "error": f"Architect failed: {error}",
727
- "attempts": 1,
728
- "agent_trace": agent_trace
729
- }
730
-
731
- # Step 2: Engineer implements solution
732
- solution, error = engineer_agent(problem, architect_analysis)
733
- agent_trace.append({"agent": "Engineer", "output": solution[:200] if solution else None, "error": error})
734
-
735
- if error or not solution:
736
- return {
737
- "task_id": task_id,
738
- "solution": None,
739
- "passed": False,
740
- "error": f"Engineer failed: {error}",
741
- "attempts": 1,
742
- "agent_trace": agent_trace
743
- }
744
-
745
- # Clean up solution
746
- if solution.startswith("```python"):
747
- solution = solution[9:]
748
- if solution.startswith("```"):
749
- solution = solution[3:]
750
- if solution.endswith("```"):
751
- solution = solution[:-3]
752
- solution = solution.strip()
753
-
754
- # Ensure function signature is present
755
- if f"def {entry_point}" not in solution:
756
- lines = solution.split('\n')
757
- indented_lines = [' ' + line if line.strip() and not line.startswith(' ') else line for line in lines]
758
- solution = problem["prompt"] + '\n'.join(indented_lines)
759
-
760
- # RARV Loop: QA -> Reviewer -> Engineer-Fix
761
- for attempt in range(1, MAX_RETRIES + 1):
762
- # Step 3: QA tests the solution
763
- qa_result = qa_agent(problem, solution)
764
- agent_trace.append({"agent": "QA", "passed": qa_result["passed"], "error": qa_result.get("error")})
765
-
766
- if qa_result["passed"]:
767
- return {
768
- "task_id": task_id,
769
- "solution": solution,
770
- "passed": True,
771
- "error": None,
772
- "attempts": attempt,
773
- "agent_trace": agent_trace
774
- }
775
-
776
- if attempt >= MAX_RETRIES:
777
- break
778
-
779
- # Step 4: Reviewer analyzes failure
780
- review = reviewer_agent(problem, solution, qa_result)
781
- agent_trace.append({"agent": "Reviewer", "feedback": review["feedback"][:200] if review["feedback"] else None})
782
-
783
- # Step 5: Engineer fixes based on feedback
784
- new_solution, error = engineer_fix_agent(problem, solution, review["feedback"], attempt + 1)
785
- agent_trace.append({"agent": f"Engineer-Fix-{attempt+1}", "output": new_solution[:200] if new_solution else None, "error": error})
786
-
787
- if new_solution and not error:
788
- # Clean up
789
- if new_solution.startswith("```python"):
790
- new_solution = new_solution[9:]
791
- if new_solution.startswith("```"):
792
- new_solution = new_solution[3:]
793
- if new_solution.endswith("```"):
794
- new_solution = new_solution[:-3]
795
- new_solution = new_solution.strip()
796
-
797
- if f"def {entry_point}" not in new_solution:
798
- lines = new_solution.split('\n')
799
- indented_lines = [' ' + line if line.strip() and not line.startswith(' ') else line for line in lines]
800
- new_solution = problem["prompt"] + '\n'.join(indented_lines)
801
-
802
- solution = new_solution
803
-
804
- return {
805
- "task_id": task_id,
806
- "solution": solution,
807
- "passed": False,
808
- "error": f"Failed after {MAX_RETRIES} RARV attempts",
809
- "attempts": MAX_RETRIES,
810
- "agent_trace": agent_trace
811
- }
812
-
813
- # Run benchmark
814
- results = {
815
- "benchmark": "HumanEval-LokiMode",
816
- "mode": "multi-agent",
817
- "version": "1.0",
818
- "timestamp": datetime.now().isoformat(),
819
- "model": CLAUDE_MODEL,
820
- "max_retries": MAX_RETRIES,
821
- "total_problems": len(problems),
822
- "problems": []
823
- }
824
-
825
- start_time = time.time()
826
- passed_count = 0
827
- failed_count = 0
828
- error_count = 0
829
- total_attempts = 0
830
-
831
- for i, problem in enumerate(problems):
832
- task_id = problem["task_id"]
833
- task_num = int(task_id.split("/")[1])
834
-
835
- print(f"[{i+1}/{len(problems)}] {task_id}...", end=" ", flush=True)
836
-
837
- problem_result = solve_with_loki_mode(problem)
838
-
839
- # Save solution
840
- solution_file = f"{solutions_dir}/{task_num}.py"
841
- with open(solution_file, 'w') as f:
842
- f.write(f"# {task_id}\n")
843
- f.write(f"# Loki Mode Multi-Agent Solution\n")
844
- f.write(f"# Attempts: {problem_result['attempts']}\n")
845
- f.write(f"# Passed: {problem_result['passed']}\n\n")
846
- if problem_result["solution"]:
847
- f.write(problem_result["solution"])
848
-
849
- # Track results
850
- total_attempts += problem_result["attempts"]
851
-
852
- if problem_result["passed"]:
853
- passed_count += 1
854
- attempts_str = f"(attempt {problem_result['attempts']})" if problem_result['attempts'] > 1 else ""
855
- print(f"\033[0;32mPASSED\033[0m {attempts_str}")
856
- elif problem_result["error"] and "failed" in problem_result["error"].lower():
857
- error_count += 1
858
- print(f"\033[0;31mERROR\033[0m - {problem_result['error'][:50]}")
859
- else:
860
- failed_count += 1
861
- print(f"\033[0;33mFAILED\033[0m after {problem_result['attempts']} attempts")
862
-
863
- # Store result (without full trace to save space)
864
- results["problems"].append({
865
- "task_id": task_id,
866
- "passed": problem_result["passed"],
867
- "attempts": problem_result["attempts"],
868
- "error": problem_result.get("error")
869
- })
870
-
871
- elapsed_time = time.time() - start_time
872
-
873
- # Final results
874
- results["passed"] = passed_count
875
- results["failed"] = failed_count
876
- results["errors"] = error_count
877
- results["pass_rate"] = (passed_count / len(problems)) * 100 if problems else 0
878
- results["avg_attempts"] = total_attempts / len(problems) if problems else 0
879
- results["elapsed_time"] = elapsed_time
880
-
881
- with open(results_file, 'w') as f:
882
- json.dump(results, f, indent=2)
883
-
884
- pass_rate = results["pass_rate"]
885
- avg_attempts = results["avg_attempts"]
886
-
887
- print(f"\n{'='*70}")
888
- print(f" LOKI MODE RESULTS")
889
- print(f"{'='*70}")
890
- print(f" Passed: {passed_count}/{len(problems)} ({pass_rate:.1f}%)")
891
- print(f" Failed: {failed_count}/{len(problems)}")
892
- print(f" Errors: {error_count}/{len(problems)}")
893
- print(f" Avg Attempts: {avg_attempts:.2f}")
894
- print(f" Time: {elapsed_time:.1f}s ({elapsed_time/len(problems):.1f}s avg)")
895
- print(f"{'='*70}")
896
- print(f"\n Comparison (baseline: MetaGPT 85.9-87.7%):")
897
- print(f" - MetaGPT (multi-agent): 85.9-87.7%")
898
- print(f" - Direct Claude: 98.17% (from previous run)")
899
- print(f" - Loki Mode (multi-agent): {pass_rate:.1f}%")
900
- if pass_rate >= 98:
901
- print(f" Status: \033[0;32mEXCELLENT - Beats both!\033[0m")
902
- elif pass_rate >= 90:
903
- print(f" Status: \033[0;32mGREAT - Beats MetaGPT\033[0m")
904
- elif pass_rate >= 85:
905
- print(f" Status: \033[0;33mCOMPETITIVE with MetaGPT\033[0m")
906
- else:
907
- print(f" Status: \033[0;31mBELOW MetaGPT baseline\033[0m")
908
- print(f"{'='*70}\n")
909
- HUMANEVAL_LOKI
910
-
911
- log_success "Loki Mode HumanEval benchmark complete"
912
- log_info "Results: $results_file"
913
- log_info "Solutions: $solutions_dir/"
914
- }
915
-
916
- #===============================================================================
917
- # SWE-bench Benchmark
918
- #===============================================================================
919
-
920
- download_swebench() {
921
- local dataset_file="$SCRIPT_DIR/datasets/swebench-lite.json"
922
-
923
- if [ -f "$dataset_file" ]; then
924
- log_info "SWE-bench Lite dataset already downloaded"
925
- return
926
- fi
927
-
928
- log_info "Downloading SWE-bench Lite dataset..."
929
-
930
- python3 << 'SWEBENCH_DOWNLOAD'
931
- import json
932
- import os
933
-
934
- SCRIPT_DIR = os.environ.get('SCRIPT_DIR', '.')
935
-
936
- # Create placeholder dataset structure
937
- dataset = {
938
- "name": "SWE-bench Lite",
939
- "version": "1.0",
940
- "description": "300 real-world GitHub issues for evaluation",
941
- "source": "https://github.com/SWE-bench/SWE-bench",
942
- "problems": 300,
943
- "status": "PLACEHOLDER",
944
- "install_command": "pip install swebench",
945
- "run_command": "python -m swebench.harness.run_evaluation"
946
- }
947
-
948
- with open(f"{SCRIPT_DIR}/datasets/swebench-lite.json", 'w') as f:
949
- json.dump(dataset, f, indent=2)
950
-
951
- print("SWE-bench Lite metadata saved")
952
- SWEBENCH_DOWNLOAD
953
-
954
- log_success "SWE-bench Lite dataset metadata ready"
955
- }
956
-
957
- run_swebench() {
958
- log_info "Running SWE-bench Lite benchmark..."
959
-
960
- download_swebench
961
-
962
- if [ "$EXECUTE_MODE" = true ]; then
963
- if [ "$LOKI_MODE" = true ]; then
964
- run_swebench_loki
965
- else
966
- run_swebench_execute
967
- fi
968
- else
969
- run_swebench_setup
970
- fi
971
- }
972
-
973
- run_swebench_setup() {
974
- local results_file="$RESULTS_DIR/swebench-results.json"
975
-
976
- python3 << 'SWEBENCH_SETUP'
977
- import json
978
- import os
979
- from datetime import datetime
980
-
981
- RESULTS_DIR = os.environ.get('RESULTS_DIR', './results')
982
-
983
- results = {
984
- "benchmark": "SWE-bench Lite",
985
- "version": "1.0",
986
- "timestamp": datetime.now().isoformat(),
987
- "total_problems": 300,
988
- "status": "INFRASTRUCTURE_READY",
989
- "note": "Install swebench package for full evaluation.",
990
- "install": "pip install swebench",
991
- "evaluation": "python -m swebench.harness.run_evaluation --predictions predictions.json"
992
- }
993
-
994
- with open(f"{RESULTS_DIR}/swebench-results.json", 'w') as f:
995
- json.dump(results, f, indent=2)
996
-
997
- print(f"Results saved to {RESULTS_DIR}/swebench-results.json")
998
- SWEBENCH_SETUP
999
-
1000
- log_success "SWE-bench benchmark infrastructure ready"
1001
- log_info "Results: $RESULTS_DIR/swebench-results.json"
1002
- }
1003
-
1004
- run_swebench_execute() {
1005
- log_info "Executing SWE-bench Lite benchmark..."
1006
-
1007
- # Check if swebench is installed
1008
- if ! python3 -c "import swebench" 2>/dev/null; then
1009
- log_warning "SWE-bench package not installed. Installing..."
1010
- pip install -q swebench datasets
1011
- fi
1012
-
1013
- export PROBLEM_LIMIT PROBLEM_TIMEOUT CLAUDE_MODEL
1014
-
1015
- python3 << 'SWEBENCH_EXECUTE'
1016
- import json
1017
- import subprocess
1018
- import os
1019
- import sys
1020
- import time
1021
- import tempfile
1022
- import shutil
1023
- from datetime import datetime
1024
-
1025
- try:
1026
- from datasets import load_dataset
1027
- from swebench.harness.constants import MAP_REPO_TO_TEST_FRAMEWORK
1028
- except ImportError:
1029
- print("Installing SWE-bench dependencies...")
1030
- subprocess.run([sys.executable, '-m', 'pip', 'install', '-q', 'swebench', 'datasets'])
1031
- from datasets import load_dataset
1032
-
1033
- SCRIPT_DIR = os.environ.get('SCRIPT_DIR', '.')
1034
- RESULTS_DIR = os.environ.get('RESULTS_DIR', './results')
1035
- PROBLEM_LIMIT = int(os.environ.get('PROBLEM_LIMIT', '10')) # Default to 10 for SWE-bench
1036
- PROBLEM_TIMEOUT = int(os.environ.get('PROBLEM_TIMEOUT', '300'))
1037
- CLAUDE_MODEL = os.environ.get('CLAUDE_MODEL', 'sonnet')
1038
-
1039
- results_file = f"{RESULTS_DIR}/swebench-results.json"
1040
- patches_dir = f"{RESULTS_DIR}/swebench-patches"
1041
- os.makedirs(patches_dir, exist_ok=True)
1042
-
1043
- print(f"\n{'='*60}")
1044
- print(f" SWE-bench Lite Benchmark Execution")
1045
- print(f" Limit: {PROBLEM_LIMIT} | Model: {CLAUDE_MODEL}")
1046
- print(f"{'='*60}\n")
1047
-
1048
- # Load SWE-bench Lite dataset
1049
- print("Loading SWE-bench Lite dataset...")
1050
- try:
1051
- dataset = load_dataset("princeton-nlp/SWE-bench_Lite", split="test")
1052
- problems = list(dataset)[:PROBLEM_LIMIT]
1053
- print(f"Loaded {len(problems)} problems")
1054
- except Exception as e:
1055
- print(f"Error loading dataset: {e}")
1056
- print("Using placeholder results...")
1057
- results = {
1058
- "benchmark": "SWE-bench Lite",
1059
- "version": "1.0",
1060
- "timestamp": datetime.now().isoformat(),
1061
- "status": "DATASET_ERROR",
1062
- "error": str(e),
1063
- "note": "Could not load SWE-bench dataset. Check network and try again."
1064
- }
1065
- with open(results_file, 'w') as f:
1066
- json.dump(results, f, indent=2)
1067
- sys.exit(1)
1068
-
1069
- def solve_swebench_problem(problem):
1070
- """Generate a patch for a SWE-bench problem using Claude."""
1071
- instance_id = problem["instance_id"]
1072
- repo = problem["repo"]
1073
- base_commit = problem["base_commit"]
1074
- problem_statement = problem["problem_statement"]
1075
- hints = problem.get("hints_text", "")
1076
-
1077
- # Create prompt for Claude
1078
- prompt = f'''You are solving a real GitHub issue from the {repo} repository.
1079
-
1080
- ## Problem Statement
1081
- {problem_statement}
1082
-
1083
- ## Hints
1084
- {hints if hints else "No hints available."}
1085
-
1086
- ## Task
1087
- Generate a git patch (unified diff format) that fixes this issue.
1088
-
1089
- Output ONLY the patch content in unified diff format. Example format:
1090
- --- a/file.py
1091
- +++ b/file.py
1092
- @@ -10,6 +10,7 @@
1093
- existing line
1094
- +new line
1095
- existing line
1096
-
1097
- Do not include any explanation or markdown code blocks. Just the raw patch.'''
1098
-
1099
- try:
1100
- result = subprocess.run(
1101
- ['claude', '-p', prompt, '--model', CLAUDE_MODEL],
1102
- capture_output=True,
1103
- text=True,
1104
- timeout=PROBLEM_TIMEOUT
1105
- )
1106
-
1107
- patch = result.stdout.strip()
1108
-
1109
- # Clean up patch if wrapped in markdown
1110
- if patch.startswith("```"):
1111
- lines = patch.split("\n")
1112
- patch = "\n".join(lines[1:-1] if lines[-1] == "```" else lines[1:])
1113
-
1114
- return {
1115
- "instance_id": instance_id,
1116
- "model_patch": patch,
1117
- "error": None
1118
- }
1119
- except subprocess.TimeoutExpired:
1120
- return {"instance_id": instance_id, "model_patch": None, "error": "TIMEOUT"}
1121
- except Exception as e:
1122
- return {"instance_id": instance_id, "model_patch": None, "error": str(e)}
1123
-
1124
- # Run benchmark
1125
- results = {
1126
- "benchmark": "SWE-bench Lite",
1127
- "version": "1.0",
1128
- "timestamp": datetime.now().isoformat(),
1129
- "model": CLAUDE_MODEL,
1130
- "timeout_per_problem": PROBLEM_TIMEOUT,
1131
- "total_problems": len(problems),
1132
- "status": "RUNNING",
1133
- "predictions": []
1134
- }
1135
-
1136
- generated_count = 0
1137
- error_count = 0
1138
- start_time = time.time()
1139
-
1140
- for i, problem in enumerate(problems):
1141
- instance_id = problem["instance_id"]
1142
-
1143
- print(f"[{i+1}/{len(problems)}] {instance_id}...", end=" ", flush=True)
1144
-
1145
- solution = solve_swebench_problem(problem)
1146
-
1147
- if solution["error"]:
1148
- print(f"\033[0;31mERROR: {solution['error']}\033[0m")
1149
- error_count += 1
1150
- else:
1151
- print(f"\033[0;32mGENERATED\033[0m")
1152
- generated_count += 1
1153
-
1154
- # Save patch
1155
- patch_file = f"{patches_dir}/{instance_id.replace('/', '_')}.patch"
1156
- with open(patch_file, 'w') as f:
1157
- f.write(solution["model_patch"])
1158
-
1159
- # Add to predictions (format required by SWE-bench evaluator)
1160
- results["predictions"].append({
1161
- "instance_id": instance_id,
1162
- "model_patch": solution["model_patch"] or "",
1163
- "model_name_or_path": f"loki-mode-{CLAUDE_MODEL}"
1164
- })
1165
-
1166
- # Save intermediate results
1167
- with open(results_file, 'w') as f:
1168
- json.dump(results, f, indent=2)
1169
-
1170
- # Save predictions file for SWE-bench evaluator
1171
- predictions_file = f"{RESULTS_DIR}/swebench-predictions.json"
1172
- with open(predictions_file, 'w') as f:
1173
- json.dump(results["predictions"], f, indent=2)
1174
-
1175
- elapsed_time = time.time() - start_time
1176
-
1177
- results["status"] = "PATCHES_GENERATED"
1178
- results["generated"] = generated_count
1179
- results["errors"] = error_count
1180
- results["elapsed_seconds"] = round(elapsed_time, 2)
1181
- results["predictions_file"] = predictions_file
1182
- results["next_step"] = "Run: python -m swebench.harness.run_evaluation --predictions " + predictions_file
1183
-
1184
- with open(results_file, 'w') as f:
1185
- json.dump(results, f, indent=2)
1186
-
1187
- print(f"\n{'='*60}")
1188
- print(f" RESULTS")
1189
- print(f"{'='*60}")
1190
- print(f" Generated: {generated_count}/{len(problems)}")
1191
- print(f" Errors: {error_count}/{len(problems)}")
1192
- print(f" Time: {elapsed_time:.1f}s")
1193
- print(f"{'='*60}")
1194
- print(f"\n Next Step: Run SWE-bench evaluator")
1195
- print(f" python -m swebench.harness.run_evaluation \\")
1196
- print(f" --predictions {predictions_file} \\")
1197
- print(f" --max_workers 4")
1198
- print(f"{'='*60}\n")
1199
- SWEBENCH_EXECUTE
1200
-
1201
- log_success "SWE-bench patch generation complete"
1202
- log_info "Results: $RESULTS_DIR/swebench-results.json"
1203
- log_info "Predictions: $RESULTS_DIR/swebench-predictions.json"
1204
- }
1205
-
1206
- #===============================================================================
1207
- # Loki Mode Multi-Agent SWE-bench Benchmark
1208
- # Uses: Architect -> Engineer -> QA -> Reviewer with RARV cycle
1209
- #===============================================================================
1210
-
1211
- run_swebench_loki() {
1212
- log_info "Executing SWE-bench Lite with Loki Mode Multi-Agent System..."
1213
- log_info "Model: $CLAUDE_MODEL | Retries: $MAX_RETRIES | Limit: ${PROBLEM_LIMIT:-all}"
1214
- log_info "Agents: Architect -> Engineer -> QA -> Reviewer (RARV cycle)"
1215
- log_info "Trajectory logging: ENABLED (for official submission)"
1216
-
1217
- # Check if swebench is installed
1218
- if ! python3 -c "import swebench" 2>/dev/null; then
1219
- log_warning "SWE-bench package not installed. Installing..."
1220
- pip install -q swebench datasets
1221
- fi
1222
-
1223
- export PROBLEM_LIMIT PROBLEM_TIMEOUT CLAUDE_MODEL MAX_RETRIES
1224
-
1225
- python3 << 'SWEBENCH_LOKI'
1226
- import json
1227
- import subprocess
1228
- import os
1229
- import sys
1230
- import time
1231
- import re
1232
- from datetime import datetime
1233
-
1234
- try:
1235
- from datasets import load_dataset
1236
- except ImportError:
1237
- subprocess.run([sys.executable, '-m', 'pip', 'install', '-q', 'swebench', 'datasets'])
1238
- from datasets import load_dataset
1239
-
1240
- SCRIPT_DIR = os.environ.get('SCRIPT_DIR', '.')
1241
- RESULTS_DIR = os.environ.get('RESULTS_DIR', './results')
1242
- PROBLEM_LIMIT = int(os.environ.get('PROBLEM_LIMIT', '0'))
1243
- PROBLEM_TIMEOUT = int(os.environ.get('PROBLEM_TIMEOUT', '300'))
1244
- CLAUDE_MODEL = os.environ.get('CLAUDE_MODEL', 'sonnet')
1245
- MAX_RETRIES = int(os.environ.get('MAX_RETRIES', '3'))
1246
-
1247
- results_file = f"{RESULTS_DIR}/swebench-loki-results.json"
1248
- patches_dir = f"{RESULTS_DIR}/swebench-loki-patches"
1249
- trajs_dir = f"{RESULTS_DIR}/trajs" # Trajectory logs for official submission
1250
- logs_dir = f"{RESULTS_DIR}/logs" # Execution logs for official submission
1251
- os.makedirs(patches_dir, exist_ok=True)
1252
- os.makedirs(trajs_dir, exist_ok=True)
1253
- os.makedirs(logs_dir, exist_ok=True)
1254
-
1255
- print(f"\n{'='*70}")
1256
- print(f" LOKI MODE Multi-Agent SWE-bench Lite Benchmark")
1257
- print(f" Limit: {PROBLEM_LIMIT if PROBLEM_LIMIT > 0 else 'all'} | Model: {CLAUDE_MODEL} | Max Retries: {MAX_RETRIES}")
1258
- print(f" Agent Pipeline: Architect -> Engineer -> QA -> Reviewer")
1259
- print(f"{'='*70}\n")
1260
-
1261
- # Load dataset
1262
- print("Loading SWE-bench Lite dataset...")
1263
- try:
1264
- dataset = load_dataset("princeton-nlp/SWE-bench_Lite", split="test")
1265
- problems = list(dataset)
1266
- if PROBLEM_LIMIT > 0:
1267
- problems = problems[:PROBLEM_LIMIT]
1268
- print(f"Loaded {len(problems)} problems")
1269
- except Exception as e:
1270
- print(f"Error loading dataset: {e}")
1271
- sys.exit(1)
1272
-
1273
def call_agent(agent_name, prompt, timeout=PROBLEM_TIMEOUT):
    """Run one Loki Mode agent turn through the `claude` CLI.

    Args:
        agent_name: Role label recorded in the metadata (e.g. "Architect").
        prompt: Prompt text passed to `claude -p`.
        timeout: Seconds to wait for the CLI before giving up.

    Returns:
        A tuple (output, error, metadata). On success `output` is the
        stripped stdout and `error` is None. On failure `output` is None and
        `error` is "TIMEOUT", a description of the non-zero exit status, or
        the text of the raised exception. `metadata` always carries the
        agent name, model, elapsed seconds and an ISO timestamp.
    """
    start_time = time.time()

    def _meta(**extra):
        # Every outcome shares the same base fields so trajectory logs are
        # uniform regardless of how the call ended.
        meta = {
            "agent": agent_name,
            "model": CLAUDE_MODEL,
            "elapsed_seconds": round(time.time() - start_time, 2),
        }
        meta.update(extra)
        meta["timestamp"] = datetime.now().isoformat()
        return meta

    try:
        result = subprocess.run(
            ['claude', '-p', prompt, '--model', CLAUDE_MODEL],
            capture_output=True,
            text=True,
            timeout=timeout
        )
    except subprocess.TimeoutExpired:
        return None, "TIMEOUT", _meta(error="TIMEOUT")
    except Exception as e:
        return None, str(e), _meta(error=str(e))

    if result.returncode != 0:
        # A failed CLI run must not be reported as a successful, empty reply.
        detail = (result.stderr or "").strip() or "no stderr output"
        error = f"claude exited with status {result.returncode}: {detail[:200]}"
        return None, error, _meta(error=error)

    return result.stdout.strip(), None, _meta(
        prompt_length=len(prompt),
        output_length=len(result.stdout),
    )
1307
-
1308
def architect_agent(problem):
    """Architect: analyze the issue and outline the fix approach.

    Args:
        problem: Mapping that provides "repo" and "problem_statement", and
            optionally "hints_text".

    Returns:
        The (output, error, metadata) tuple from call_agent, with the prompt
        and the output added to metadata under "prompt" and "output".
    """
    # A present-but-empty hints_text would bypass a dict.get() default and
    # leave the HINTS section blank, so fall back on any falsy value.
    hints = problem.get("hints_text") or "No hints available."

    prompt = f'''You are the ARCHITECT AGENT analyzing a GitHub issue.

REPOSITORY: {problem["repo"]}
ISSUE:
{problem["problem_statement"]}

HINTS:
{hints}

Your job:
1. Understand what the issue is about
2. Identify which file(s) likely need to be changed
3. Describe the fix approach (2-3 sentences)
4. Note any edge cases

Output a brief analysis (5-7 lines max) with:
- What the bug/issue is
- Files likely affected
- Fix strategy

Keep it concise - the Engineer agent will generate the patch.'''

    output, error, metadata = call_agent("Architect", prompt, timeout=120)
    metadata["prompt"] = prompt
    metadata["output"] = output
    return output, error, metadata
1336
-
1337
def engineer_agent(problem, architect_analysis):
    """Engineer: turn the architect's analysis into a unified-diff patch.

    Args:
        problem: Mapping that provides "repo" and "problem_statement".
        architect_analysis: Text produced by the architect step.

    Returns:
        The (output, error, metadata) tuple from call_agent; metadata also
        records the prompt and the output.
    """
    repo = problem["repo"]
    issue = problem["problem_statement"]

    prompt = f'''You are the ENGINEER AGENT generating a patch for a GitHub issue.

REPOSITORY: {repo}
ISSUE:
{issue}

ARCHITECT'S ANALYSIS:
{architect_analysis}

Generate a git patch (unified diff format) that fixes this issue.

IMPORTANT:
1. Output ONLY the patch in unified diff format
2. Include proper file paths with a/ and b/ prefixes
3. Include @@ line numbers
4. No explanations, no markdown code blocks, just raw patch

Example format:
--- a/path/to/file.py
+++ b/path/to/file.py
@@ -10,6 +10,7 @@
 existing line
+new line
 existing line

Generate the patch now:'''

    reply, failure, meta = call_agent("Engineer", prompt)
    meta.update(prompt=prompt, output=reply)
    return reply, failure, meta
1370
-
1371
def qa_agent(patch):
    """QA: validate that `patch` is structured like a unified diff.

    The patch is inspected line by line rather than by substring search, so
    a "-" inside "---" no longer counts as a change and a path such as
    "data/x" no longer counts as an "a/" prefix.

    Args:
        patch: Patch text, or a falsy value when nothing was generated.

    Returns:
        Dict with "valid" (bool), "error" (message of the first failed check
        or None), "checks" (list of {"check", "passed"}), "elapsed_seconds"
        and "timestamp". An empty patch yields an empty "checks" list and no
        "elapsed_seconds".
    """
    start_time = time.time()

    if not patch:
        return {"valid": False, "error": "Empty patch", "checks": [], "timestamp": datetime.now().isoformat()}

    lines = patch.splitlines()
    old_headers = []
    new_headers = []
    has_hunk_header = False
    has_changes = False
    in_hunk = False

    for idx, line in enumerate(lines):
        following = lines[idx + 1] if idx + 1 < len(lines) else ""
        if line.startswith("--- ") and following.startswith("+++ "):
            # A "---" line directly followed by "+++" opens a new file section.
            old_headers.append(line)
            in_hunk = False
        elif line.startswith("+++ ") and not in_hunk:
            new_headers.append(line)
        elif line.startswith("@@"):
            has_hunk_header = True
            in_hunk = True
        elif in_hunk and line[:1] in ("+", "-"):
            has_changes = True

    def _header_path(header):
        # Drop the 4-character marker and any tab-separated timestamp.
        return header[4:].split("\t")[0].strip()

    def _prefixed(headers, prefix):
        return all(
            _header_path(h).startswith(prefix) or _header_path(h) == "/dev/null"
            for h in headers
        )

    has_diff_header = bool(old_headers) and bool(new_headers)
    # Valid diff lines begin with ' ', '+', '-', '\\' or a header marker, so a
    # line starting with a fence is always wrapper residue.
    is_wrapped = any(line.startswith("```") for line in lines)
    has_path_prefixes = (
        has_diff_header
        and _prefixed(old_headers, "a/")
        and _prefixed(new_headers, "b/")
    )

    checks = [
        {"check": "diff_headers", "passed": has_diff_header},
        {"check": "hunk_headers", "passed": has_hunk_header},
        {"check": "has_changes", "passed": has_changes},
        {"check": "no_markdown_wrap", "passed": not is_wrapped},
        {"check": "path_prefixes", "passed": has_path_prefixes},
    ]

    elapsed = time.time() - start_time

    # Failure conditions in reporting priority order; the first hit wins.
    failures = [
        (is_wrapped, "Patch wrapped in markdown code blocks"),
        (not has_diff_header, "Missing diff headers (--- and +++)"),
        (not has_hunk_header, "Missing hunk headers (@@)"),
        (not has_changes, "No actual changes in patch"),
        (not has_path_prefixes, "Missing a/ or b/ path prefixes"),
    ]
    error = next((message for failed, message in failures if failed), None)

    return {
        "valid": error is None,
        "error": error,
        "checks": checks,
        "elapsed_seconds": round(elapsed, 2),
        "timestamp": datetime.now().isoformat(),
    }
1416
-
1417
def reviewer_agent(problem, patch, qa_result):
    """Reviewer: explain how to repair a patch that failed QA.

    When QA already passed, no agent call is made and an approval is
    returned whose metadata is marked as skipped.

    Args:
        problem: Mapping that provides "problem_statement".
        patch: Patch text under review (may be empty).
        qa_result: Dict from the QA step; reads "valid" and "error".

    Returns:
        Dict with "approved", "feedback" and "metadata"; the rejection path
        additionally carries "error" from the agent call. If the agent gives
        no feedback, the QA error message is used instead.
    """
    if not qa_result["valid"]:
        issue_excerpt = problem["problem_statement"][:500]
        patch_excerpt = patch[:1000] if patch else "Empty"
        format_error = qa_result["error"]

        prompt = f'''You are the CODE REVIEWER AGENT. The generated patch has format issues.

ISSUE:
{issue_excerpt}

CURRENT PATCH:
{patch_excerpt}

FORMAT ERROR:
{format_error}

Provide brief feedback (2-3 lines) on how to fix the patch format:
- What's wrong
- How to fix it'''

        reply, failure, meta = call_agent("Reviewer", prompt, timeout=60)
        meta["prompt"] = prompt
        meta["output"] = reply
        return {
            "approved": False,
            "feedback": reply or format_error,
            "error": failure,
            "metadata": meta,
        }

    skipped_meta = {
        "agent": "Reviewer",
        "skipped": True,
        "timestamp": datetime.now().isoformat(),
    }
    return {"approved": True, "feedback": "Patch format is valid", "metadata": skipped_meta}
1441
-
1442
def engineer_fix_agent(problem, patch, feedback, attempt):
    """Engineer: regenerate the patch using the reviewer's feedback.

    Args:
        problem: Mapping that provides "problem_statement".
        patch: The previous patch text (may be empty).
        feedback: Reviewer feedback to address.
        attempt: Attempt number shown in the prompt and stored in metadata.

    Returns:
        The (output, error, metadata) tuple from call_agent; metadata also
        records the prompt, the output and the attempt number.
    """
    issue_excerpt = problem["problem_statement"][:500]
    previous = patch[:1000] if patch else "Empty"

    prompt = f'''You are the ENGINEER AGENT. Your previous patch had format issues.

ISSUE:
{issue_excerpt}

PREVIOUS PATCH:
{previous}

REVIEWER FEEDBACK:
{feedback}

ATTEMPT: {attempt}/{MAX_RETRIES}

Generate a CORRECTED patch in proper unified diff format.
Output ONLY the raw patch - no explanations, no markdown.

--- a/path/to/file.py
+++ b/path/to/file.py
@@ -line,count +line,count @@
...'''

    reply, failure, meta = call_agent("Engineer-Fix", prompt)
    meta.update(prompt=prompt, output=reply, attempt=attempt)
    return reply, failure, meta
1470
-
1471
def clean_patch(patch):
    """Strip a markdown code fence from around a patch.

    The first line that starts with a fence opens the block, even when prose
    precedes it. The block ends at the next line starting with a fence, or,
    failing that, at a final line consisting only of a fence. Text outside
    the fenced block is discarded. Input without any fence is only stripped
    of surrounding whitespace.

    Args:
        patch: Raw agent output, or a falsy value.

    Returns:
        The cleaned patch text; falsy input is returned unchanged.
    """
    if not patch:
        return patch

    fence = "```"
    lines = patch.split("\n")
    opening = next((i for i, line in enumerate(lines) if line.startswith(fence)), None)

    if opening is not None:
        body = lines[opening + 1:]
        closing = next((i for i, line in enumerate(body) if line.startswith(fence)), None)
        if closing is not None:
            body = body[:closing]
        elif body and body[-1].strip() == fence:
            # Tolerate a closing fence that carries leading whitespace.
            body = body[:-1]
        patch = "\n".join(body)

    return patch.strip()
1486
-
1487
def save_trajectory(instance_id, trajectory_steps):
    """Write the per-step reasoning trajectory for one instance as markdown.

    Args:
        instance_id: Instance identifier; "/" and ":" are replaced with "_"
            to form the file name.
        trajectory_steps: List of step dicts. "agent" is required; "prompt",
            "output", "error", "checks", "timestamp" and "elapsed_seconds"
            are written when present.

    Returns:
        Path of the written file under trajs_dir.
    """
    safe_id = instance_id.replace("/", "_").replace(":", "_")
    traj_file = f"{trajs_dir}/{safe_id}.md"

    # Prompts and model output can contain non-ASCII text; an explicit
    # encoding keeps the write independent of the platform default.
    with open(traj_file, 'w', encoding="utf-8") as f:
        f.write(f"# Trajectory: {instance_id}\n\n")
        f.write(f"**Generated by:** Loki Mode Multi-Agent System\n")
        f.write(f"**Model:** {CLAUDE_MODEL}\n")
        f.write(f"**Timestamp:** {datetime.now().isoformat()}\n\n")
        f.write("---\n\n")

        for i, step in enumerate(trajectory_steps, 1):
            f.write(f"## Step {i}: {step['agent']}\n\n")
            f.write(f"**Timestamp:** {step.get('timestamp', 'N/A')}\n")
            f.write(f"**Duration:** {step.get('elapsed_seconds', 'N/A')}s\n\n")

            prompt = step.get('prompt')
            if prompt:
                f.write("### Prompt\n\n```\n")
                # Only the first 2000 characters of a prompt are logged.
                f.write(prompt[:2000])
                if len(prompt) > 2000:
                    f.write("\n... (truncated)")
                f.write("\n```\n\n")

            if step.get('output'):
                f.write("### Output\n\n```\n")
                f.write(step['output'])
                f.write("\n```\n\n")

            if step.get('error'):
                f.write(f"### Error\n\n`{step['error']}`\n\n")

            if step.get('checks'):
                f.write("### Validation Checks\n\n")
                for check in step['checks']:
                    status = "PASS" if check['passed'] else "FAIL"
                    f.write(f"- {check['check']}: {status}\n")
                f.write("\n")

            f.write("---\n\n")

    return traj_file
1529
-
1530
def save_logs(instance_id, patch, result):
    """Write patch.diff, report.json and test_output.txt for one instance.

    Args:
        instance_id: Instance identifier; "/" and ":" are replaced with "_"
            to form the directory name under logs_dir.
        patch: Patch text, or a falsy value when none was produced.
        result: Result dict; reads "attempts" (default 1) and "error".

    Returns:
        Path of the per-instance log directory.
    """
    safe_id = instance_id.replace("/", "_").replace(":", "_")
    log_dir = f"{logs_dir}/{safe_id}"
    os.makedirs(log_dir, exist_ok=True)

    patch_text = patch or ""
    attempts = result.get("attempts", 1)

    # Patches can contain non-ASCII source text, so every file is written as
    # UTF-8 instead of relying on the platform default encoding.
    with open(f"{log_dir}/patch.diff", 'w', encoding="utf-8") as f:
        f.write(patch_text)

    report = {
        "instance_id": instance_id,
        "model_name_or_path": f"loki-mode-{CLAUDE_MODEL}",
        "model_patch": patch_text,
        "attempts": attempts,
        "success": result.get("error") is None,
        "error": result.get("error"),
        "timestamp": datetime.now().isoformat()
    }
    with open(f"{log_dir}/report.json", 'w', encoding="utf-8") as f:
        json.dump(report, f, indent=2)

    # Placeholder only: real test output comes from the SWE-bench harness.
    with open(f"{log_dir}/test_output.txt", 'w', encoding="utf-8") as f:
        f.write(f"# Test output for {instance_id}\n")
        f.write(f"# Generated by Loki Mode\n")
        f.write(f"# Note: Run SWE-bench harness for actual test results\n\n")
        f.write(f"Patch generated: {'Yes' if patch else 'No'}\n")
        f.write(f"Attempts: {attempts}\n")
        f.write(f"Error: {result.get('error', 'None')}\n")

    return log_dir
1566
-
1567
def solve_with_loki_mode(problem):
    """Run the Architect -> Engineer -> QA -> Reviewer pipeline on one problem.

    Each exit path saves the trajectory and the execution logs before
    returning.

    Args:
        problem: Mapping that provides "instance_id" plus the fields the
            individual agents read.

    Returns:
        Dict with "instance_id", "model_patch", "error", "attempts" and
        "agent_trace". When QA never passes, the last patch is still
        returned together with a "Format issues ..." error.
    """
    instance_id = problem["instance_id"]
    steps = []   # full trajectory for official submission
    trace = []   # summary trace for the results JSON

    def preview(text):
        # Trace entries keep only the first 200 characters of agent output.
        return text[:200] if text else None

    def finish(model_patch, error, attempts):
        # Shared exit: build the result, persist trajectory and logs.
        outcome = {
            "instance_id": instance_id,
            "model_patch": model_patch,
            "error": error,
            "attempts": attempts,
            "agent_trace": trace
        }
        save_trajectory(instance_id, steps)
        save_logs(instance_id, model_patch, outcome)
        return outcome

    # Step 1: Architect analyzes the issue
    analysis, arch_error, arch_meta = architect_agent(problem)
    steps.append(arch_meta)
    trace.append({"agent": "Architect", "output": preview(analysis), "error": arch_error})
    if arch_error:
        return finish(None, f"Architect failed: {arch_error}", 1)

    # Step 2: Engineer generates patch
    patch, eng_error, eng_meta = engineer_agent(problem, analysis)
    steps.append(eng_meta)
    trace.append({"agent": "Engineer", "output": preview(patch), "error": eng_error})
    if eng_error or not patch:
        return finish(None, f"Engineer failed: {eng_error}", 1)

    patch = clean_patch(patch)

    # RARV Loop: QA -> Reviewer -> Engineer-Fix
    for attempt in range(1, MAX_RETRIES + 1):
        # Step 3: QA validates patch format
        verdict = qa_agent(patch)
        steps.append({
            "agent": "QA",
            "timestamp": verdict.get("timestamp"),
            "elapsed_seconds": verdict.get("elapsed_seconds"),
            "output": f"Valid: {verdict['valid']}, Error: {verdict.get('error')}",
            "checks": verdict.get("checks", [])
        })
        trace.append({"agent": "QA", "valid": verdict["valid"], "error": verdict.get("error")})

        if verdict["valid"]:
            return finish(patch, None, attempt)

        if attempt >= MAX_RETRIES:
            break

        # Step 4: Reviewer analyzes issues
        review = reviewer_agent(problem, patch, verdict)
        if review.get("metadata"):
            steps.append(review["metadata"])
        trace.append({"agent": "Reviewer", "feedback": preview(review.get("feedback"))})

        # Step 5: Engineer fixes patch
        next_attempt = attempt + 1
        revised, fix_error, fix_meta = engineer_fix_agent(problem, patch, review["feedback"], next_attempt)
        steps.append(fix_meta)
        trace.append({"agent": f"Engineer-Fix-{next_attempt}", "output": preview(revised), "error": fix_error})

        # A failed fix keeps the previous patch for the next QA round.
        if revised and not fix_error:
            patch = clean_patch(revised)

    # Return even if format isn't perfect - let SWE-bench evaluator handle it
    return finish(patch, f"Format issues after {MAX_RETRIES} attempts", MAX_RETRIES)
1662
-
1663
# Run benchmark: solve every problem, write one patch file per instance,
# then persist the aggregate results and the evaluator predictions.
results = {
    "benchmark": "SWE-bench-LokiMode",
    "mode": "multi-agent",
    "version": "1.0",
    "timestamp": datetime.now().isoformat(),
    "model": CLAUDE_MODEL,
    "max_retries": MAX_RETRIES,
    "total_problems": len(problems),
    "predictions": []
}

start_time = time.time()
generated_count = fixed_by_rarv = error_count = total_attempts = 0

for position, problem in enumerate(problems, start=1):
    instance_id = problem["instance_id"]

    print(f"[{position}/{len(problems)}] {instance_id}...", end=" ", flush=True)

    outcome = solve_with_loki_mode(problem)
    attempts_used = outcome["attempts"]
    patch_text = outcome["model_patch"]
    total_attempts += attempts_used

    # Save patch (header comment lines first, then the patch body if any)
    patch_file = f"{patches_dir}/{instance_id.replace('/', '_')}.patch"
    with open(patch_file, 'w') as f:
        f.write(f"# {instance_id}\n")
        f.write(f"# Loki Mode Multi-Agent Patch\n")
        f.write(f"# Attempts: {attempts_used}\n\n")
        if patch_text:
            f.write(patch_text)

    # A patch that never passed QA still counts as generated, but is
    # reported in yellow and never credited to the RARV loop.
    has_format_issue = (outcome.get("error") or "").startswith("Format")
    if not patch_text:
        error_count += 1
        print(f"\033[0;31mERROR\033[0m - {outcome.get('error', 'Unknown')[:40]}")
    elif has_format_issue:
        generated_count += 1
        print(f"\033[0;33mGENERATED\033[0m (format issues)")
    else:
        generated_count += 1
        if attempts_used > 1:
            fixed_by_rarv += 1
            print(f"\033[0;32mGENERATED\033[0m (fixed on attempt {attempts_used})")
        else:
            print(f"\033[0;32mGENERATED\033[0m")

    # Add to predictions
    results["predictions"].append({
        "instance_id": instance_id,
        "model_patch": patch_text or "",
        "model_name_or_path": f"loki-mode-{CLAUDE_MODEL}",
        "attempts": attempts_used
    })

elapsed_time = time.time() - start_time

# Save results
results.update(
    generated=generated_count,
    fixed_by_rarv=fixed_by_rarv,
    errors=error_count,
    avg_attempts=total_attempts / len(problems) if problems else 0,
    elapsed_time=elapsed_time,
)

with open(results_file, 'w') as f:
    json.dump(results, f, indent=2)

# Save predictions for SWE-bench evaluator
predictions_file = f"{RESULTS_DIR}/swebench-loki-predictions.json"
with open(predictions_file, 'w') as f:
    json.dump(results["predictions"], f, indent=2)
1736
-
1737
# Final console report. Both per-problem figures are guarded so an empty
# problem list prints zeros instead of raising ZeroDivisionError.
problem_count = len(problems)
gen_rate = (generated_count / problem_count) * 100 if problem_count else 0
avg_seconds = elapsed_time / problem_count if problem_count else 0

print(f"\n{'='*70}")
print(f" LOKI MODE SWE-BENCH RESULTS")
print(f"{'='*70}")
print(f" Generated: {generated_count}/{problem_count} ({gen_rate:.1f}%)")
print(f" Fixed by RARV: {fixed_by_rarv}")
print(f" Errors: {error_count}/{problem_count}")
print(f" Avg Attempts: {results['avg_attempts']:.2f}")
print(f" Time: {elapsed_time:.1f}s ({avg_seconds:.1f}s avg)")
print(f"{'='*70}")
print(f"\n Output Files (for official submission):")
print(f" - Predictions: {predictions_file}")
print(f" - Trajectories: {trajs_dir}/ ({len(os.listdir(trajs_dir))} files)")
print(f" - Logs: {logs_dir}/ ({len(os.listdir(logs_dir))} dirs)")
print(f"{'='*70}")
print(f"\n Comparison:")
print(f" - Direct Claude: 99.67% patch gen")
print(f" - Loki Mode (multi-agent): {gen_rate:.1f}% patch gen")
print(f"{'='*70}")
print(f"\n Next Step: Run SWE-bench evaluator")
print(f" python -m swebench.harness.run_evaluation \\")
print(f" --predictions {predictions_file}")
print(f"{'='*70}\n")
1761
- SWEBENCH_LOKI
1762
-
1763
- log_success "Loki Mode SWE-bench patch generation complete"
1764
- log_info "Results: $RESULTS_DIR/swebench-loki-results.json"
1765
- log_info "Predictions: $RESULTS_DIR/swebench-loki-predictions.json"
1766
- }
1767
-
1768
- #===============================================================================
1769
- # Summary Report
1770
- #===============================================================================
1771
-
1772
#######################################
# Render $RESULTS_DIR/SUMMARY.md from whichever result files exist.
# Globals:   RESULTS_DIR (read by the embedded Python via the environment,
#            so it must be exported; Python falls back to ./results)
# Arguments: none
# Outputs:   writes SUMMARY.md; progress messages through the log_* helpers
# Returns:   0 on success, 1 if the Python generator fails
#######################################
generate_summary() {
    log_info "Generating benchmark summary..."

    # The delimiter is quoted so the shell passes the Python source through
    # verbatim: backticks need no escaping, and "\\" reaches Python intact
    # and renders as a literal backslash in the markdown.
    if ! python3 << 'SUMMARY_GEN'
import json
import os
from datetime import datetime

RESULTS_DIR = os.environ.get('RESULTS_DIR', './results')

summary = f"""# Loki Mode Benchmark Results

**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

## Overview

This directory contains benchmark results for Loki Mode multi-agent system.

"""

# HumanEval results
humaneval_file = f"{RESULTS_DIR}/humaneval-results.json"
if os.path.exists(humaneval_file):
    with open(humaneval_file) as f:
        he = json.load(f)

    if he.get("status") == "COMPLETED":
        summary += f"""## HumanEval Results

| Metric | Value |
|--------|-------|
| Problems | {he.get('total_problems', 'N/A')} |
| Passed | {he.get('passed', 'N/A')} |
| Failed | {he.get('failed', 'N/A')} |
| **Pass Rate** | **{he.get('pass_rate', 'N/A')}%** |
| Model | {he.get('model', 'N/A')} |
| Time | {he.get('elapsed_seconds', 'N/A')}s |

### Competitor Comparison

| System | Pass@1 |
|--------|--------|
| MetaGPT | 85.9-87.7% |
| **Loki Mode** | **{he.get('pass_rate', 'N/A')}%** |

"""
    else:
        summary += f"""## HumanEval

Status: {he.get('status', 'UNKNOWN')}

To run: `./benchmarks/run-benchmarks.sh humaneval --execute`

"""

# SWE-bench results
swebench_file = f"{RESULTS_DIR}/swebench-results.json"
if os.path.exists(swebench_file):
    with open(swebench_file) as f:
        sb = json.load(f)

    if sb.get("status") == "PATCHES_GENERATED":
        summary += f"""## SWE-bench Lite Results

| Metric | Value |
|--------|-------|
| Problems | {sb.get('total_problems', 'N/A')} |
| Patches Generated | {sb.get('generated', 'N/A')} |
| Errors | {sb.get('errors', 'N/A')} |
| Model | {sb.get('model', 'N/A')} |
| Time | {sb.get('elapsed_seconds', 'N/A')}s |

**Next Step:** Run the SWE-bench evaluator to validate patches:

```bash
python -m swebench.harness.run_evaluation \\
    --predictions {sb.get('predictions_file', 'swebench-predictions.json')} \\
    --max_workers 4
```

"""
    else:
        summary += f"""## SWE-bench Lite

Status: {sb.get('status', 'UNKNOWN')}

To run: `./benchmarks/run-benchmarks.sh swebench --execute`

"""

summary += """## Methodology

Loki Mode uses its multi-agent architecture to solve each problem:
1. **Architect Agent** analyzes the problem
2. **Engineer Agent** implements the solution
3. **QA Agent** validates with test cases
4. **Review Agent** checks code quality

This mirrors real-world software development more accurately than single-agent approaches.

## Running Benchmarks

```bash
# Setup only (download datasets)
./benchmarks/run-benchmarks.sh all

# Execute with Claude
./benchmarks/run-benchmarks.sh humaneval --execute
./benchmarks/run-benchmarks.sh humaneval --execute --limit 10 # First 10 only
./benchmarks/run-benchmarks.sh swebench --execute --limit 5 # First 5 only

# Use different model
./benchmarks/run-benchmarks.sh humaneval --execute --model opus
```
"""

with open(f"{RESULTS_DIR}/SUMMARY.md", 'w') as f:
    f.write(summary)

print(f"Summary saved to {RESULTS_DIR}/SUMMARY.md")
SUMMARY_GEN
    then
        # Do not claim success when the generator did not run to completion.
        log_error "Summary generation failed"
        return 1
    fi

    log_success "Summary generated: $RESULTS_DIR/SUMMARY.md"
}
1899
-
1900
- #===============================================================================
1901
- # Main
1902
- #===============================================================================
1903
-
1904
#######################################
# Entry point: parse arguments, run the selected benchmark(s), then write
# the summary.
# Globals:   BENCHMARK, EXECUTE_MODE (read; expected to be set by
#            parse_args); SCRIPT_DIR, RESULTS_DIR, PROJECT_DIR (exported)
# Arguments: command-line arguments, forwarded to parse_args
# Outputs:   banner and status lines on stdout; usage on stderr
# Returns:   exits 1 on an unknown benchmark name
#######################################
main() {
    parse_args "$@"

    # Reject an unknown benchmark before the banner and before any
    # environment setup runs on behalf of an invalid request.
    case "$BENCHMARK" in
        humaneval|swebench|all)
            ;;
        *)
            log_error "Unknown benchmark: $BENCHMARK"
            echo "Usage: $0 [humaneval|swebench|all] [--execute] [--limit N]" >&2
            exit 1
            ;;
    esac

    echo ""
    echo "========================================"
    echo " Loki Mode Benchmark Runner"
    if [ "$EXECUTE_MODE" = true ]; then
        echo " Mode: EXECUTE"
    else
        echo " Mode: SETUP"
    fi
    echo "========================================"
    echo ""

    # The embedded Python helpers read these from the environment.
    export SCRIPT_DIR RESULTS_DIR PROJECT_DIR

    setup_environment

    case "$BENCHMARK" in
        humaneval)
            run_humaneval
            ;;
        swebench)
            run_swebench
            ;;
        all)
            run_humaneval
            run_swebench
            ;;
    esac

    generate_summary

    echo ""
    log_success "Benchmarks complete!"
    log_info "Results directory: $RESULTS_DIR"
    echo ""
}
1947
-
1948
- main "$@"