@tyvm/knowhow 0.0.90 → 0.0.92

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (272)
  1. package/.depcheckrc +30 -0
  2. package/bin/knowhow.js +1 -1
  3. package/package.json +8 -34
  4. package/src/agents/configurable/ConfigAgent.ts +2 -2
  5. package/src/agents/tools/executeScript/index.ts +5 -0
  6. package/src/agents/tools/googleSearch.ts +2 -2
  7. package/src/agents/tools/index.ts +0 -3
  8. package/src/agents/tools/list.ts +0 -147
  9. package/src/agents/tools/loadWebpage.ts +3 -113
  10. package/src/auth/browserLogin.ts +10 -13
  11. package/src/chat/modules/AgentModule.ts +0 -1
  12. package/src/chat/types.ts +1 -1
  13. package/src/cli.ts +63 -3
  14. package/src/clients/gemini.ts +96 -25
  15. package/src/clients/http.ts +7 -11
  16. package/src/clients/pricing/google.ts +122 -26
  17. package/src/conversion.ts +24 -54
  18. package/src/index.ts +15 -20
  19. package/src/login.ts +5 -6
  20. package/src/plugins/language.ts +0 -4
  21. package/src/plugins/plugins.ts +0 -14
  22. package/src/plugins/url.ts +31 -12
  23. package/src/services/EmbeddingsService.ts +70 -0
  24. package/src/services/KnowhowClient.ts +34 -34
  25. package/src/{plugins/downloader/downloader.ts → services/MediaProcessorService.ts} +109 -267
  26. package/src/services/S3.ts +19 -87
  27. package/src/services/index.ts +8 -8
  28. package/src/services/modules/index.ts +12 -3
  29. package/src/services/modules/types.ts +8 -2
  30. package/src/services/script-execution/ScriptExecutor.ts +29 -10
  31. package/src/services/script-execution/ScriptPolicy.ts +6 -2
  32. package/src/types.ts +1 -0
  33. package/src/utils/http.ts +127 -0
  34. package/src/workers/auth/PasskeySetup.ts +7 -11
  35. package/tests/clients/AIClient.test.ts +24 -21
  36. package/tests/manual/file-edits/figma.test.ts +3 -70
  37. package/tests/plugins/language/languagePlugin-content-triggers.test.ts +2 -0
  38. package/tests/plugins/language/languagePlugin.test.ts +2 -0
  39. package/tests/processors/ToolResponseCache.test.ts +2 -2
  40. package/tests/test.spec.ts +0 -14
  41. package/tests/unit/modules/moduleLoading.test.ts +12 -4
  42. package/tests/unit/plugins/pluginLoading.test.ts +6 -6
  43. package/ts_build/package.json +8 -34
  44. package/ts_build/src/agents/tools/ast/astAppendNode.d.ts +1 -1
  45. package/ts_build/src/agents/tools/ast/astAppendNode.js +2 -90
  46. package/ts_build/src/agents/tools/ast/astAppendNode.js.map +1 -1
  47. package/ts_build/src/agents/tools/ast/astDeleteNode.d.ts +1 -1
  48. package/ts_build/src/agents/tools/ast/astDeleteNode.js +2 -88
  49. package/ts_build/src/agents/tools/ast/astDeleteNode.js.map +1 -1
  50. package/ts_build/src/agents/tools/ast/astEditNode.d.ts +1 -1
  51. package/ts_build/src/agents/tools/ast/astEditNode.js +2 -90
  52. package/ts_build/src/agents/tools/ast/astEditNode.js.map +1 -1
  53. package/ts_build/src/agents/tools/ast/astGetPathForLine.d.ts +1 -1
  54. package/ts_build/src/agents/tools/ast/astGetPathForLine.js +2 -72
  55. package/ts_build/src/agents/tools/ast/astGetPathForLine.js.map +1 -1
  56. package/ts_build/src/agents/tools/ast/astListPaths.d.ts +1 -1
  57. package/ts_build/src/agents/tools/ast/astListPaths.js +2 -72
  58. package/ts_build/src/agents/tools/ast/astListPaths.js.map +1 -1
  59. package/ts_build/src/agents/tools/executeScript/index.d.ts +3 -2
  60. package/ts_build/src/agents/tools/executeScript/index.js +4 -1
  61. package/ts_build/src/agents/tools/executeScript/index.js.map +1 -1
  62. package/ts_build/src/agents/tools/googleSearch.js +2 -2
  63. package/ts_build/src/agents/tools/googleSearch.js.map +1 -1
  64. package/ts_build/src/agents/tools/index.d.ts +0 -3
  65. package/ts_build/src/agents/tools/index.js +0 -3
  66. package/ts_build/src/agents/tools/index.js.map +1 -1
  67. package/ts_build/src/agents/tools/list.js +0 -138
  68. package/ts_build/src/agents/tools/list.js.map +1 -1
  69. package/ts_build/src/agents/tools/loadWebpage.js +1 -89
  70. package/ts_build/src/agents/tools/loadWebpage.js.map +1 -1
  71. package/ts_build/src/agents/tools/textSearch.d.ts +1 -1
  72. package/ts_build/src/auth/browserLogin.js +7 -7
  73. package/ts_build/src/auth/browserLogin.js.map +1 -1
  74. package/ts_build/src/chat/modules/AgentModule.js.map +1 -1
  75. package/ts_build/src/chat/types.d.ts +1 -1
  76. package/ts_build/src/cli.d.ts +1 -1
  77. package/ts_build/src/cli.js +47 -1
  78. package/ts_build/src/cli.js.map +1 -1
  79. package/ts_build/src/clients/gemini.d.ts +1 -73
  80. package/ts_build/src/clients/gemini.js +57 -19
  81. package/ts_build/src/clients/gemini.js.map +1 -1
  82. package/ts_build/src/clients/http.js +5 -9
  83. package/ts_build/src/clients/http.js.map +1 -1
  84. package/ts_build/src/clients/pricing/google.d.ts +17 -73
  85. package/ts_build/src/clients/pricing/google.js +47 -10
  86. package/ts_build/src/clients/pricing/google.js.map +1 -1
  87. package/ts_build/src/conversion.d.ts +1 -4
  88. package/ts_build/src/conversion.js +12 -27
  89. package/ts_build/src/conversion.js.map +1 -1
  90. package/ts_build/src/index.d.ts +4 -0
  91. package/ts_build/src/index.js +15 -14
  92. package/ts_build/src/index.js.map +1 -1
  93. package/ts_build/src/login.js +5 -4
  94. package/ts_build/src/login.js.map +1 -1
  95. package/ts_build/src/plugins/downloader/downloader.js +3 -3
  96. package/ts_build/src/plugins/downloader/downloader.js.map +1 -1
  97. package/ts_build/src/plugins/language.js.map +1 -1
  98. package/ts_build/src/plugins/plugins.js +0 -14
  99. package/ts_build/src/plugins/plugins.js.map +1 -1
  100. package/ts_build/src/plugins/tree-sitter/editor.d.ts +3 -32
  101. package/ts_build/src/plugins/tree-sitter/editor.js +6 -208
  102. package/ts_build/src/plugins/tree-sitter/editor.js.map +1 -1
  103. package/ts_build/src/plugins/tree-sitter/parser.d.ts +19 -54
  104. package/ts_build/src/plugins/tree-sitter/parser.js +19 -293
  105. package/ts_build/src/plugins/tree-sitter/parser.js.map +1 -1
  106. package/ts_build/src/plugins/tree-sitter/simple-paths.d.ts +2 -15
  107. package/ts_build/src/plugins/tree-sitter/simple-paths.js +2 -324
  108. package/ts_build/src/plugins/tree-sitter/simple-paths.js.map +1 -1
  109. package/ts_build/src/plugins/url.js +27 -8
  110. package/ts_build/src/plugins/url.js.map +1 -1
  111. package/ts_build/src/services/EmbeddingsService.d.ts +14 -0
  112. package/ts_build/src/services/EmbeddingsService.js +33 -0
  113. package/ts_build/src/services/EmbeddingsService.js.map +1 -0
  114. package/ts_build/src/services/GitHub.js +2 -2
  115. package/ts_build/src/services/GitHub.js.map +1 -1
  116. package/ts_build/src/services/KnowhowClient.d.ts +29 -29
  117. package/ts_build/src/services/KnowhowClient.js +33 -33
  118. package/ts_build/src/services/KnowhowClient.js.map +1 -1
  119. package/ts_build/src/services/MediaProcessorService.d.ts +22 -0
  120. package/ts_build/src/services/MediaProcessorService.js +215 -0
  121. package/ts_build/src/services/MediaProcessorService.js.map +1 -0
  122. package/ts_build/src/services/S3.d.ts +0 -4
  123. package/ts_build/src/services/S3.js +14 -60
  124. package/ts_build/src/services/S3.js.map +1 -1
  125. package/ts_build/src/services/index.d.ts +6 -5
  126. package/ts_build/src/services/index.js +6 -6
  127. package/ts_build/src/services/index.js.map +1 -1
  128. package/ts_build/src/services/modules/index.js +12 -3
  129. package/ts_build/src/services/modules/index.js.map +1 -1
  130. package/ts_build/src/services/modules/types.d.ts +8 -2
  131. package/ts_build/src/services/script-execution/ScriptExecutor.js +22 -7
  132. package/ts_build/src/services/script-execution/ScriptExecutor.js.map +1 -1
  133. package/ts_build/src/services/script-execution/ScriptPolicy.d.ts +1 -1
  134. package/ts_build/src/services/script-execution/ScriptPolicy.js +4 -2
  135. package/ts_build/src/services/script-execution/ScriptPolicy.js.map +1 -1
  136. package/ts_build/src/types.d.ts +1 -0
  137. package/ts_build/src/types.js +1 -0
  138. package/ts_build/src/types.js.map +1 -1
  139. package/ts_build/src/utils/http.d.ts +27 -0
  140. package/ts_build/src/utils/http.js +98 -0
  141. package/ts_build/src/utils/http.js.map +1 -0
  142. package/ts_build/src/workers/auth/PasskeySetup.js +6 -7
  143. package/ts_build/src/workers/auth/PasskeySetup.js.map +1 -1
  144. package/ts_build/tests/clients/AIClient.test.js +11 -14
  145. package/ts_build/tests/clients/AIClient.test.js.map +1 -1
  146. package/ts_build/tests/manual/file-edits/figma.test.d.ts +0 -1
  147. package/ts_build/tests/manual/file-edits/figma.test.js +1 -46
  148. package/ts_build/tests/manual/file-edits/figma.test.js.map +1 -1
  149. package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js +2 -0
  150. package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js.map +1 -1
  151. package/ts_build/tests/plugins/language/languagePlugin.test.js +2 -0
  152. package/ts_build/tests/plugins/language/languagePlugin.test.js.map +1 -1
  153. package/ts_build/tests/processors/ToolResponseCache.test.js +2 -2
  154. package/ts_build/tests/processors/ToolResponseCache.test.js.map +1 -1
  155. package/ts_build/tests/test.spec.js +0 -14
  156. package/ts_build/tests/test.spec.js.map +1 -1
  157. package/ts_build/tests/tree-sitter/tree-sitter.test.d.ts +0 -1
  158. package/ts_build/tests/tree-sitter/tree-sitter.test.js +2 -183
  159. package/ts_build/tests/tree-sitter/tree-sitter.test.js.map +1 -1
  160. package/ts_build/tests/unit/modules/moduleLoading.test.js +11 -4
  161. package/ts_build/tests/unit/modules/moduleLoading.test.js.map +1 -1
  162. package/ts_build/tests/unit/plugins/pluginLoading.test.js +4 -4
  163. package/ts_build/tests/unit/plugins/pluginLoading.test.js.map +1 -1
  164. package/benchmarks/.dockerignore +0 -7
  165. package/benchmarks/README.md +0 -166
  166. package/benchmarks/docker/Dockerfile +0 -68
  167. package/benchmarks/example-config.yml +0 -27
  168. package/benchmarks/jest.config.js +0 -13
  169. package/benchmarks/package-lock.json +0 -4297
  170. package/benchmarks/package.json +0 -39
  171. package/benchmarks/results/27b0a06/2025-09-27/xai/xai-grok-code-fast-1.json +0 -2909
  172. package/benchmarks/results/4057aed/2025-08-14/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -1671
  173. package/benchmarks/results/4542435/2025-08-05/lms/lms-openai-gpt-oss-20b.json +0 -2814
  174. package/benchmarks/results/4542435/2025-08-05/lms/lms-qwen-qwen3-30b-a3b-2507.json +0 -2014
  175. package/benchmarks/results/4fb9125/2025-08-07/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3121
  176. package/benchmarks/results/5766aee/2025-08-02/lms-qwen/qwen3-coder-30b.json +0 -98
  177. package/benchmarks/results/6d73808/2025-08-07/openai/openai-gpt-5.json +0 -3256
  178. package/benchmarks/results/77bf0a6/2025-08-02/lms-qwen/qwen3-30b-a3b-2507.json +0 -4298
  179. package/benchmarks/results/8c0d445/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3031
  180. package/benchmarks/results/8c0d445/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +0 -2990
  181. package/benchmarks/results/ac6b2ab/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3256
  182. package/benchmarks/results/ac6b2ab/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +0 -3007
  183. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +0 -3256
  184. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-mini-2025-04-14.json +0 -3036
  185. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-nano-2025-04-14.json +0 -3280
  186. package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-30b-a3b-2507.json +0 -1920
  187. package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-coder-30b.json +0 -3281
  188. package/benchmarks/results/b502ed9/2025-08-03/lms-qwen/qwen3-coder-30b.json +0 -2896
  189. package/benchmarks/results/d1a8129/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +0 -3011
  190. package/benchmarks/results/e60471c/2025-08-03/lms/qwen3-30b-a3b-2507.json +0 -3003
  191. package/benchmarks/scripts/build-and-run.sh +0 -47
  192. package/benchmarks/scripts/clone-exercism.sh +0 -92
  193. package/benchmarks/scripts/validate.sh +0 -48
  194. package/benchmarks/src/__tests__/runner.test.ts +0 -27
  195. package/benchmarks/src/cli.ts +0 -90
  196. package/benchmarks/src/evaluators/EvaluatorRegistry.ts +0 -64
  197. package/benchmarks/src/evaluators/JavaScriptEvaluator.ts +0 -183
  198. package/benchmarks/src/evaluators/index.ts +0 -3
  199. package/benchmarks/src/evaluators/types.ts +0 -22
  200. package/benchmarks/src/index.ts +0 -3
  201. package/benchmarks/src/providers.ts +0 -13
  202. package/benchmarks/src/runner.ts +0 -824
  203. package/benchmarks/src/types.ts +0 -63
  204. package/benchmarks/tsconfig.json +0 -19
  205. package/leaderboard/README.md +0 -148
  206. package/leaderboard/app/api/benchmark-data/route.ts +0 -131
  207. package/leaderboard/app/api/benchmark-detail/route.ts +0 -172
  208. package/leaderboard/app/details/[model]/[provider]/[language]/page.tsx +0 -501
  209. package/leaderboard/app/exercise/[model]/[provider]/[language]/[exercise]/page.tsx +0 -375
  210. package/leaderboard/app/globals.css +0 -27
  211. package/leaderboard/app/layout.tsx +0 -21
  212. package/leaderboard/app/page.tsx +0 -170
  213. package/leaderboard/components/LeaderboardTable.tsx +0 -168
  214. package/leaderboard/components/PerformanceChart.tsx +0 -109
  215. package/leaderboard/next-env.d.ts +0 -5
  216. package/leaderboard/next.config.js +0 -4
  217. package/leaderboard/package-lock.json +0 -6363
  218. package/leaderboard/package.json +0 -28
  219. package/leaderboard/postcss.config.js +0 -6
  220. package/leaderboard/tailwind.config.js +0 -17
  221. package/leaderboard/tsconfig.json +0 -28
  222. package/leaderboard/types/benchmark.ts +0 -67
  223. package/leaderboard/utils/dataProcessor.ts +0 -33
  224. package/src/agents/tools/asana/definitions.ts +0 -199
  225. package/src/agents/tools/asana/index.ts +0 -108
  226. package/src/agents/tools/ast/astAppendNode.ts +0 -90
  227. package/src/agents/tools/ast/astDeleteNode.ts +0 -88
  228. package/src/agents/tools/ast/astEditNode.ts +0 -95
  229. package/src/agents/tools/ast/astGetPathForLine.ts +0 -73
  230. package/src/agents/tools/ast/astListPaths.ts +0 -66
  231. package/src/agents/tools/ast/index.ts +0 -7
  232. package/src/agents/tools/github/definitions.ts +0 -89
  233. package/src/agents/tools/github/index.ts +0 -67
  234. package/src/chat-old.ts +0 -446
  235. package/src/plugins/asana.ts +0 -146
  236. package/src/plugins/downloader/plugin.ts +0 -103
  237. package/src/plugins/downloader/types.ts +0 -92
  238. package/src/plugins/figma.ts +0 -158
  239. package/src/plugins/github.ts +0 -219
  240. package/src/plugins/jira.ts +0 -115
  241. package/src/plugins/linear.ts +0 -230
  242. package/src/plugins/notion.ts +0 -179
  243. package/src/plugins/tree-sitter/editor.ts +0 -369
  244. package/src/plugins/tree-sitter/lang-packs/index.ts +0 -23
  245. package/src/plugins/tree-sitter/lang-packs/java.ts +0 -59
  246. package/src/plugins/tree-sitter/lang-packs/javascript.ts +0 -57
  247. package/src/plugins/tree-sitter/lang-packs/python.ts +0 -45
  248. package/src/plugins/tree-sitter/lang-packs/types.ts +0 -79
  249. package/src/plugins/tree-sitter/lang-packs/typescript.ts +0 -49
  250. package/src/plugins/tree-sitter/parser.ts +0 -470
  251. package/src/plugins/tree-sitter/simple-paths.ts +0 -467
  252. package/src/services/GitHub.ts +0 -59
  253. package/tests/tree-sitter/editor.test.ts +0 -113
  254. package/tests/tree-sitter/invalid.test.ts +0 -299
  255. package/tests/tree-sitter/paths/common-edits.test.ts +0 -564
  256. package/tests/tree-sitter/paths/debug-exact-position.test.ts +0 -44
  257. package/tests/tree-sitter/paths/debug-line-indexing.test.ts +0 -49
  258. package/tests/tree-sitter/paths/debug-paths.test.ts +0 -90
  259. package/tests/tree-sitter/paths/paths.test.ts +0 -170
  260. package/tests/tree-sitter/paths/simple-paths.test.ts +0 -367
  261. package/tests/tree-sitter/sample-after.ts +0 -48
  262. package/tests/tree-sitter/sample-before.ts +0 -25
  263. package/tests/tree-sitter/test-files/completely-broken.ts +0 -7
  264. package/tests/tree-sitter/test-files/duplicate-braces.ts +0 -39
  265. package/tests/tree-sitter/test-files/invalid-nesting.ts +0 -39
  266. package/tests/tree-sitter/test-files/malformed-signature.ts +0 -39
  267. package/tests/tree-sitter/test-files/mismatched-parens.ts +0 -39
  268. package/tests/tree-sitter/test-files/missing-semicolon.ts +0 -39
  269. package/tests/tree-sitter/test-files/partially-broken.ts +0 -20
  270. package/tests/tree-sitter/test-files/specific-errors.ts +0 -14
  271. package/tests/tree-sitter/test-files/unclosed-string.ts +0 -39
  272. package/tests/tree-sitter/tree-sitter.test.ts +0 -251
@@ -499,29 +499,81 @@ export class GenericGeminiClient implements GenericClient {
499
499
 
500
500
  let cost = 0;
501
501
 
502
- if ("promptTokenCount" in usage && usage.promptTokenCount) {
503
- if (usage.promptTokenCount > 200000 && pricing.input_gt_200k) {
504
- cost += (usage.promptTokenCount * pricing.input_gt_200k) / 1e6;
505
- } else {
506
- cost += (usage.promptTokenCount * pricing.input) / 1e6;
502
+ // ── Input tokens ──────────────────────────────────────────────────────────
503
+ // Use per-modality breakdowns when available (inputTokensDetails) so that
504
+ // audio tokens (which can cost 3–10× more than text) are billed correctly.
505
+ const inputDetails = (usage as any).inputTokensDetails as
506
+ | { modality?: string; tokenCount?: number }[]
507
+ | undefined;
508
+
509
+ if (
510
+ inputDetails &&
511
+ inputDetails.length > 0 &&
512
+ pricing.input_audio !== undefined
513
+ ) {
514
+ // Modality-aware billing
515
+ for (const detail of inputDetails) {
516
+ const tokens = detail.tokenCount || 0;
517
+ if (!tokens) continue;
518
+ const modality = (detail.modality || "").toUpperCase();
519
+ if (modality === "AUDIO") {
520
+ const rate = pricing.input_audio ?? pricing.input ?? 0;
521
+ cost += (tokens * rate) / 1e6;
522
+ } else {
523
+ // TEXT / IMAGE / VIDEO all use the base `input` rate
524
+ const isGt200k = tokens > 200_000 && pricing.input_gt_200k;
525
+ const rate = isGt200k ? pricing.input_gt_200k! : pricing.input ?? 0;
526
+ cost += (tokens * rate) / 1e6;
527
+ }
507
528
  }
529
+ } else if ("promptTokenCount" in usage && usage.promptTokenCount) {
530
+ // Fallback: no modality breakdown available — use total token count
531
+ const isGt200k =
532
+ usage.promptTokenCount > 200_000 && pricing.input_gt_200k;
533
+ const rate = isGt200k ? pricing.input_gt_200k! : pricing.input ?? 0;
534
+ cost += (usage.promptTokenCount * rate) / 1e6;
508
535
  }
509
536
 
510
- if ("responseTokenCount" in usage && usage.responseTokenCount) {
511
- if (usage.responseTokenCount > 200000 && pricing.output_gt_200k) {
512
- cost += (usage.responseTokenCount * pricing.output_gt_200k) / 1e6;
513
- } else {
514
- cost += (usage.responseTokenCount * pricing.output) / 1e6;
537
+ // ── Output tokens ─────────────────────────────────────────────────────────
538
+ const outputDetails = (usage as any).outputTokensDetails as
539
+ | { modality?: string; tokenCount?: number }[]
540
+ | undefined;
541
+
542
+ if (
543
+ outputDetails &&
544
+ outputDetails.length > 0 &&
545
+ pricing.output_audio !== undefined
546
+ ) {
547
+ // Modality-aware billing
548
+ for (const detail of outputDetails) {
549
+ const tokens = detail.tokenCount || 0;
550
+ if (!tokens) continue;
551
+ const modality = (detail.modality || "").toUpperCase();
552
+ if (modality === "AUDIO") {
553
+ const rate = pricing.output_audio ?? pricing.output ?? 0;
554
+ cost += (tokens * rate) / 1e6;
555
+ } else {
556
+ const isGt200k = tokens > 200_000 && pricing.output_gt_200k;
557
+ const rate = isGt200k ? pricing.output_gt_200k! : pricing.output ?? 0;
558
+ cost += (tokens * rate) / 1e6;
559
+ }
515
560
  }
561
+ } else if ("responseTokenCount" in usage && usage.responseTokenCount) {
562
+ // Fallback: no modality breakdown — use total token count
563
+ const isGt200k =
564
+ usage.responseTokenCount > 200_000 && pricing.output_gt_200k;
565
+ const rate = isGt200k ? pricing.output_gt_200k! : pricing.output ?? 0;
566
+ cost += (usage.responseTokenCount * rate) / 1e6;
516
567
  }
517
568
 
569
+ // ── Context caching ───────────────────────────────────────────────────────
518
570
  if (
519
571
  "cachedContentTokenCount" in usage &&
520
572
  usage.cachedContentTokenCount &&
521
573
  pricing.context_caching
522
574
  ) {
523
575
  if (
524
- usage.cachedContentTokenCount > 200000 &&
576
+ usage.cachedContentTokenCount > 200_000 &&
525
577
  pricing.context_caching_gt_200k
526
578
  ) {
527
579
  cost +=
@@ -531,6 +583,7 @@ export class GenericGeminiClient implements GenericClient {
531
583
  cost += (usage.cachedContentTokenCount * pricing.context_caching) / 1e6;
532
584
  }
533
585
  }
586
+
534
587
  return cost;
535
588
  }
536
589
 
@@ -675,9 +728,7 @@ export class GenericGeminiClient implements GenericClient {
675
728
  const images = generatedImages.map((img) => ({
676
729
  // imageBytes is already a base64-encoded string from the API
677
730
  // Don't re-encode it, just use it directly
678
- b64_json: img.image?.imageBytes
679
- ? img.image.imageBytes
680
- : "",
731
+ b64_json: img.image?.imageBytes ? img.image.imageBytes : "",
681
732
  revised_prompt: options.prompt,
682
733
  }));
683
734
 
@@ -752,9 +803,11 @@ export class GenericGeminiClient implements GenericClient {
752
803
  },
753
804
  });
754
805
 
755
- // Calculate estimated cost: $0.35 per second of video
806
+ // Calculate estimated cost using model-specific per-second rate
756
807
  const duration = options.duration || 5; // Default 5 seconds
757
- const usdCost = (options.n || 1) * duration * 0.35;
808
+ const pricingEntry = GeminiTextPricing[options.model];
809
+ const ratePerSec = pricingEntry?.video_generation ?? 0.4; // default to $0.40/sec (Veo 3 rate)
810
+ const usdCost = (options.n || 1) * duration * ratePerSec;
758
811
 
759
812
  // Return the operation name as jobId so callers can use getVideoStatus / downloadVideo
760
813
  return {
@@ -769,7 +822,9 @@ export class GenericGeminiClient implements GenericClient {
769
822
  }
770
823
  }
771
824
 
772
- async getVideoStatus(options: VideoStatusOptions): Promise<VideoStatusResponse> {
825
+ async getVideoStatus(
826
+ options: VideoStatusOptions
827
+ ): Promise<VideoStatusResponse> {
773
828
  try {
774
829
  const operation = await this.client.operations.getVideosOperation({
775
830
  operation: { name: options.jobId },
@@ -817,7 +872,9 @@ export class GenericGeminiClient implements GenericClient {
817
872
  * Download a video (or any file) via the Google GenAI Files API.
818
873
  * Pass either `fileId` (the files/* name) or `uri` (the full URI).
819
874
  */
820
- async downloadVideo(options: FileDownloadOptions): Promise<FileDownloadResponse> {
875
+ async downloadVideo(
876
+ options: FileDownloadOptions
877
+ ): Promise<FileDownloadResponse> {
821
878
  return this.downloadFile(options);
822
879
  }
823
880
 
@@ -841,7 +898,9 @@ export class GenericGeminiClient implements GenericClient {
841
898
  uri: uploadedFile.uri,
842
899
  url: uploadedFile.downloadUri || uploadedFile.uri,
843
900
  mimeType: uploadedFile.mimeType,
844
- sizeBytes: uploadedFile.sizeBytes ? Number(uploadedFile.sizeBytes) : undefined,
901
+ sizeBytes: uploadedFile.sizeBytes
902
+ ? Number(uploadedFile.sizeBytes)
903
+ : undefined,
845
904
  };
846
905
  } catch (error) {
847
906
  console.error("Error uploading file to Google GenAI Files API:", error);
@@ -860,7 +919,9 @@ export class GenericGeminiClient implements GenericClient {
860
919
  * For generated videos the `file` param accepts the Video object directly
861
920
  * (uri + optional mimeType), which the SDK resolves to a download URL.
862
921
  */
863
- async downloadFile(options: FileDownloadOptions): Promise<FileDownloadResponse> {
922
+ async downloadFile(
923
+ options: FileDownloadOptions
924
+ ): Promise<FileDownloadResponse> {
864
925
  const mimeMap: Record<string, string> = {
865
926
  ".mp4": "video/mp4",
866
927
  ".webm": "video/webm",
@@ -894,7 +955,9 @@ export class GenericGeminiClient implements GenericClient {
894
955
 
895
956
  const response = await fetch(downloadUrl);
896
957
  if (!response.ok) {
897
- throw new Error(`HTTP ${response.status} ${response.statusText} downloading ${downloadUrl}`);
958
+ throw new Error(
959
+ `HTTP ${response.status} ${response.statusText} downloading ${downloadUrl}`
960
+ );
898
961
  }
899
962
 
900
963
  const arrayBuffer = await response.arrayBuffer();
@@ -902,23 +965,31 @@ export class GenericGeminiClient implements GenericClient {
902
965
 
903
966
  // If caller supplied a filePath, write to it (creating dirs as needed)
904
967
  if (options.filePath) {
905
- fsSync.mkdirSync(pathSync.dirname(options.filePath), { recursive: true });
968
+ fsSync.mkdirSync(pathSync.dirname(options.filePath), {
969
+ recursive: true,
970
+ });
906
971
  fsSync.writeFileSync(options.filePath, data);
907
972
  }
908
973
 
909
974
  // Infer mime type from the URI/fileId first (more reliable), then from the path
910
- const sourceForExt = options.uri || options.fileId || options.filePath || "";
975
+ const sourceForExt =
976
+ options.uri || options.fileId || options.filePath || "";
911
977
  const ext = pathSync.extname(sourceForExt.split("?")[0]).toLowerCase();
912
978
  const mimeType = mimeMap[ext] || "video/mp4";
913
979
 
914
980
  return { data, mimeType };
915
981
  } catch (error) {
916
- console.error("Error downloading file from Google GenAI Files API:", error);
982
+ console.error(
983
+ "Error downloading file from Google GenAI Files API:",
984
+ error
985
+ );
917
986
  throw error;
918
987
  }
919
988
  }
920
989
 
921
- getContextLimit(model: string): { contextLimit: number; threshold: number } | undefined {
990
+ getContextLimit(
991
+ model: string
992
+ ): { contextLimit: number; threshold: number } | undefined {
922
993
  const contextLimit = ContextLimits[model];
923
994
  if (contextLimit === undefined) return undefined;
924
995
  const pricing = GeminiTextPricing[model];
@@ -1,4 +1,4 @@
1
- import axios from "axios";
1
+ import http from "../utils/http";
2
2
  import {
3
3
  GenericClient,
4
4
  CompletionOptions,
@@ -91,12 +91,10 @@ export class HttpClient implements GenericClient {
91
91
  }),
92
92
  };
93
93
 
94
- const response = await axios.post(
94
+ const response = await http.post(
95
95
  `${this.baseUrl}/v1/chat/completions`,
96
96
  body,
97
- {
98
- headers: this.headers,
99
- }
97
+ { headers: this.headers as Record<string, string> }
100
98
  );
101
99
 
102
100
  const data = response.data;
@@ -123,15 +121,13 @@ export class HttpClient implements GenericClient {
123
121
 
124
122
  async createEmbedding(options: EmbeddingOptions): Promise<EmbeddingResponse> {
125
123
  return this.withRetry(async () => {
126
- const response = await axios.post(
124
+ const response = await http.post(
127
125
  `${this.baseUrl}/v1/embeddings`,
128
126
  {
129
127
  model: options.model,
130
128
  input: options.input,
131
129
  },
132
- {
133
- headers: this.headers,
134
- }
130
+ { headers: this.headers as Record<string, string> }
135
131
  );
136
132
 
137
133
  const data = response.data;
@@ -152,8 +148,8 @@ export class HttpClient implements GenericClient {
152
148
 
153
149
  async getModels() {
154
150
  return this.withRetry(async () => {
155
- const response = await axios.get(`${this.baseUrl}/v1/models`, {
156
- headers: this.headers,
151
+ const response = await http.get(`${this.baseUrl}/v1/models`, {
152
+ headers: this.headers as Record<string, string>,
157
153
  });
158
154
 
159
155
  const data = response.data?.data;
@@ -1,7 +1,38 @@
1
1
  import { Models, EmbeddingModels } from "../../types";
2
2
 
3
- export const GeminiTextPricing = {
4
- // Gemini 3.x
3
+ /**
4
+ * Gemini model pricing per 1M tokens (USD).
5
+ *
6
+ * Many models have modality-dependent pricing:
7
+ * - `input`: text / image / video input rate
8
+ * - `input_audio`: audio input rate (if different from text)
9
+ * - `output`: text output rate
10
+ * - `output_audio`: audio output rate (if different from text)
11
+ * - `context_caching`: context cache read rate (text/image/video)
12
+ * - `context_caching_audio`: context cache read rate for audio tokens
13
+ * - `context_caching_gt_200k`: rate for prompts > 200k tokens
14
+ * - `context_caching_storage`: storage cost per 1M tokens per hour
15
+ * - `image_generation`: per-image cost for image output models
16
+ * - `video_generation`: per-second cost for video generation models
17
+ */
18
+ export interface GeminiModelPricing {
19
+ input?: number;
20
+ input_audio?: number;
21
+ input_gt_200k?: number;
22
+ output?: number;
23
+ output_audio?: number;
24
+ output_gt_200k?: number;
25
+ context_caching?: number;
26
+ context_caching_audio?: number;
27
+ context_caching_gt_200k?: number;
28
+ context_caching_storage?: number; // per 1M tokens per hour
29
+ image_generation?: number;
30
+ video_generation?: number;
31
+ thinking_output?: number;
32
+ }
33
+
34
+ export const GeminiPricing: Record<string, GeminiModelPricing> = {
35
+ // ── Gemini 3.x ────────────────────────────────────────────────────────────
5
36
  [Models.google.Gemini_31_Pro_Preview]: {
6
37
  input: 2,
7
38
  input_gt_200k: 4,
@@ -9,28 +40,49 @@ export const GeminiTextPricing = {
9
40
  output_gt_200k: 18,
10
41
  context_caching: 0.2,
11
42
  context_caching_gt_200k: 0.4,
43
+ context_caching_storage: 4.5,
12
44
  },
45
+ // gemini-3.1-flash-image-preview: text/image input, text+image output
13
46
  [Models.google.Gemini_31_Flash_Image_Preview]: {
14
47
  input: 0.5,
15
48
  output: 3,
16
- image_generation: 0.045, // per 0.5K image
49
+ image_generation: 0.045, // per 0.5K image ($60/1M tokens × 747 tokens)
17
50
  },
51
+ // gemini-3.1-flash-lite-preview: audio input costs 2× text
18
52
  [Models.google.Gemini_31_Flash_Lite_Preview]: {
19
53
  input: 0.25,
54
+ input_audio: 0.5,
20
55
  output: 1.5,
21
56
  context_caching: 0.025,
22
- },
57
+ context_caching_audio: 0.05,
58
+ context_caching_storage: 1.0,
59
+ },
60
+ // gemini-3.1-flash-live-preview: Live API, per-modality rates
61
+ // Input: $0.75 text, $3.00 audio/video, $1.00 image
62
+ // Output: $4.50 text, $12.00 audio
63
+ [Models.google.Gemini_31_Flash_Live_Preview]: {
64
+ input: 0.75, // text
65
+ input_audio: 3.0, // audio / video
66
+ output: 4.5, // text
67
+ output_audio: 12.0, // audio
68
+ },
69
+ // gemini-3-flash-preview: audio input costs 2× text
23
70
  [Models.google.Gemini_3_Flash_Preview]: {
24
71
  input: 0.5,
72
+ input_audio: 1.0,
25
73
  output: 3.0,
26
74
  context_caching: 0.05,
75
+ context_caching_audio: 0.10,
76
+ context_caching_storage: 1.0,
27
77
  },
78
+ // gemini-3-pro-image-preview: text+image input, text+image output
28
79
  [Models.google.Gemini_3_Pro_Image_Preview]: {
29
80
  input: 2,
30
81
  output: 12,
31
- image_generation: 0.134, // per 1K/2K image
82
+ image_generation: 0.134, // per 1K/2K image ($120/1M tokens × 1120 tokens)
32
83
  },
33
- // Gemini 2.5
84
+
85
+ // ── Gemini 2.5 ────────────────────────────────────────────────────────────
34
86
  [Models.google.Gemini_25_Pro]: {
35
87
  input: 1.25,
36
88
  input_gt_200k: 2.5,
@@ -38,22 +90,34 @@ export const GeminiTextPricing = {
38
90
  output_gt_200k: 15.0,
39
91
  context_caching: 0.125,
40
92
  context_caching_gt_200k: 0.25,
93
+ context_caching_storage: 4.5,
41
94
  },
95
+ // gemini-2.5-flash: audio input costs 3.3× text; audio caching 3.3× text
42
96
  [Models.google.Gemini_25_Flash]: {
43
97
  input: 0.3,
98
+ input_audio: 1.0,
44
99
  output: 2.5,
45
100
  context_caching: 0.03,
101
+ context_caching_audio: 0.1,
102
+ context_caching_storage: 1.0,
46
103
  },
104
+ // gemini-2.5-flash-lite: audio input costs 3× text; audio caching 3× text
47
105
  [Models.google.Gemini_25_Flash_Lite]: {
48
106
  input: 0.1,
107
+ input_audio: 0.3,
49
108
  output: 0.4,
50
109
  context_caching: 0.01,
110
+ context_caching_audio: 0.03,
111
+ context_caching_storage: 1.0,
51
112
  },
113
+ // gemini-2.5-flash-preview-05-20: same modality splits as 2.5 Flash
52
114
  [Models.google.Gemini_25_Flash_Preview]: {
53
115
  input: 0.3,
116
+ input_audio: 1.0,
54
117
  output: 2.5,
55
- thinking_output: 3.5,
56
- context_caching: 0.0375,
118
+ context_caching: 0.03,
119
+ context_caching_audio: 0.1,
120
+ context_caching_storage: 1.0,
57
121
  },
58
122
  [Models.google.Gemini_25_Pro_Preview]: {
59
123
  input: 1.25,
@@ -62,24 +126,48 @@ export const GeminiTextPricing = {
62
126
  output_gt_200k: 15.0,
63
127
  context_caching: 0.125,
64
128
  context_caching_gt_200k: 0.25,
129
+ context_caching_storage: 4.5,
65
130
  },
131
+ // gemini-2.5-flash-image: image output model, per-image pricing
66
132
  [Models.google.Gemini_25_Flash_Image]: {
67
133
  input: 0.3,
68
- output: 0.039, // per image ($30/1M tokens, 1290 tokens per image)
134
+ output: 0.039, // per image ($30/1M tokens × 1290 tokens)
135
+ },
136
+ // gemini-2.5-flash-live / gemini-2.5-flash-native-audio-preview-12-2025:
137
+ // Live API models — audio I/O costs dramatically more than text
138
+ // Input: $0.50 text, $3.00 audio/video
139
+ // Output: $2.00 text, $12.00 audio
140
+ [Models.google.Gemini_25_Flash_Live]: {
141
+ input: 0.5,
142
+ input_audio: 3.0,
143
+ output: 2.0,
144
+ output_audio: 12.0,
145
+ },
146
+ [Models.google.Gemini_25_Flash_Native_Audio]: {
147
+ input: 0.5,
148
+ input_audio: 3.0,
149
+ output: 2.0,
150
+ output_audio: 12.0,
69
151
  },
152
+ // TTS models: text-only input, audio-only output
70
153
  [Models.google.Gemini_25_Flash_TTS]: {
71
154
  input: 0.5,
72
- output: 10.0,
155
+ output_audio: 10.0,
73
156
  },
74
157
  [Models.google.Gemini_25_Pro_TTS]: {
75
158
  input: 1.0,
76
- output: 20.0,
159
+ output_audio: 20.0,
77
160
  },
78
- // Gemini 2.0 (deprecated)
161
+
162
+ // ── Gemini 2.0 (deprecated) ───────────────────────────────────────────────
163
+ // gemini-2.0-flash: audio input costs 7× text; audio caching costs 7× text
79
164
  [Models.google.Gemini_20_Flash]: {
80
165
  input: 0.1,
166
+ input_audio: 0.7,
81
167
  output: 0.4,
82
168
  context_caching: 0.025,
169
+ context_caching_audio: 0.175,
170
+ context_caching_storage: 1.0,
83
171
  },
84
172
  [Models.google.Gemini_20_Flash_Preview_Image_Generation]: {
85
173
  input: 0.1,
@@ -90,7 +178,8 @@ export const GeminiTextPricing = {
90
178
  input: 0.075,
91
179
  output: 0.3,
92
180
  },
93
- // Gemini 1.5 (legacy)
181
+
182
+ // ── Gemini 1.5 (legacy) ───────────────────────────────────────────────────
94
183
  [Models.google.Gemini_15_Flash]: {
95
184
  input: 0.075,
96
185
  output: 0.3,
@@ -106,39 +195,46 @@ export const GeminiTextPricing = {
106
195
  output: 5.0,
107
196
  context_caching: 0.3125,
108
197
  },
109
- // Image generation
198
+
199
+ // ── Image generation ──────────────────────────────────────────────────────
110
200
  [Models.google.Imagen_3]: {
111
- image_generation: 0.04, // Imagen 4 Standard: $0.04/image
201
+ image_generation: 0.04, // $0.04/image (Imagen 4 Standard)
112
202
  },
113
203
  [Models.google.Imagen_4_Fast]: {
114
- image_generation: 0.02, // $0.02/image
204
+ image_generation: 0.02,
115
205
  },
116
206
  [Models.google.Imagen_4_Ultra]: {
117
- image_generation: 0.06, // $0.06/image
207
+ image_generation: 0.06,
118
208
  },
119
- // Video generation
209
+
210
+ // ── Video generation ──────────────────────────────────────────────────────
120
211
  [Models.google.Veo_2]: {
121
- video_generation: 0.35,
212
+ video_generation: 0.35, // per second
122
213
  },
123
214
  [Models.google.Veo_3]: {
124
- video_generation: 0.4, // $0.40/second
215
+ video_generation: 0.4,
125
216
  },
126
217
  [Models.google.Veo_3_Fast]: {
127
- video_generation: 0.15, // $0.15/second
218
+ video_generation: 0.1, // 720p base rate; 1080p=$0.12, 4k=$0.30 not expressible here
128
219
  },
129
220
  [Models.google.Veo_3_1]: {
130
- video_generation: 0.4, // $0.40/second (720p/1080p)
221
+ video_generation: 0.4, // 720p/1080p; 4k=$0.60 not expressible as single scalar
131
222
  },
132
223
  [Models.google.Veo_3_1_Fast]: {
133
- video_generation: 0.15, // $0.15/second
224
+ video_generation: 0.1, // 720p base; 1080p=$0.12, 4k=$0.30
134
225
  },
135
- // Embeddings
226
+
227
+ // ── Embeddings ────────────────────────────────────────────────────────────
136
228
  [EmbeddingModels.google.Gemini_Embedding]: {
137
229
  input: 0, // Free of charge
138
- output: 0, // Free of charge
139
230
  },
140
231
  [EmbeddingModels.google.Gemini_Embedding_001]: {
141
232
  input: 0.15,
142
- output: 0,
143
233
  },
144
234
  };
235
+
236
+ /**
237
+ * @deprecated Use GeminiPricing instead.
238
+ * Kept as alias for backwards compatibility.
239
+ */
240
+ export const GeminiTextPricing = GeminiPricing;
package/src/conversion.ts CHANGED
@@ -2,7 +2,16 @@ import pdf from "pdf-parse";
2
2
  import * as fs from "fs";
3
3
  import * as path from "path";
4
4
  import { readFile, fileExists } from "./utils";
5
- import { services } from "./services";
5
+
6
+ /**
7
+ * Get the MediaProcessorService from services() lazily.
8
+ * We import lazily to avoid circular dependency issues.
9
+ */
10
+ function getMediaProcessor() {
11
+ // eslint-disable-next-line @typescript-eslint/no-var-requires
12
+ const { services } = require("./services") as typeof import("./services");
13
+ return services().MediaProcessor;
14
+ }
6
15
 
7
16
  export async function processAudio(
8
17
  filePath: string,
@@ -17,24 +26,12 @@ export async function processAudio(
17
26
  console.log(`Transcription ${outputPath} already exists, skipping`);
18
27
  const fileContent = await readFile(outputPath, "utf8");
19
28
  return outputPath.endsWith("txt")
20
- ? fileContent.split("\n")
21
- : JSON.parse(fileContent);
29
+ ? (fileContent as string).split("\n")
30
+ : JSON.parse(fileContent as string);
22
31
  }
23
32
 
24
- const { Downloader } = services();
25
- const chunks = await Downloader.chunk(
26
- filePath,
27
- parsed.dir,
28
- chunkTime,
29
- reusePreviousTranscript
30
- );
31
- const transcription = await Downloader.transcribeChunks(
32
- chunks,
33
- outputPath,
34
- reusePreviousTranscript
35
- );
36
-
37
- return transcription;
33
+ const mediaProcessor = getMediaProcessor();
34
+ return mediaProcessor.processAudio(filePath, reusePreviousTranscript, chunkTime);
38
35
  }
39
36
 
40
37
  export async function convertAudioToText(
@@ -73,48 +70,21 @@ export async function processVideo(
73
70
  chunkTime
74
71
  );
75
72
 
76
- console.log("Extracting keyframes...");
77
- const { Downloader } = services();
78
- const videoAnalysis = await Downloader.extractKeyframes(
79
- filePath,
80
- outputPath,
81
- reusePreviousTranscript,
82
- chunkTime
83
- );
84
-
85
- return videoAnalysis.map((frame, index) => {
86
- return {
87
- frame,
88
- transcription: transcriptions[index],
89
- };
90
- });
73
+ // Return the transcriptions as text — keyframe extraction requires the
74
+ // @tyvm/knowhow-module-video-downloader module
75
+ return transcriptions;
91
76
  }
92
77
 
93
78
  async function convertVideoToText(
94
79
  filePath: string,
95
80
  reusePreviousTranscript = true,
96
81
  chunkTime = 30
97
- ) {
98
- const processed = await processVideo(
99
- filePath,
100
- reusePreviousTranscript,
101
- chunkTime
102
- );
103
-
104
- let fullString = "";
105
-
106
- for (let i = 0; i < processed.length; i++) {
107
- const chunk = processed[i];
108
- fullString += `
109
- Chunk: (${i + 1}/ ${processed.length}):
110
- Start Timestamp: [${i * chunkTime}s]
111
- Visual: ${chunk.frame.description}
112
- Audio: ${chunk.transcription}
113
- End Timestamp: [${i * chunkTime}s]
114
- `;
82
+ ): Promise<string> {
83
+ const transcriptions = await processVideo(filePath, reusePreviousTranscript, chunkTime);
84
+ if (Array.isArray(transcriptions)) {
85
+ return transcriptions.join("\n");
115
86
  }
116
-
117
- return fullString;
87
+ return String(transcriptions);
118
88
  }
119
89
 
120
90
  async function convertPdfToText(filePath: string) {
@@ -123,7 +93,7 @@ async function convertPdfToText(filePath: string) {
123
93
  return data.text;
124
94
  }
125
95
 
126
- export async function convertToText(filePath: string) {
96
+ export async function convertToText(filePath: string): Promise<string> {
127
97
  const extension = filePath.split(".").pop();
128
98
 
129
99
  switch (extension) {
@@ -140,6 +110,6 @@ export async function convertToText(filePath: string) {
140
110
  case "pdf":
141
111
  return convertPdfToText(filePath);
142
112
  default:
143
- return readFile(filePath, "utf8");
113
+ return ((await readFile(filePath, "utf8")) as string) || "";
144
114
  }
145
115
  }