hazo_llm_api 1.2.13 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. package/README.md +321 -8
  2. package/config/hazo_llm_api_config.ini +33 -0
  3. package/dist/components/index.d.ts +2 -0
  4. package/dist/components/index.d.ts.map +1 -1
  5. package/dist/components/index.js +2 -0
  6. package/dist/components/index.js.map +1 -1
  7. package/dist/components/llm_call_inspector/index.d.ts +6 -0
  8. package/dist/components/llm_call_inspector/index.d.ts.map +1 -0
  9. package/dist/components/llm_call_inspector/index.js +5 -0
  10. package/dist/components/llm_call_inspector/index.js.map +1 -0
  11. package/dist/components/llm_call_inspector/llm_call_inspector.d.ts +18 -0
  12. package/dist/components/llm_call_inspector/llm_call_inspector.d.ts.map +1 -0
  13. package/dist/components/llm_call_inspector/llm_call_inspector.js +103 -0
  14. package/dist/components/llm_call_inspector/llm_call_inspector.js.map +1 -0
  15. package/dist/components/llm_cost_dashboard/index.d.ts +6 -0
  16. package/dist/components/llm_cost_dashboard/index.d.ts.map +1 -0
  17. package/dist/components/llm_cost_dashboard/index.js +5 -0
  18. package/dist/components/llm_cost_dashboard/index.js.map +1 -0
  19. package/dist/components/llm_cost_dashboard/llm_cost_dashboard.d.ts +16 -0
  20. package/dist/components/llm_cost_dashboard/llm_cost_dashboard.d.ts.map +1 -0
  21. package/dist/components/llm_cost_dashboard/llm_cost_dashboard.js +154 -0
  22. package/dist/components/llm_cost_dashboard/llm_cost_dashboard.js.map +1 -0
  23. package/dist/index.d.ts +2 -1
  24. package/dist/index.d.ts.map +1 -1
  25. package/dist/lib/cascade/cascade_runner.d.ts +50 -0
  26. package/dist/lib/cascade/cascade_runner.d.ts.map +1 -0
  27. package/dist/lib/cascade/cascade_runner.js +115 -0
  28. package/dist/lib/cascade/cascade_runner.js.map +1 -0
  29. package/dist/lib/cascade/index.d.ts +5 -0
  30. package/dist/lib/cascade/index.d.ts.map +1 -0
  31. package/dist/lib/cascade/index.js +4 -0
  32. package/dist/lib/cascade/index.js.map +1 -0
  33. package/dist/lib/cascade/types.d.ts +35 -0
  34. package/dist/lib/cascade/types.d.ts.map +1 -0
  35. package/dist/lib/cascade/types.js +14 -0
  36. package/dist/lib/cascade/types.js.map +1 -0
  37. package/dist/lib/cost_cap/cost_cap.d.ts +40 -0
  38. package/dist/lib/cost_cap/cost_cap.d.ts.map +1 -0
  39. package/dist/lib/cost_cap/cost_cap.js +150 -0
  40. package/dist/lib/cost_cap/cost_cap.js.map +1 -0
  41. package/dist/lib/cost_cap/index.d.ts +3 -0
  42. package/dist/lib/cost_cap/index.d.ts.map +1 -0
  43. package/dist/lib/cost_cap/index.js +2 -0
  44. package/dist/lib/cost_cap/index.js.map +1 -0
  45. package/dist/lib/database/init_api_log.d.ts +10 -0
  46. package/dist/lib/database/init_api_log.d.ts.map +1 -0
  47. package/dist/lib/database/init_api_log.js +91 -0
  48. package/dist/lib/database/init_api_log.js.map +1 -0
  49. package/dist/lib/hazo_connect/direct_db_connect.d.ts +11 -7
  50. package/dist/lib/hazo_connect/direct_db_connect.d.ts.map +1 -1
  51. package/dist/lib/hazo_connect/direct_db_connect.js +59 -6
  52. package/dist/lib/hazo_connect/direct_db_connect.js.map +1 -1
  53. package/dist/lib/hazo_connect/types.d.ts +35 -3
  54. package/dist/lib/hazo_connect/types.d.ts.map +1 -1
  55. package/dist/lib/llm_api/embed_cache.d.ts +15 -0
  56. package/dist/lib/llm_api/embed_cache.d.ts.map +1 -0
  57. package/dist/lib/llm_api/embed_cache.js +53 -0
  58. package/dist/lib/llm_api/embed_cache.js.map +1 -0
  59. package/dist/lib/llm_api/hazo_llm_document_text.d.ts.map +1 -1
  60. package/dist/lib/llm_api/hazo_llm_document_text.js +56 -14
  61. package/dist/lib/llm_api/hazo_llm_document_text.js.map +1 -1
  62. package/dist/lib/llm_api/hazo_llm_dynamic_data_extract.d.ts.map +1 -1
  63. package/dist/lib/llm_api/hazo_llm_dynamic_data_extract.js +19 -1
  64. package/dist/lib/llm_api/hazo_llm_dynamic_data_extract.js.map +1 -1
  65. package/dist/lib/llm_api/hazo_llm_embed.d.ts +10 -0
  66. package/dist/lib/llm_api/hazo_llm_embed.d.ts.map +1 -0
  67. package/dist/lib/llm_api/hazo_llm_embed.js +80 -0
  68. package/dist/lib/llm_api/hazo_llm_embed.js.map +1 -0
  69. package/dist/lib/llm_api/hazo_llm_image_image.d.ts.map +1 -1
  70. package/dist/lib/llm_api/hazo_llm_image_image.js +56 -14
  71. package/dist/lib/llm_api/hazo_llm_image_image.js.map +1 -1
  72. package/dist/lib/llm_api/hazo_llm_image_text.d.ts.map +1 -1
  73. package/dist/lib/llm_api/hazo_llm_image_text.js +56 -14
  74. package/dist/lib/llm_api/hazo_llm_image_text.js.map +1 -1
  75. package/dist/lib/llm_api/hazo_llm_prompt_chain.d.ts.map +1 -1
  76. package/dist/lib/llm_api/hazo_llm_prompt_chain.js +17 -1
  77. package/dist/lib/llm_api/hazo_llm_prompt_chain.js.map +1 -1
  78. package/dist/lib/llm_api/hazo_llm_text_image.d.ts.map +1 -1
  79. package/dist/lib/llm_api/hazo_llm_text_image.js +56 -14
  80. package/dist/lib/llm_api/hazo_llm_text_image.js.map +1 -1
  81. package/dist/lib/llm_api/hazo_llm_text_text.d.ts.map +1 -1
  82. package/dist/lib/llm_api/hazo_llm_text_text.js +90 -15
  83. package/dist/lib/llm_api/hazo_llm_text_text.js.map +1 -1
  84. package/dist/lib/llm_api/index.d.ts +29 -1
  85. package/dist/lib/llm_api/index.d.ts.map +1 -1
  86. package/dist/lib/llm_api/index.js +433 -6
  87. package/dist/lib/llm_api/index.js.map +1 -1
  88. package/dist/lib/llm_api/prompt_parts_helper.d.ts +15 -0
  89. package/dist/lib/llm_api/prompt_parts_helper.d.ts.map +1 -0
  90. package/dist/lib/llm_api/prompt_parts_helper.js +9 -0
  91. package/dist/lib/llm_api/prompt_parts_helper.js.map +1 -0
  92. package/dist/lib/llm_api/types.d.ts +187 -2
  93. package/dist/lib/llm_api/types.d.ts.map +1 -1
  94. package/dist/lib/llm_api/types.js +4 -0
  95. package/dist/lib/llm_api/types.js.map +1 -1
  96. package/dist/lib/maintenance/purge_log_job.d.ts +23 -0
  97. package/dist/lib/maintenance/purge_log_job.d.ts.map +1 -0
  98. package/dist/lib/maintenance/purge_log_job.js +42 -0
  99. package/dist/lib/maintenance/purge_log_job.js.map +1 -0
  100. package/dist/lib/observability/log_context.d.ts +15 -0
  101. package/dist/lib/observability/log_context.d.ts.map +1 -0
  102. package/dist/lib/observability/log_context.js +32 -0
  103. package/dist/lib/observability/log_context.js.map +1 -0
  104. package/dist/lib/observability/log_writer.d.ts +35 -0
  105. package/dist/lib/observability/log_writer.d.ts.map +1 -0
  106. package/dist/lib/observability/log_writer.js +106 -0
  107. package/dist/lib/observability/log_writer.js.map +1 -0
  108. package/dist/lib/observability/queries.d.ts +15 -0
  109. package/dist/lib/observability/queries.d.ts.map +1 -0
  110. package/dist/lib/observability/queries.js +78 -0
  111. package/dist/lib/observability/queries.js.map +1 -0
  112. package/dist/lib/observability/types.d.ts +77 -0
  113. package/dist/lib/observability/types.d.ts.map +1 -0
  114. package/dist/lib/observability/types.js +8 -0
  115. package/dist/lib/observability/types.js.map +1 -0
  116. package/dist/lib/pricing/pricing.d.ts +49 -0
  117. package/dist/lib/pricing/pricing.d.ts.map +1 -0
  118. package/dist/lib/pricing/pricing.js +153 -0
  119. package/dist/lib/pricing/pricing.js.map +1 -0
  120. package/dist/lib/pricing/pricing.json +75 -0
  121. package/dist/lib/pricing/types.d.ts +58 -0
  122. package/dist/lib/pricing/types.d.ts.map +1 -0
  123. package/dist/lib/pricing/types.js +8 -0
  124. package/dist/lib/pricing/types.js.map +1 -0
  125. package/dist/lib/providers/anthropic/anthropic_client.d.ts +71 -0
  126. package/dist/lib/providers/anthropic/anthropic_client.d.ts.map +1 -0
  127. package/dist/lib/providers/anthropic/anthropic_client.js +134 -0
  128. package/dist/lib/providers/anthropic/anthropic_client.js.map +1 -0
  129. package/dist/lib/providers/anthropic/anthropic_provider.d.ts +60 -0
  130. package/dist/lib/providers/anthropic/anthropic_provider.d.ts.map +1 -0
  131. package/dist/lib/providers/anthropic/anthropic_provider.js +273 -0
  132. package/dist/lib/providers/anthropic/anthropic_provider.js.map +1 -0
  133. package/dist/lib/providers/anthropic/anthropic_response_to_usage.d.ts +21 -0
  134. package/dist/lib/providers/anthropic/anthropic_response_to_usage.d.ts.map +1 -0
  135. package/dist/lib/providers/anthropic/anthropic_response_to_usage.js +46 -0
  136. package/dist/lib/providers/anthropic/anthropic_response_to_usage.js.map +1 -0
  137. package/dist/lib/providers/anthropic/index.d.ts +3 -0
  138. package/dist/lib/providers/anthropic/index.d.ts.map +1 -0
  139. package/dist/lib/providers/anthropic/index.js +2 -0
  140. package/dist/lib/providers/anthropic/index.js.map +1 -0
  141. package/dist/lib/providers/deepseek/deepseek_client.d.ts +55 -0
  142. package/dist/lib/providers/deepseek/deepseek_client.d.ts.map +1 -0
  143. package/dist/lib/providers/deepseek/deepseek_client.js +129 -0
  144. package/dist/lib/providers/deepseek/deepseek_client.js.map +1 -0
  145. package/dist/lib/providers/deepseek/deepseek_provider.d.ts +50 -0
  146. package/dist/lib/providers/deepseek/deepseek_provider.d.ts.map +1 -0
  147. package/dist/lib/providers/deepseek/deepseek_provider.js +147 -0
  148. package/dist/lib/providers/deepseek/deepseek_provider.js.map +1 -0
  149. package/dist/lib/providers/deepseek/deepseek_response_to_usage.d.ts +21 -0
  150. package/dist/lib/providers/deepseek/deepseek_response_to_usage.d.ts.map +1 -0
  151. package/dist/lib/providers/deepseek/deepseek_response_to_usage.js +40 -0
  152. package/dist/lib/providers/deepseek/deepseek_response_to_usage.js.map +1 -0
  153. package/dist/lib/providers/deepseek/index.d.ts +3 -0
  154. package/dist/lib/providers/deepseek/index.d.ts.map +1 -0
  155. package/dist/lib/providers/deepseek/index.js +2 -0
  156. package/dist/lib/providers/deepseek/index.js.map +1 -0
  157. package/dist/lib/providers/gemini/gemini_provider.d.ts.map +1 -1
  158. package/dist/lib/providers/gemini/gemini_provider.js +40 -4
  159. package/dist/lib/providers/gemini/gemini_provider.js.map +1 -1
  160. package/dist/lib/providers/gemini/gemini_response_to_usage.d.ts +37 -0
  161. package/dist/lib/providers/gemini/gemini_response_to_usage.d.ts.map +1 -0
  162. package/dist/lib/providers/gemini/gemini_response_to_usage.js +49 -0
  163. package/dist/lib/providers/gemini/gemini_response_to_usage.js.map +1 -0
  164. package/dist/lib/providers/index.d.ts +3 -0
  165. package/dist/lib/providers/index.d.ts.map +1 -1
  166. package/dist/lib/providers/index.js +3 -0
  167. package/dist/lib/providers/index.js.map +1 -1
  168. package/dist/lib/providers/openai/index.d.ts +3 -0
  169. package/dist/lib/providers/openai/index.d.ts.map +1 -0
  170. package/dist/lib/providers/openai/index.js +2 -0
  171. package/dist/lib/providers/openai/index.js.map +1 -0
  172. package/dist/lib/providers/openai/openai_client.d.ts +99 -0
  173. package/dist/lib/providers/openai/openai_client.d.ts.map +1 -0
  174. package/dist/lib/providers/openai/openai_client.js +187 -0
  175. package/dist/lib/providers/openai/openai_client.js.map +1 -0
  176. package/dist/lib/providers/openai/openai_provider.d.ts +66 -0
  177. package/dist/lib/providers/openai/openai_provider.d.ts.map +1 -0
  178. package/dist/lib/providers/openai/openai_provider.js +297 -0
  179. package/dist/lib/providers/openai/openai_provider.js.map +1 -0
  180. package/dist/lib/providers/openai/openai_response_to_usage.d.ts +21 -0
  181. package/dist/lib/providers/openai/openai_response_to_usage.d.ts.map +1 -0
  182. package/dist/lib/providers/openai/openai_response_to_usage.js +50 -0
  183. package/dist/lib/providers/openai/openai_response_to_usage.js.map +1 -0
  184. package/dist/lib/providers/qwen/qwen_provider.d.ts.map +1 -1
  185. package/dist/lib/providers/qwen/qwen_provider.js +52 -5
  186. package/dist/lib/providers/qwen/qwen_provider.js.map +1 -1
  187. package/dist/lib/providers/qwen/qwen_response_to_usage.d.ts +36 -0
  188. package/dist/lib/providers/qwen/qwen_response_to_usage.d.ts.map +1 -0
  189. package/dist/lib/providers/qwen/qwen_response_to_usage.js +50 -0
  190. package/dist/lib/providers/qwen/qwen_response_to_usage.js.map +1 -0
  191. package/dist/lib/providers/types.d.ts +16 -6
  192. package/dist/lib/providers/types.d.ts.map +1 -1
  193. package/dist/lib/providers/types.js +1 -0
  194. package/dist/lib/providers/types.js.map +1 -1
  195. package/dist/lib/utils.d.ts +13 -0
  196. package/dist/lib/utils.d.ts.map +1 -0
  197. package/dist/lib/utils.js +16 -0
  198. package/dist/lib/utils.js.map +1 -0
  199. package/dist/server.d.ts +16 -2
  200. package/dist/server.d.ts.map +1 -1
  201. package/dist/server.js +29 -2
  202. package/dist/server.js.map +1 -1
  203. package/migrations/hazo_llm_api_log.sql +69 -0
  204. package/package.json +19 -5
@@ -25,11 +25,19 @@ import { hazo_llm_image_image_text as hazo_llm_image_image_text_internal } from
25
25
  import { hazo_llm_prompt_chain as hazo_llm_prompt_chain_internal } from './hazo_llm_prompt_chain.js';
26
26
  import { hazo_llm_document_text as hazo_llm_document_text_internal } from './hazo_llm_document_text.js';
27
27
  import { hazo_llm_dynamic_data_extract as hazo_llm_dynamic_data_extract_internal } from './hazo_llm_dynamic_data_extract.js';
28
+ import { hazo_llm_embed_internal } from './hazo_llm_embed.js';
29
+ import { configure_embed_cache } from './embed_cache.js';
28
30
  import { register_provider, set_enabled_llms, set_primary_llm, get_primary_llm, get_provider, } from '../providers/registry.js';
29
31
  import { GeminiProvider } from '../providers/gemini/index.js';
30
32
  import { QwenProvider } from '../providers/qwen/index.js';
33
+ import { AnthropicProvider } from '../providers/anthropic/index.js';
34
+ import { OpenAIProvider } from '../providers/openai/index.js';
35
+ import { DeepSeekProvider } from '../providers/deepseek/index.js';
31
36
  import { SERVICE_TYPES } from '../providers/types.js';
32
37
  import { create_request_context, call_before_request_hook, call_after_response_hook, call_on_error_hook, detect_error_code, } from './provider_helper.js';
38
+ import { ensure_api_log_table } from '../database/init_api_log.js';
39
+ import { create_default_after_response_hook } from '../observability/log_writer.js';
40
+ import { load_pricing, set_pricing_overrides } from '../pricing/pricing.js';
33
41
  import * as fs from 'fs';
34
42
  import * as path from 'path';
35
43
  import * as ini from 'ini';
@@ -40,6 +48,8 @@ let initialized = false;
40
48
  let db_auto_initialized = false;
41
49
  let current_config = null;
42
50
  let stored_hazo_connect = null;
51
+ let stored_cascade_config = null;
52
+ let stored_cost_cap_config = null;
43
53
  // =============================================================================
44
54
  // Default Logger
45
55
  // =============================================================================
@@ -403,7 +413,7 @@ function load_api_key_from_env(provider_name) {
403
413
  }
404
414
  /**
405
415
  * Read LLM global config from [llm] section
406
- * @returns Object with enabled_llms and primary_llm
416
+ * @returns Object with enabled_llms, primary_llm, sqlite_path, and optional cascade config
407
417
  */
408
418
  function get_llm_global_config() {
409
419
  const config_path = find_config_file();
@@ -424,10 +434,37 @@ function get_llm_global_config() {
424
434
  const enabled_llms = parse_enabled_llms(llm_section.enabled_llms);
425
435
  const primary_llm = llm_section.primary_llm || default_primary;
426
436
  const sqlite_path = llm_section.sqlite_path || default_sqlite;
437
+ const cascade_section = config.llm_cascade;
438
+ let parsed_cascade;
439
+ if (cascade_section) {
440
+ parsed_cascade = {};
441
+ if (cascade_section.providers) {
442
+ parsed_cascade.providers = String(cascade_section.providers)
443
+ .split(',')
444
+ .map((s) => s.trim())
445
+ .filter(Boolean);
446
+ }
447
+ if (cascade_section.timeout_ms_per_attempt) {
448
+ const ms = parseInt(String(cascade_section.timeout_ms_per_attempt), 10);
449
+ if (!isNaN(ms) && ms > 0) {
450
+ parsed_cascade.timeout_ms_per_attempt = ms;
451
+ }
452
+ }
453
+ if (cascade_section.cascade_on_codes) {
454
+ const codes = String(cascade_section.cascade_on_codes)
455
+ .split(',')
456
+ .map((s) => s.trim())
457
+ .filter(Boolean);
458
+ if (codes.length > 0) {
459
+ parsed_cascade.cascade_on_codes = codes;
460
+ }
461
+ }
462
+ }
427
463
  return {
428
464
  enabled_llms: enabled_llms.length > 0 ? enabled_llms : default_enabled,
429
465
  primary_llm,
430
466
  sqlite_path,
467
+ cascade: parsed_cascade,
431
468
  };
432
469
  }
433
470
  catch {
@@ -435,6 +472,7 @@ function get_llm_global_config() {
435
472
  enabled_llms: default_enabled,
436
473
  primary_llm: default_primary,
437
474
  sqlite_path: default_sqlite,
475
+ cascade: undefined,
438
476
  };
439
477
  }
440
478
  }
@@ -660,6 +698,220 @@ function load_qwen_provider_from_config(logger) {
660
698
  return null;
661
699
  }
662
700
  }
701
/**
 * Build the Anthropic provider from the INI config file.
 *
 * Settings are taken from the [llm_anthropic] section. The API key itself is
 * read from the environment: the section's `api_key_env` entry names the
 * variable to consult, falling back to ANTHROPIC_API_KEY.
 *
 * @param logger - Logger used for the diagnostics emitted here; also passed to the provider
 * @returns A new AnthropicProvider, or null when the config file cannot be found,
 *          the API key variable is empty/unset, or anything inside the load throws
 */
function load_anthropic_provider_from_config(logger) {
    const config_path = find_config_file();
    if (!config_path) {
        logger.warn('Config file not found, cannot load Anthropic provider', {
            file: 'index.ts',
            line: 0,
        });
        return null;
    }
    try {
        const settings = ini.parse(fs.readFileSync(config_path, 'utf-8')).llm_anthropic || {};
        const env_var_name = settings.api_key_env || 'ANTHROPIC_API_KEY';
        const api_key = process.env[env_var_name];
        if (!api_key) {
            logger.error(`${env_var_name} not found in environment variables`, {
                file: 'index.ts',
                line: 0,
                data: { config_path, env_var_name },
            });
            return null;
        }
        const capabilities = parse_capabilities(settings.capabilities);
        const generation_config = {};
        // Copy one numeric INI entry onto generation_config; absent or
        // non-numeric entries are ignored so the provider keeps its own default.
        const apply_number = (ini_key, target_key, to_number) => {
            if (settings[ini_key] === undefined) {
                return;
            }
            const parsed = to_number(settings[ini_key]);
            if (!isNaN(parsed)) {
                generation_config[target_key] = parsed;
            }
        };
        apply_number('text_max_tokens', 'max_tokens', (raw) => parseInt(raw, 10));
        apply_number('text_temperature', 'temperature', (raw) => parseFloat(raw));
        // Only forward generation_config when at least one option was actually set.
        const has_generation_overrides = Object.keys(generation_config).length > 0;
        return new AnthropicProvider({
            api_key,
            api_url: settings.api_url,
            api_version: settings.api_version,
            model_text_text: settings.model_text_text,
            model_image_text: settings.model_image_text,
            model_document_text: settings.model_document_text,
            generation_config: has_generation_overrides ? generation_config : undefined,
            capabilities: capabilities.length > 0 ? capabilities : undefined,
            logger,
        });
    }
    catch (error) {
        logger.error('Failed to load Anthropic provider from config', {
            file: 'index.ts',
            line: 0,
            data: {
                error: error instanceof Error ? error.message : String(error),
                config_path,
            },
        });
        return null;
    }
}
768
/**
 * Build the OpenAI provider from the INI config file.
 *
 * Settings are taken from the [llm_openai] section. The API key itself is read
 * from the environment: the section's `api_key_env` entry names the variable
 * to consult, falling back to OPENAI_API_KEY.
 *
 * @param logger - Logger used for the diagnostics emitted here; also passed to the provider
 * @returns A new OpenAIProvider, or null when the config file cannot be found,
 *          the API key variable is empty/unset, or anything inside the load throws
 */
function load_openai_provider_from_config(logger) {
    const config_path = find_config_file();
    if (!config_path) {
        logger.warn('Config file not found, cannot load OpenAI provider', {
            file: 'index.ts',
            line: 0,
        });
        return null;
    }
    try {
        const settings = ini.parse(fs.readFileSync(config_path, 'utf-8')).llm_openai || {};
        const env_var_name = settings.api_key_env || 'OPENAI_API_KEY';
        const api_key = process.env[env_var_name];
        if (!api_key) {
            logger.error(`${env_var_name} not found in environment variables`, {
                file: 'index.ts',
                line: 0,
                data: { config_path, env_var_name },
            });
            return null;
        }
        const capabilities = parse_capabilities(settings.capabilities);
        // [INI key, generation_config key, parser] — processed in this order.
        const numeric_options = [
            ['text_temperature', 'temperature', (raw) => parseFloat(raw)],
            ['text_top_p', 'top_p', (raw) => parseFloat(raw)],
            ['text_max_tokens', 'max_tokens', (raw) => parseInt(raw, 10)],
        ];
        const generation_config = {};
        for (const [ini_key, target_key, to_number] of numeric_options) {
            if (settings[ini_key] === undefined) {
                continue;
            }
            const parsed = to_number(settings[ini_key]);
            // Non-numeric values are dropped rather than forwarded as NaN.
            if (!isNaN(parsed)) {
                generation_config[target_key] = parsed;
            }
        }
        // Only forward generation_config when at least one option was actually set.
        const has_generation_overrides = Object.keys(generation_config).length > 0;
        return new OpenAIProvider({
            api_key,
            api_url: settings.api_url,
            api_url_image: settings.api_url_image,
            api_url_embed: settings.api_url_embed,
            model_text_text: settings.model_text_text,
            model_image_text: settings.model_image_text,
            model_text_image: settings.model_text_image,
            model_embed: settings.model_embed,
            generation_config: has_generation_overrides ? generation_config : undefined,
            capabilities: capabilities.length > 0 ? capabilities : undefined,
            logger,
        });
    }
    catch (error) {
        logger.error('Failed to load OpenAI provider from config', {
            file: 'index.ts',
            line: 0,
            data: {
                error: error instanceof Error ? error.message : String(error),
                config_path,
            },
        });
        return null;
    }
}
844
/**
 * Build the DeepSeek provider from the INI config file.
 *
 * Settings are taken from the [llm_deepseek] section. The API key itself is
 * read from the environment: the section's `api_key_env` entry names the
 * variable to consult, falling back to DEEPSEEK_API_KEY.
 *
 * @param logger - Logger used for the diagnostics emitted here; also passed to the provider
 * @returns A new DeepSeekProvider, or null when the config file cannot be found,
 *          the API key variable is empty/unset, or anything inside the load throws
 */
function load_deepseek_provider_from_config(logger) {
    const config_path = find_config_file();
    if (!config_path) {
        logger.warn('Config file not found, cannot load DeepSeek provider', {
            file: 'index.ts',
            line: 0,
        });
        return null;
    }
    try {
        const settings = ini.parse(fs.readFileSync(config_path, 'utf-8')).llm_deepseek || {};
        const env_var_name = settings.api_key_env || 'DEEPSEEK_API_KEY';
        const api_key = process.env[env_var_name];
        if (!api_key) {
            logger.error(`${env_var_name} not found in environment variables`, {
                file: 'index.ts',
                line: 0,
                data: { config_path, env_var_name },
            });
            return null;
        }
        const capabilities = parse_capabilities(settings.capabilities);
        const generation_config = {};
        // Floating-point options first (temperature, then top_p) ...
        const float_options = { text_temperature: 'temperature', text_top_p: 'top_p' };
        for (const [ini_key, target_key] of Object.entries(float_options)) {
            if (settings[ini_key] !== undefined) {
                const parsed = parseFloat(settings[ini_key]);
                if (!isNaN(parsed)) {
                    generation_config[target_key] = parsed;
                }
            }
        }
        // ... then the integer token limit; non-numeric values are dropped.
        if (settings.text_max_tokens !== undefined) {
            const token_limit = parseInt(settings.text_max_tokens, 10);
            if (!isNaN(token_limit)) {
                generation_config.max_tokens = token_limit;
            }
        }
        // Only forward generation_config when at least one option was actually set.
        const has_generation_overrides = Object.keys(generation_config).length > 0;
        return new DeepSeekProvider({
            api_key,
            api_url: settings.api_url,
            model_text_text: settings.model_text_text,
            generation_config: has_generation_overrides ? generation_config : undefined,
            capabilities: capabilities.length > 0 ? capabilities : undefined,
            logger,
        });
    }
    catch (error) {
        logger.error('Failed to load DeepSeek provider from config', {
            file: 'index.ts',
            line: 0,
            data: {
                error: error instanceof Error ? error.message : String(error),
                config_path,
            },
        });
        return null;
    }
}
663
915
  /**
664
916
  * Load and register all enabled providers from config file
665
917
  * @param logger - Logger instance
@@ -697,7 +949,46 @@ function load_and_register_providers(logger) {
697
949
  });
698
950
  }
699
951
  }
700
- // Future: Add other providers here (OpenAI, Anthropic, etc.)
952
+ else if (llm_name.toLowerCase() === 'anthropic') {
953
+ const provider = load_anthropic_provider_from_config(logger);
954
+ if (provider) {
955
+ register_provider(provider);
956
+ }
957
+ else {
958
+ logger.warn('Anthropic provider is enabled in config but failed to load. Check ANTHROPIC_API_KEY in environment variables.', {
959
+ file: 'index.ts',
960
+ line: 0,
961
+ data: { llm_name: llm_name.toLowerCase() },
962
+ });
963
+ }
964
+ }
965
+ else if (llm_name.toLowerCase() === 'openai') {
966
+ const provider = load_openai_provider_from_config(logger);
967
+ if (provider) {
968
+ register_provider(provider);
969
+ }
970
+ else {
971
+ logger.warn('OpenAI provider is enabled in config but failed to load. Check OPENAI_API_KEY in environment variables.', {
972
+ file: 'index.ts',
973
+ line: 0,
974
+ data: { llm_name: llm_name.toLowerCase() },
975
+ });
976
+ }
977
+ }
978
+ else if (llm_name.toLowerCase() === 'deepseek') {
979
+ const provider = load_deepseek_provider_from_config(logger);
980
+ if (provider) {
981
+ register_provider(provider);
982
+ }
983
+ else {
984
+ logger.warn('DeepSeek provider is enabled in config but failed to load. Check DEEPSEEK_API_KEY in environment variables.', {
985
+ file: 'index.ts',
986
+ line: 0,
987
+ data: { llm_name: llm_name.toLowerCase() },
988
+ });
989
+ }
990
+ }
991
+ // Future: Add other providers here
701
992
  }
702
993
  }
703
994
  // =============================================================================
@@ -758,10 +1049,12 @@ export async function initialize_llm_api(config = {}) {
758
1049
  const logger = config.logger || default_logger;
759
1050
  // Store the logger for use by other functions
760
1051
  set_logger(logger);
761
- // Store hooks if provided
762
- if (config.hooks) {
763
- set_hooks(config.hooks);
764
- }
1052
+ // Reset hooks will be set correctly after connect is established below
1053
+ set_hooks({});
1054
+ // Reset cascade config — will be set correctly below
1055
+ stored_cascade_config = null;
1056
+ // Reset cost cap config — will be set correctly below
1057
+ stored_cost_cap_config = null;
765
1058
  // Get global config from file
766
1059
  const global_config = get_llm_global_config();
767
1060
  // Use provided sqlite_path or fall back to config file value
@@ -803,6 +1096,66 @@ export async function initialize_llm_api(config = {}) {
803
1096
  }
804
1097
  initialized = true;
805
1098
  current_config = final_config;
1099
+ // Apply pricing table and any consumer overrides
1100
+ load_pricing();
1101
+ if (config.pricing_overrides) {
1102
+ set_pricing_overrides(config.pricing_overrides);
1103
+ }
1104
+ // Store resolved cascade config: init-time config takes precedence over INI
1105
+ stored_cascade_config = config.cascade ?? global_config.cascade ?? null;
1106
+ // Store cost cap config from consumer
1107
+ stored_cost_cap_config = config.cost_cap ?? null;
1108
+ // Configure embed cache from config
1109
+ if (config.embed_cache) {
1110
+ configure_embed_cache({
1111
+ max_size: config.embed_cache.max_size,
1112
+ keyv: config.embed_cache.keyv,
1113
+ });
1114
+ }
1115
+ // Set up the log table + default afterResponse hook unless explicitly disabled
1116
+ const resolved_connect = stored_hazo_connect;
1117
+ const api_log_enabled = config.api_log?.enabled !== false;
1118
+ const consumer_after = config.hooks?.afterResponse;
1119
+ if (api_log_enabled && resolved_connect) {
1120
+ try {
1121
+ await ensure_api_log_table(resolved_connect, logger);
1122
+ // Ensure cost cap index exists for fast session-based SUM queries
1123
+ const { ensure_cost_cap_index } = await import('../cost_cap/cost_cap.js');
1124
+ await ensure_cost_cap_index(resolved_connect, logger);
1125
+ const default_hook = create_default_after_response_hook({
1126
+ connect: resolved_connect,
1127
+ logger,
1128
+ extract_context: config.api_log?.extract_context,
1129
+ });
1130
+ // Chain: default hook first, then consumer hook (if provided)
1131
+ const chained = consumer_after
1132
+ ? async (ctx) => {
1133
+ await default_hook(ctx);
1134
+ await consumer_after(ctx);
1135
+ }
1136
+ : default_hook;
1137
+ set_hooks({
1138
+ ...get_hooks(), // preserve any other hooks (beforeRequest, onError)
1139
+ ...config.hooks, // merge consumer hooks
1140
+ afterResponse: chained,
1141
+ });
1142
+ }
1143
+ catch (err) {
1144
+ logger.warn('api_log setup failed; calls will not be logged', {
1145
+ error: err instanceof Error ? err.message : String(err),
1146
+ });
1147
+ // Fall through — install consumer hooks only, don't override with failing default
1148
+ if (config.hooks) {
1149
+ set_hooks(config.hooks);
1150
+ }
1151
+ }
1152
+ }
1153
+ else {
1154
+ // api_log disabled or no connect: install consumer hooks as-is (or leave empty)
1155
+ if (config.hooks) {
1156
+ set_hooks(config.hooks);
1157
+ }
1158
+ }
806
1159
  // LLM API initialized successfully
807
1160
  }
808
1161
  catch (error) {
@@ -1116,6 +1469,36 @@ export async function hazo_llm_dynamic_data_extract(params, llm) {
1116
1469
  }
1117
1470
  }
1118
1471
  // =============================================================================
1472
+ // Embed Function
1473
+ // =============================================================================
1474
/**
 * Generate vector embeddings for one or more text inputs.
 * Caches results in-memory (or via BYO Keyv) and deduplicates repeated texts in batches.
 *
 * This wrapper verifies the API is initialized, resolves the logger
 * (configured logger, else the default logger) and delegates to
 * hazo_llm_embed_internal. It never rejects: any failure — including an
 * asynchronous rejection from the internal call — is converted into a
 * `{ success: false, error }` response.
 *
 * @param params - Text string or array of strings to embed, plus optional model override
 * @param _llm - Accepted for signature parity with the other hazo_llm_* functions; currently unused
 * @returns EmbedResponse with vectors, dimensions, model, and cache flags
 *
 * @example
 * ```typescript
 * import { hazo_llm_embed } from 'hazo_llm_api/server';
 *
 * const result = await hazo_llm_embed({ text: 'Hello world' });
 * if (result.success) {
 *   console.log(result.vectors?.[0]); // number[]
 * }
 * ```
 */
export async function hazo_llm_embed(params, _llm) {
    try {
        const config = check_initialized();
        const logger = config.logger || default_logger;
        // `await` is required here: returning the bare promise from inside
        // `try` would let a rejection skip the catch block below.
        return await hazo_llm_embed_internal(params, logger);
    }
    catch (error) {
        return { success: false, error: error instanceof Error ? error.message : String(error) };
    }
}
1501
+ // =============================================================================
1119
1502
  // Streaming Functions
1120
1503
  // =============================================================================
1121
1504
  /**
@@ -1173,6 +1556,21 @@ export async function* hazo_llm_text_text_stream(params, llm) {
1173
1556
  };
1174
1557
  return;
1175
1558
  }
1559
+ // Cost cap pre-call gate
1560
+ const cost_cap_config = get_cost_cap_config();
1561
+ if (cost_cap_config) {
1562
+ const { check_cost_cap_gate } = await import('../cost_cap/cost_cap.js');
1563
+ const cap_block = await check_cost_cap_gate(stored_hazo_connect, cost_cap_config, logger);
1564
+ if (cap_block !== null) {
1565
+ yield {
1566
+ text: '',
1567
+ done: true,
1568
+ error: cap_block.error ?? cap_block.error_info?.message,
1569
+ error_info: cap_block.error_info,
1570
+ };
1571
+ return;
1572
+ }
1573
+ }
1176
1574
  // Call hooks and streaming method
1177
1575
  const request_context = create_request_context(SERVICE_TYPES.TEXT_TEXT, provider.get_name(), {
1178
1576
  prompt: params.prompt,
@@ -1267,6 +1665,21 @@ export async function* hazo_llm_image_text_stream(params, llm) {
1267
1665
  };
1268
1666
  return;
1269
1667
  }
1668
+ // Cost cap pre-call gate
1669
+ const cost_cap_config = get_cost_cap_config();
1670
+ if (cost_cap_config) {
1671
+ const { check_cost_cap_gate } = await import('../cost_cap/cost_cap.js');
1672
+ const cap_block = await check_cost_cap_gate(stored_hazo_connect, cost_cap_config, logger);
1673
+ if (cap_block !== null) {
1674
+ yield {
1675
+ text: '',
1676
+ done: true,
1677
+ error: cap_block.error ?? cap_block.error_info?.message,
1678
+ error_info: cap_block.error_info,
1679
+ };
1680
+ return;
1681
+ }
1682
+ }
1270
1683
  // Call hooks and streaming method
1271
1684
  const request_context = create_request_context(SERVICE_TYPES.IMAGE_TEXT, provider.get_name(), {
1272
1685
  prompt: params.prompt,
@@ -1329,6 +1742,20 @@ export async function* hazo_llm_image_text_stream(params, llm) {
1329
1742
  export function is_initialized() {
1330
1743
  return initialized;
1331
1744
  }
1745
/**
 * Expose the cascade configuration resolved by the most recent
 * initialize_llm_api call (the init-time `cascade` option wins over the
 * cascade settings read from the INI file).
 *
 * @returns The stored cascade config, or null when none has been set
 */
export function get_cascade_config() {
    return stored_cascade_config;
}
1752
/**
 * Expose the cost cap configuration captured from the `cost_cap` option of
 * the most recent initialize_llm_api call.
 *
 * @returns The stored cost cap config, or null when none has been set
 */
export function get_cost_cap_config() {
    return stored_cost_cap_config;
}
1332
1759
  /**
1333
1760
  * Get the current configuration (without sensitive logger)
1334
1761
  * @returns Current configuration or null if not initialized