@modular-prompt/driver 0.12.0 → 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/dist/anthropic/anthropic-driver.d.ts +38 -8
  2. package/dist/anthropic/anthropic-driver.d.ts.map +1 -1
  3. package/dist/anthropic/anthropic-driver.js +180 -164
  4. package/dist/anthropic/anthropic-driver.js.map +1 -1
  5. package/dist/cache-controller.d.ts +28 -0
  6. package/dist/cache-controller.d.ts.map +1 -0
  7. package/dist/cache-controller.js +2 -0
  8. package/dist/cache-controller.js.map +1 -0
  9. package/dist/cache-utils.d.ts +20 -0
  10. package/dist/cache-utils.d.ts.map +1 -0
  11. package/dist/cache-utils.js +71 -0
  12. package/dist/cache-utils.js.map +1 -0
  13. package/dist/content-utils.d.ts.map +1 -1
  14. package/dist/content-utils.js +20 -0
  15. package/dist/content-utils.js.map +1 -1
  16. package/dist/driver-registry/config-based-factory.d.ts.map +1 -1
  17. package/dist/driver-registry/config-based-factory.js +7 -0
  18. package/dist/driver-registry/config-based-factory.js.map +1 -1
  19. package/dist/driver-registry/factory-helper.d.ts.map +1 -1
  20. package/dist/driver-registry/factory-helper.js +7 -4
  21. package/dist/driver-registry/factory-helper.js.map +1 -1
  22. package/dist/driver-registry/types.d.ts +6 -0
  23. package/dist/driver-registry/types.d.ts.map +1 -1
  24. package/dist/formatter/converter.js +1 -1
  25. package/dist/formatter/converter.js.map +1 -1
  26. package/dist/google-genai/element-converter.d.ts +11 -0
  27. package/dist/google-genai/element-converter.d.ts.map +1 -0
  28. package/dist/google-genai/element-converter.js +126 -0
  29. package/dist/google-genai/element-converter.js.map +1 -0
  30. package/dist/google-genai/google-genai-cache-controller.d.ts +24 -0
  31. package/dist/google-genai/google-genai-cache-controller.d.ts.map +1 -0
  32. package/dist/google-genai/google-genai-cache-controller.js +127 -0
  33. package/dist/google-genai/google-genai-cache-controller.js.map +1 -0
  34. package/dist/google-genai/google-genai-driver.d.ts +5 -29
  35. package/dist/google-genai/google-genai-driver.d.ts.map +1 -1
  36. package/dist/google-genai/google-genai-driver.js +92 -255
  37. package/dist/google-genai/google-genai-driver.js.map +1 -1
  38. package/dist/index.d.ts +4 -0
  39. package/dist/index.d.ts.map +1 -1
  40. package/dist/index.js +3 -0
  41. package/dist/index.js.map +1 -1
  42. package/dist/mlx-ml/mlx-cache-controller.d.ts +66 -0
  43. package/dist/mlx-ml/mlx-cache-controller.d.ts.map +1 -0
  44. package/dist/mlx-ml/mlx-cache-controller.js +600 -0
  45. package/dist/mlx-ml/mlx-cache-controller.js.map +1 -0
  46. package/dist/mlx-ml/mlx-driver.d.ts +12 -7
  47. package/dist/mlx-ml/mlx-driver.d.ts.map +1 -1
  48. package/dist/mlx-ml/mlx-driver.js +192 -124
  49. package/dist/mlx-ml/mlx-driver.js.map +1 -1
  50. package/dist/mlx-ml/mlx-message-utils.d.ts +9 -0
  51. package/dist/mlx-ml/mlx-message-utils.d.ts.map +1 -0
  52. package/dist/mlx-ml/mlx-message-utils.js +71 -0
  53. package/dist/mlx-ml/mlx-message-utils.js.map +1 -0
  54. package/dist/mlx-ml/process/index.d.ts +7 -3
  55. package/dist/mlx-ml/process/index.d.ts.map +1 -1
  56. package/dist/mlx-ml/process/index.js +22 -7
  57. package/dist/mlx-ml/process/index.js.map +1 -1
  58. package/dist/mlx-ml/process/model-handlers.d.ts +4 -59
  59. package/dist/mlx-ml/process/model-handlers.d.ts.map +1 -1
  60. package/dist/mlx-ml/process/model-handlers.js +15 -14
  61. package/dist/mlx-ml/process/model-handlers.js.map +1 -1
  62. package/dist/mlx-ml/process/model-specific.d.ts +7 -0
  63. package/dist/mlx-ml/process/model-specific.d.ts.map +1 -1
  64. package/dist/mlx-ml/process/model-specific.js +3 -0
  65. package/dist/mlx-ml/process/model-specific.js.map +1 -1
  66. package/dist/mlx-ml/process/process-communication.d.ts +3 -0
  67. package/dist/mlx-ml/process/process-communication.d.ts.map +1 -1
  68. package/dist/mlx-ml/process/process-communication.js +13 -0
  69. package/dist/mlx-ml/process/process-communication.js.map +1 -1
  70. package/dist/mlx-ml/process/queue.d.ts +5 -2
  71. package/dist/mlx-ml/process/queue.d.ts.map +1 -1
  72. package/dist/mlx-ml/process/queue.js +101 -14
  73. package/dist/mlx-ml/process/queue.js.map +1 -1
  74. package/dist/mlx-ml/process/response-processor.d.ts +10 -0
  75. package/dist/mlx-ml/process/response-processor.d.ts.map +1 -1
  76. package/dist/mlx-ml/process/response-processor.js +23 -1
  77. package/dist/mlx-ml/process/response-processor.js.map +1 -1
  78. package/dist/mlx-ml/process/types.d.ts +50 -4
  79. package/dist/mlx-ml/process/types.d.ts.map +1 -1
  80. package/dist/mlx-ml/tool-call-parser.d.ts.map +1 -1
  81. package/dist/mlx-ml/tool-call-parser.js +44 -25
  82. package/dist/mlx-ml/tool-call-parser.js.map +1 -1
  83. package/dist/types.d.ts +2 -0
  84. package/dist/types.d.ts.map +1 -1
  85. package/package.json +7 -4
  86. package/src/mlx-ml/python/__main__.py +41 -449
  87. package/src/mlx-ml/python/backends/__init__.py +3 -0
  88. package/src/mlx-ml/python/backends/base.py +84 -0
  89. package/src/mlx-ml/python/backends/mlx_lm.py +202 -0
  90. package/src/mlx-ml/python/backends/mlx_vlm.py +99 -0
  91. package/src/mlx-ml/python/handlers/__init__.py +6 -0
  92. package/src/mlx-ml/python/handlers/cache.py +81 -0
  93. package/src/mlx-ml/python/handlers/capabilities.py +6 -0
  94. package/src/mlx-ml/python/handlers/chat.py +221 -0
  95. package/src/mlx-ml/python/handlers/completion.py +36 -0
  96. package/src/mlx-ml/python/handlers/format_test.py +70 -0
  97. package/src/mlx-ml/python/handlers/tokenize.py +63 -0
  98. package/src/mlx-ml/python/pyproject.toml +13 -3
  99. package/src/mlx-ml/python/server.py +126 -0
  100. package/src/mlx-ml/python/tests/__init__.py +0 -0
  101. package/src/mlx-ml/python/utils/__init__.py +0 -0
  102. package/src/mlx-ml/python/utils/prompt_builder.py +54 -0
  103. package/src/mlx-ml/python/{token_utils.py → utils/token_utils.py} +1 -2
  104. package/src/mlx-ml/python/uv.lock +266 -41
  105. /package/src/mlx-ml/python/{example_basic.py → examples/example_basic.py} +0 -0
  106. /package/src/mlx-ml/python/{example_tool_call.py → examples/example_tool_call.py} +0 -0
  107. /package/src/mlx-ml/python/{chat_template_constraints.py → utils/chat_template_constraints.py} +0 -0
  108. /package/src/mlx-ml/python/{vlm_utils.py → utils/vlm_utils.py} +0 -0
package/dist/types.d.ts CHANGED
@@ -133,6 +133,8 @@ export interface QueryOptions {
133
133
  toolChoice?: ToolChoice;
134
134
  /** Reasoning effort level for thinking models (e.g., o-series, llm-jp-4-thinking) */
135
135
  reasoningEffort?: 'low' | 'medium' | 'high';
136
+ /** Enable prompt caching (driver-specific optimization) */
137
+ cache?: boolean;
136
138
  }
137
139
  /**
138
140
  * Stream result with both stream and final result
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACjF,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAGtD,YAAY,EAAE,UAAU,EAAE,cAAc,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAEjG;;GAEG;AACH,MAAM,MAAM,IAAI,GAAG,QAAQ,GAAG,WAAW,GAAG,MAAM,GAAG,MAAM,CAAC;AAE5D;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IACtC,OAAO,EAAE,MAAM,GAAG,UAAU,EAAE,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,wBAAwB;IACvC,IAAI,EAAE,WAAW,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,QAAQ,EAAE,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,6CAA6C;IAC7C,UAAU,EAAE,MAAM,CAAC;IACnB,oBAAoB;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,+BAA+B;IAC/B,IAAI,EAAE,cAAc,CAAC;IACrB,8BAA8B;IAC9B,KAAK,EAAE,OAAO,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,MAAM,WAAW,GAAG,mBAAmB,GAAG,wBAAwB,GAAG,iBAAiB,CAAC;AAE7F;;GAEG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,IAAI,wBAAwB,CAEtF;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,IAAI,iBAAiB,CAE/E;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,yDAAyD;IACzD,IAAI,EAAE,MAAM,CAAC;IACb,oEAAoE;IACpE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,wCAAwC;IACxC,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACrC,0DAA0D;IAC1D,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,MAAM,UAAU,GAClB,MAAM,GACN,MAAM,GACN,UAAU,GACV;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC;AAGrB;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAEhB;;;OAGG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IAEzB;;;;OAIG;IACH,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAE3B,KAAK,CAAC,EAAE;QACN,YAAY,EAAE,MAAM,CAAC;QACrB,gBAAgB,EAAE,MAAM,CAAC;QACzB,WAAW,EAAE,MAAM,CAAC;KACrB,CAAC;IAEF,uCAAuC;IACvC,SAAS,CAAC,EAAE,QAAQ,EAAE,CAAC;IAEvB,YAAY,CAAC,EAAE,YAAY,CAAC;IAE5B,kDAAkD;IAClD,UAAU,CAAC,EAAE,QAAQ,EAAE,CAAC;IAExB,8DAA8D;IAC9D,MAAM,CAAC,EAAE,QAAQ,EAAE,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,YAAY,CAAC;AAEtE;;;;;;GAMG;AACH,MAAM,MAAM,SAAS,GAAG,SAAS,GAAG,UAAU,GAAG,UAAU,GAAG,MAAM,CAAC;AAErE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,8EAA8E;IAC9E,IAAI,CAAC,EAAE,SAAS,CAAC;IACjB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,GAAG,YAAY,GAAG,kBAAkB,CAAC;IACzD,iCAAiC;IACjC,KAAK,CAAC,EAAE,cAAc,EAAE,CAAC;IACzB,0BAA0B;IAC1B,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,qFAAqF;IACrF,eAAe,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;CAC7C;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B;;OAEG;IACH,MAAM,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;IAE9B;;OAEG;IACH,MAAM,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC;CAC9B;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB;;OAEG;IACH,KAAK,CAAC,MAAM,EAAE,OAAO,sBAAsB,EAAE,cAAc,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IAE3G;;OAEG;IACH,WAAW,CAAC,MAAM,EAAE,OAAO,sBAAsB,EAAE,cAAc,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;IAElH;;OAEG;IACH,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,QAAQ,GAAG,WAAW,GAAG,QAAQ,GAAG,MAAM,CAAC;IACrD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,YAAY,CAAC;CAC/B"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACjF,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAGtD,YAAY,EAAE,UAAU,EAAE,cAAc,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAEjG;;GAEG;AACH,MAAM,MAAM,IAAI,GAAG,QAAQ,GAAG,WAAW,GAAG,MAAM,GAAG,MAAM,CAAC;AAE5D;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IACtC,OAAO,EAAE,MAAM,GAAG,UAAU,EAAE,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,wBAAwB;IACvC,IAAI,EAAE,WAAW,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,QAAQ,EAAE,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,6CAA6C;IAC7C,UAAU,EAAE,MAAM,CAAC;IACnB,oBAAoB;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,+BAA+B;IAC/B,IAAI,EAAE,cAAc,CAAC;IACrB,8BAA8B;IAC9B,KAAK,EAAE,OAAO,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,MAAM,WAAW,GAAG,mBAAmB,GAAG,wBAAwB,GAAG,iBAAiB,CAAC;AAE7F;;GAEG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,IAAI,wBAAwB,CAEtF;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,IAAI,iBAAiB,CAE/E;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,yDAAyD;IACzD,IAAI,EAAE,MAAM,CAAC;IACb,oEAAoE;IACpE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,wCAAwC;IACxC,UAAU,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACrC,0DAA0D;IAC1D,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,MAAM,UAAU,GAClB,MAAM,GACN,MAAM,GACN,UAAU,GACV;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC;AAGrB;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B;;OAEG;IACH,OAAO,EAAE,MAAM,CAAC;IAEhB;;;OAGG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IAEzB;;;;OAIG;IACH,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAE3B,KAAK,CAAC,EAAE;QACN,YAAY,EAAE,MAAM,CAAC;QACrB,gBAAgB,EAAE,MAAM,CAAC;QACzB,WAAW,EAAE,MAAM,CAAC;KACrB,CAAC;IAEF,uCAAuC;IACvC,SAAS,CAAC,EAAE,QAAQ,EAAE,CAAC;IAEvB,YAAY,CAAC,EAAE,YAAY,CAAC;IAE5B,kDAAkD;IAClD,UAAU,CAAC,EAAE,QAAQ,EAAE,CAAC;IAExB,8DAA8D;IAC9D,MAAM,CAAC,EAAE,QAAQ,EAAE,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,MAAM,GAAG,QAAQ,GAAG,OAAO,GAAG,YAAY,CAAC;AAEtE;;;;;;GAMG;AACH,MAAM,MAAM,SAAS,GAAG,SAAS,GAAG,UAAU,GAAG,UAAU,GAAG,MAAM,CAAC;AAErE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,8EAA8E;IAC9E,IAAI,CAAC,EAAE,SAAS,CAAC;IACjB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,GAAG,YAAY,GAAG,kBAAkB,CAAC;IACzD,iCAAiC;IACjC,KAAK,CAAC,EAAE,cAAc,EAAE,CAAC;IACzB,0BAA0B;IAC1B,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,qFAAqF;IACrF,eAAe,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;IAC5C,2DAA2D;IAC3D,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B;;OAEG;IACH,MAAM,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;IAE9B;;OAEG;IACH,MAAM,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC;CAC9B;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB;;OAEG;IACH,KAAK,CAAC,MAAM,EAAE,OAAO,sBAAsB,EAAE,cAAc,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;IAE3G;;OAEG;IACH,WAAW,CAAC,MAAM,EAAE,OAAO,sBAAsB,EAAE,cAAc,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;IAElH;;OAEG;IACH,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,QAAQ,EAAE,QAAQ,GAAG,WAAW,GAAG,QAAQ,GAAG,MAAM,CAAC;IACrD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,YAAY,CAAC;CAC/B"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@modular-prompt/driver",
3
- "version": "0.12.0",
3
+ "version": "0.13.1",
4
4
  "type": "module",
5
5
  "main": "./dist/index.js",
6
6
  "types": "./dist/index.d.ts",
@@ -20,13 +20,13 @@
20
20
  "@anthropic-ai/sdk": "0.61.0",
21
21
  "@anthropic-ai/vertex-sdk": "0.14.4",
22
22
  "@google-cloud/vertexai": "1.10.0",
23
- "@google/genai": "1.34.0",
23
+ "@google/genai": "2.0.1",
24
24
  "@types/js-yaml": "4.0.9",
25
25
  "google-auth-library": "9.15.1",
26
26
  "js-yaml": "4.1.1",
27
27
  "openai": "5.23.2",
28
- "@modular-prompt/core": "0.2.2",
29
- "@modular-prompt/utils": "0.3.4"
28
+ "@modular-prompt/core": "0.3.0",
29
+ "@modular-prompt/utils": "0.3.5"
30
30
  },
31
31
  "devDependencies": {
32
32
  "@eslint/js": "9.39.2",
@@ -38,6 +38,9 @@
38
38
  "typescript": "5.9.3",
39
39
  "vitest": "3.2.4"
40
40
  },
41
+ "engines": {
42
+ "node": ">=20.0.0"
43
+ },
41
44
  "publishConfig": {
42
45
  "access": "public",
43
46
  "registry": "https://registry.npmjs.org/"
@@ -1,466 +1,58 @@
1
1
  import sys
2
- import json
3
- from vlm_utils import detect_model_kind, load_and_resize_images
4
- from token_utils import get_capabilities, is_eod_token
2
+
3
+ from backends import MlxLmBackend, MlxVlmBackend
4
+ from utils.token_utils import get_capabilities
5
+ from utils.vlm_utils import detect_model_kind
6
+ from server import Server
5
7
 
6
8
  model_name = sys.argv[1] if len(sys.argv) > 1 else "mlx-community/gemma-3-270m-it-qat-4bit"
7
9
  text_only = "--text-only" in sys.argv
8
10
 
9
- # モデル種別の判定とロード
10
- model_kind = "lm" if text_only else detect_model_kind(model_name)
11
-
12
- if model_kind == "vlm":
13
- from mlx_vlm import load as vlm_load, stream_generate as vlm_stream_generate
14
- try:
15
- model, processor = vlm_load(model_name)
16
- tokenizer = processor # capabilities取得用(VLMのprocessorもtokenizer互換)
17
- except (ValueError, Exception) as e:
18
- # mlx_vlm.models にモジュールが存在しても、実際のモデルに vision コンポーネントが
19
- # ない場合(例: Qwen3.5 テキストモデルが qwen2_vl として認識される)にフォールバック
20
- sys.stderr.write(f"VLM load failed, falling back to LM: {e}\n")
21
- model_kind = "lm"
22
- from mlx_lm import load, stream_generate
23
- from mlx_lm.sample_utils import make_sampler
24
- model, tokenizer = load(model_name)
25
- else:
26
- from mlx_lm import load, stream_generate
27
- from mlx_lm.sample_utils import make_sampler
28
- model, tokenizer = load(model_name)
29
-
30
- # Capabilities情報の取得
31
- capabilities = get_capabilities(tokenizer)
32
- capabilities["model_kind"] = model_kind
33
-
34
- def read():
35
- lines = []
36
- data = None
37
- eof = False
38
- while not eof:
39
- line = sys.stdin.readline()
40
- # sys.stderr.write('line:' + line + '\n')
41
- if not line:
42
- eof = True
43
- else:
44
- lines.append(line)
45
- try:
46
- data = json.loads(''.join(lines))
47
- except json.JSONDecodeError as e:
48
- data = None
49
- continue
50
- break
51
- return data
52
-
53
-
54
- def supports_chat_template():
55
- """
56
- チャットテンプレートがサポートされているかを判定
57
-
58
- apply_chat_templateメソッドの存在と、tokenizer.chat_templateの両方を確認する。
59
- tokenizer.chat_templateが設定されていない場合、apply_chat_templateを呼んでも
60
- エラーになるため、両方の条件をチェックする必要がある。
61
-
62
- Returns:
63
- bool: チャットテンプレートがサポートされている場合True
64
- """
65
- return (hasattr(tokenizer, 'apply_chat_template') and
66
- hasattr(tokenizer, 'chat_template') and
67
- tokenizer.chat_template is not None)
68
-
69
-
70
- def handle_capabilities():
71
- """capabilities API の処理"""
72
- print(json.dumps(capabilities), end='\0', flush=True)
73
-
74
-
75
- def handle_format_test(messages, options=None, tools=None):
76
- """フォーマットテスト API の処理(実際に生成せずフォーマットのみ)"""
77
- if options is None:
78
- options = {}
79
-
80
- result = {
81
- "formatted_prompt": None,
82
- "template_applied": False,
83
- "model_specific_processing": None,
84
- "error": None
85
- }
86
-
87
- try:
88
- # チャットテンプレートが利用可能かチェック
89
- if supports_chat_template():
90
- # messagesはTypeScript側で既にモデル固有処理済み
91
- result["model_specific_processing"] = messages
92
-
93
- # プロンプト生成(フォーマットのみ)
94
- primer = options.get('primer')
95
- add_generation_prompt = True
96
- tokenize = False # 常にテキストで返す
97
-
98
- if primer is not None:
99
- messages.append({'role': 'assistant', 'content': primer})
100
- add_generation_prompt = False
11
+ drafter_model = None
12
+ if "--drafter" in sys.argv:
13
+ idx = sys.argv.index("--drafter")
14
+ if idx + 1 < len(sys.argv):
15
+ drafter_model = sys.argv[idx + 1]
101
16
 
102
- # tools対応を試みる(テンプレートが対応していなければtools無しで実行)
103
- try:
104
- formatted_prompt = tokenizer.apply_chat_template(
105
- messages,
106
- tools=tools,
107
- add_generation_prompt=add_generation_prompt,
108
- tokenize=tokenize,
109
- )
110
- except TypeError:
111
- formatted_prompt = tokenizer.apply_chat_template(
112
- messages,
113
- add_generation_prompt=add_generation_prompt,
114
- tokenize=tokenize,
115
- )
116
-
117
- if primer is not None:
118
- formatted_prompt = primer.join(formatted_prompt.split(primer)[0:-1]) + primer
119
-
120
- result["formatted_prompt"] = formatted_prompt
121
- result["template_applied"] = True
122
- else:
123
- # チャットテンプレートがない場合はcompletionフォーマット
124
- formatted_prompt = generate_merged_prompt(messages)
125
- primer = options.get('primer')
126
- if primer is not None:
127
- formatted_prompt += primer
128
-
129
- result["formatted_prompt"] = formatted_prompt
130
- result["template_applied"] = False
131
-
132
- except Exception as e:
133
- result["error"] = str(e)
134
-
135
- print(json.dumps(result), end='\0', flush=True)
136
-
137
- def handle_chat(messages, primer=None, options=None, tools=None, reasoning_effort=None):
138
- """chat API の処理"""
139
- if options is None:
140
- options = {}
141
-
142
- trust_remote_code = options.pop('trust_remote_code', None)
143
-
144
- # チャットテンプレートが利用可能かチェック
145
- if not supports_chat_template():
146
- # チャットテンプレートがない場合はcompletionフォーマットに変換
147
- prompt = generate_merged_prompt(messages)
148
- if primer is not None:
149
- print(primer, end='', flush=True)
150
- generate_text(prompt, options)
151
- return
152
-
153
- # プロンプト生成
154
- add_generation_prompt = True
155
- tokenize = False
156
-
157
- if primer is not None:
158
- messages.append({'role': 'assistant', 'content': primer})
159
- add_generation_prompt = False
160
- tokenize = False
161
-
162
- # apply_chat_templateの追加引数(reasoning_effort等)
163
- extra_kwargs = {}
164
- if tools is not None:
165
- extra_kwargs['tools'] = tools
166
- if reasoning_effort is not None:
167
- extra_kwargs['reasoning_effort'] = reasoning_effort
168
- if trust_remote_code is not None:
169
- extra_kwargs['trust_remote_code'] = trust_remote_code
170
-
171
- # テンプレート適用(対応していないkwargsはTypeErrorになるので段階的にフォールバック)
172
- try:
173
- prompt = tokenizer.apply_chat_template(
174
- messages,
175
- add_generation_prompt=add_generation_prompt,
176
- tokenize=tokenize,
177
- **extra_kwargs,
178
- )
179
- except TypeError:
180
- # reasoning_effort非対応の場合、toolsのみで再試行
17
+ draft_block_size = None
18
+ if "--draft-block-size" in sys.argv:
19
+ idx = sys.argv.index("--draft-block-size")
20
+ if idx + 1 < len(sys.argv):
181
21
  try:
182
- fallback_kwargs = {}
183
- if tools is not None:
184
- fallback_kwargs['tools'] = tools
185
- prompt = tokenizer.apply_chat_template(
186
- messages,
187
- add_generation_prompt=add_generation_prompt,
188
- tokenize=tokenize,
189
- **fallback_kwargs,
190
- )
191
- except TypeError:
192
- prompt = tokenizer.apply_chat_template(
193
- messages,
194
- add_generation_prompt=add_generation_prompt,
195
- tokenize=tokenize,
196
- )
197
-
198
- if primer is not None:
199
- prompt = primer.join(prompt.split(primer)[0:-1]) + primer
200
- print(primer, end='', flush=True)
201
-
202
- generate_text(prompt, options)
203
-
204
-
205
- def generate_merged_prompt(messages):
206
- """apply_chat_templateがない場合のプロンプト生成"""
207
- # messagesはTypeScript側で既にmergeSystemMessages処理済み
208
- # TypeScript側のformatterと同じフォーマットを維持
209
-
210
- prompt_parts = []
211
- special_tokens = capabilities.get('special_tokens', {})
22
+ draft_block_size = int(sys.argv[idx + 1])
23
+ except ValueError:
24
+ sys.stderr.write(f"Invalid --draft-block-size value: {sys.argv[idx + 1]}\n")
25
+ sys.exit(1)
212
26
 
213
- for msg in messages:
214
- role = msg['role'] # 小文字のまま
215
- role_upper = role.upper()
216
27
 
217
- # 1. 専用のspecial_tokenを探す
218
- role_token = special_tokens.get(role)
28
+ def create_backend(model_name: str, text_only: bool = False):
29
+ model_kind = "lm" if text_only else detect_model_kind(model_name)
219
30
 
220
- if role_token and isinstance(role_token, dict) and 'start' in role_token:
221
- # 専用トークンがある場合
222
- start_token = role_token['start']['text']
223
- end_token = role_token['end']['text']
224
- prompt_parts.extend([
225
- start_token,
226
- msg['content'].strip(),
227
- end_token,
228
- '' # 空行で区切る
229
- ])
230
- else:
231
- # 2. 専用トークンがない場合、汎用blockトークンを探す
232
- # blockやcontextなどの汎用的なペアトークンを探す
233
- block_token = None
234
- for candidate in ['block', 'context', 'quote', 'section']:
235
- token = special_tokens.get(candidate)
236
- if token and isinstance(token, dict) and 'start' in token:
237
- block_token = token
238
- break
239
-
240
- if block_token:
241
- # 汎用blockトークンがある場合: {block_begin}{role}:\n...{block_end}
242
- start_token = block_token['start']['text']
243
- end_token = block_token['end']['text']
244
- prompt_parts.extend([
245
- f'{start_token}{role_upper}:\n{msg["content"].strip()}',
246
- end_token,
247
- '' # 空行で区切る
248
- ])
249
- else:
250
- # 3. どちらもない場合は、HTMLコメント形式(フォールバック)
251
- prompt_parts.extend([
252
- f'<!-- begin of {role_upper} -->',
253
- msg['content'].strip(),
254
- f'<!-- end of {role_upper} -->',
255
- '' # 空行で区切る
256
- ])
257
-
258
- # 最後の空行を削除して、ダブル改行で結合
259
- return '\n'.join(prompt_parts[:-1])
260
-
261
-
262
- def handle_completion(prompt, options=None, images=None, max_image_size=768):
263
- """completion API の処理
264
-
265
- VLMモデルの場合、TypeScript側でプロンプトにimageトークンが挿入済み。
266
- images が渡された場合は VLM 生成を使用する。
267
- """
268
- if options is None:
269
- options = {}
270
-
271
- # promptはTypeScript側で既にモデル固有処理済み
272
-
273
- if images:
274
- pil_images = load_and_resize_images(images, max_image_size)
275
-
276
- import re
277
- display_prompt = re.sub(r'(<\|image_pad\|>)+', '<|image_pad|>...', prompt)
278
- sys.stderr.write(f"--- vlm completion (images: {len(pil_images)}, max_size: {max_image_size})\n{display_prompt}\n")
279
-
280
- generate_text_vlm(prompt, pil_images, options)
281
- else:
282
- generate_text(prompt, options)
283
-
284
-
285
- def handle_chat_vlm(messages, images, options=None, max_image_size=768, tools=None, primer=None):
286
- """VLMモデル用のチャット処理
287
-
288
- messages: TypeScript側で画像プレースホルダー({type: "image"})が挿入済み
289
- images: 画像ファイルパスの配列(プレースホルダーと位置が対応)
290
- tools: ツール定義(テンプレートが対応している場合のみ使用)
291
- primer: アシスタント応答のプリフィックス
292
- """
293
- if options is None:
294
- options = {}
295
-
296
- # primer処理
297
- add_generation_prompt = True
298
- if primer is not None:
299
- messages.append({'role': 'assistant', 'content': primer})
300
- add_generation_prompt = False
301
-
302
- # processorのapply_chat_templateを直接使用
303
- # systemメッセージのマージはTypeScript側でchat_restrictionsに基づき処理済み
304
- # tools対応を試みる(テンプレートが対応していなければtools無しで実行)
305
- try:
306
- formatted_prompt = processor.apply_chat_template(
307
- messages,
308
- tools=tools,
309
- add_generation_prompt=add_generation_prompt,
310
- tokenize=False,
311
- )
312
- except TypeError:
313
- formatted_prompt = processor.apply_chat_template(
314
- messages,
315
- add_generation_prompt=add_generation_prompt,
316
- tokenize=False,
317
- )
318
-
319
- if primer is not None:
320
- formatted_prompt = primer.join(formatted_prompt.split(primer)[0:-1]) + primer
321
- print(primer, end='', flush=True)
322
-
323
- # 画像ファイルを読み込み・リサイズ
324
- pil_images = load_and_resize_images(images, max_image_size)
325
-
326
- # image_padトークンを省略して表示(大量のパディングで読みづらいため)
327
- import re
328
- display_prompt = re.sub(r'(<\|image_pad\|>)+', '<|image_pad|>...', formatted_prompt)
329
- sys.stderr.write(f"--- vlm prompt (images: {len(pil_images)}, max_size: {max_image_size})\n{display_prompt}\n")
330
-
331
- generate_text_vlm(formatted_prompt, pil_images, options)
332
-
333
-
334
- def generate_text_vlm(prompt, images, options, stop_token_ids=None):
335
- """VLMストリーミング生成"""
336
- temperature = options.pop('temperature', 1.0) if 'temperature' in options else 1.0
337
- max_tokens = options.pop('max_tokens', 1000) if 'max_tokens' in options else 1000
338
- top_p = options.pop('top_p', 0.0) if 'top_p' in options else 0.0
339
- top_k = options.pop('top_k', 0) if 'top_k' in options else 0
340
-
341
- for response in vlm_stream_generate(
342
- model, processor, prompt,
343
- image=images if images else None,
344
- max_tokens=max_tokens,
345
- temperature=temperature,
346
- top_p=top_p,
347
- top_k=top_k,
348
- ):
349
- # 追加 stop token チェック(tool call end 等)
350
- if stop_token_ids and hasattr(response, 'token') and int(response.token) in stop_token_ids:
351
- sys.stderr.write(f"--- stop token detected (vlm): {int(response.token)}\n")
352
- print('\n', end='\0', flush=True)
353
- return
354
- print(response.text.replace('\0', ''), end='', flush=True)
355
-
356
- print('\n', end='\0', flush=True)
357
-
358
-
359
- def generate_text(prompt, options):
360
- """テキスト生成の共通処理
361
-
362
- 注意: optionsはTypeScript側で事前にバリデーション済み
363
- - temperatureパラメータはsamplerオブジェクトに変換
364
- - サポートされていないパラメータはTS側でフィルタリング
365
- """
366
- # デフォルトオプションの設定
367
- default_options = {'max_tokens': 1000}
368
-
369
- # temperatureパラメータを抽出してsamplerを作成
370
- temperature = options.pop('temperature', 1.0) if 'temperature' in options else 1.0
371
- top_p = options.pop('top_p', 0.0) if 'top_p' in options else 0.0
372
- top_k = options.pop('top_k', 0) if 'top_k' in options else 0
373
-
374
- # samplerオブジェクトを作成
375
- sampler = make_sampler(temp=temperature, top_p=top_p, top_k=top_k)
376
-
377
- # 残りのオプションとマージ
378
- final_options = {**default_options, **options, 'sampler': sampler}
379
-
380
- if isinstance(prompt, list): # tokenized
381
- sys.stderr.write(f"--- prompt: len={len(prompt)}\n")
382
- else:
383
- sys.stderr.write(f"--- prompt\n{prompt}\n")
384
-
385
- eos_detected = False
386
- for response in stream_generate(model, tokenizer, prompt, **final_options):
387
- # トークンIDによるEOS判定(より確実)
388
- if is_eod_token(response, tokenizer):
389
- eos_detected = True
390
- print('\n', end='\0', flush=True)
391
- break
392
- if not eos_detected:
393
- print(response.text.replace('\0', ''), end='', flush=True)
394
-
395
- if not eos_detected:
396
- print('\n', end='\0', flush=True)
397
-
398
- def main():
399
- while True:
400
- req = read()
401
- if req is None:
402
- break
403
-
404
- method = req.get('method')
405
- if not method:
406
- sys.stderr.write("Error: 'method' field is required\n")
407
- print('\n', end='\0', flush=True)
408
- continue
409
-
31
+ if model_kind == "vlm":
32
+ backend = MlxVlmBackend()
410
33
  try:
411
- if method == 'capabilities':
412
- handle_capabilities()
413
-
414
- elif method == 'format_test':
415
- messages = req.get('messages')
416
- if not messages:
417
- sys.stderr.write("Error: 'messages' field is required for format_test method\n")
418
- print('\n', end='\0', flush=True)
419
- continue
34
+ backend.load(model_name)
35
+ return backend, "vlm"
36
+ except (ValueError, Exception) as e:
37
+ sys.stderr.write(f"VLM load failed, falling back to LM: {e}\n")
420
38
 
421
- options = req.get('options', {})
422
- tools = req.get('tools')
423
- handle_format_test(messages, options, tools)
39
+ backend = MlxLmBackend()
40
+ backend.load(model_name)
41
+ return backend, "lm"
424
42
 
425
- elif method == 'chat':
426
- messages = req.get('messages')
427
- if not messages:
428
- sys.stderr.write("Error: 'messages' field is required for chat method\n")
429
- print('\n', end='\0', flush=True)
430
- continue
431
43
 
432
- primer = req.get('primer')
433
- options = req.get('options', {})
434
- tools = req.get('tools')
435
- images = req.get('images', [])
436
- reasoning_effort = req.get('reasoning_effort')
44
+ if __name__ == "__main__":
45
+ backend, model_kind = create_backend(model_name, text_only)
437
46
 
438
- if model_kind == "vlm":
439
- max_image_size = req.get('maxImageSize', 768)
440
- handle_chat_vlm(messages, images, options, max_image_size, tools, primer)
441
- else:
442
- handle_chat(messages, primer, options, tools, reasoning_effort=reasoning_effort)
443
-
444
- elif method == 'completion':
445
- prompt = req.get('prompt')
446
- if not prompt:
447
- sys.stderr.write("Error: 'prompt' field is required for completion method\n")
448
- print('\n', end='\0', flush=True)
449
- continue
450
-
451
- options = req.get('options', {})
452
- images = req.get('images', [])
453
- max_image_size = req.get('maxImageSize', 768)
454
- handle_completion(prompt, options, images if images else None, max_image_size)
455
-
456
- else:
457
- sys.stderr.write(f"Error: Unknown method '{method}'\n")
458
- print('\n', end='\0', flush=True)
459
-
460
- except Exception as e:
461
- sys.stderr.write(f"Error processing request: {e}\n")
462
- print('\n', end='\0', flush=True)
47
+ if drafter_model:
48
+ backend.load_drafter(drafter_model)
49
+ if draft_block_size is not None and hasattr(backend, 'draft_block_size'):
50
+ backend.draft_block_size = draft_block_size
463
51
 
52
+ capabilities = get_capabilities(backend.get_tokenizer())
53
+ capabilities["model_kind"] = model_kind
54
+ if model_kind == "lm":
55
+ capabilities["methods"].append("cache_prefill")
464
56
 
465
- if __name__ == "__main__":
466
- main()
57
+ server = Server(backend, capabilities)
58
+ server.run()
@@ -0,0 +1,3 @@
1
+ from backends.base import ModelBackend
2
+ from backends.mlx_lm import MlxLmBackend
3
+ from backends.mlx_vlm import MlxVlmBackend
@@ -0,0 +1,84 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Any, Iterator
3
+
4
+
5
+ class ModelBackend(ABC):
6
+ """Abstract base class for model backends."""
7
+
8
+ @abstractmethod
9
+ def load(self, model_name: str) -> None:
10
+ """Load the target model."""
11
+ raise NotImplementedError
12
+
13
+ @abstractmethod
14
+ def get_tokenizer(self) -> Any:
15
+ """Return the tokenizer or processor."""
16
+ raise NotImplementedError
17
+
18
+ @abstractmethod
19
+ def stream_generate(
20
+ self, prompt: str | list[int], options: dict, images: list | None = None,
21
+ prompt_cache: list | None = None,
22
+ ) -> Iterator[Any]:
23
+ """Stream generation results."""
24
+ raise NotImplementedError
25
+
26
+ @abstractmethod
27
+ def supports_vision(self) -> bool:
28
+ """Return whether image input is supported."""
29
+ raise NotImplementedError
30
+
31
+ @property
32
+ @abstractmethod
33
+ def model_kind(self) -> str:
34
+ """Return "lm" or "vlm"."""
35
+ raise NotImplementedError
36
+
37
+ def load_drafter(self, drafter_model: str) -> None:
38
+ """Load a drafter model for speculative decoding."""
39
+ raise NotImplementedError(
40
+ f"{type(self).__name__} does not support drafter models"
41
+ )
42
+
43
+ def has_drafter(self) -> bool:
44
+ """Return whether a drafter model is loaded."""
45
+ return False
46
+
47
+ def cache_prefill(
48
+ self,
49
+ cache_path: str,
50
+ prompt: str,
51
+ base_cache_path: str | None = None,
52
+ trim_to_tokens: int | None = None,
53
+ prefix_offsets: list[int] | None = None,
54
+ prefix_hashes: list[str] | None = None,
55
+ ) -> dict:
56
+ """Build a KV cache from a prompt prefix."""
57
+ raise NotImplementedError(
58
+ f"{type(self).__name__} does not support prompt caching"
59
+ )
60
+
61
+ def load_cache_from_file(self, cache_path: str) -> list | None:
62
+ """Load a prompt cache from file, or None."""
63
+ return None
64
+
65
+ def get_cache_offset(self, prompt_cache: list) -> int:
66
+ """Get the number of tokens stored in a loaded prompt cache."""
67
+ if not prompt_cache:
68
+ return 0
69
+ layer0 = prompt_cache[0]
70
+ if hasattr(layer0, 'offset'):
71
+ off = layer0.offset
72
+ return int(off.item() if hasattr(off, 'item') else off)
73
+ if hasattr(layer0, 'caches'):
74
+ for c in layer0.caches:
75
+ if hasattr(c, 'offset'):
76
+ off = c.offset
77
+ return int(off.item() if hasattr(off, 'item') else off)
78
+ try:
79
+ return int(layer0[0].shape[2])
80
+ except Exception:
81
+ pass
82
+ if hasattr(layer0, 'keys') and layer0.keys is not None:
83
+ return int(layer0.keys.shape[2])
84
+ return 0