myagent-ai 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (486) hide show
  1. package/Dockerfile +30 -0
  2. package/README.md +333 -0
  3. package/agents/__init__.py +6 -0
  4. package/agents/__pycache__/main_agent.cpython-312.pyc +0 -0
  5. package/agents/base.py +115 -0
  6. package/agents/main_agent.py +695 -0
  7. package/agents/memory_agent.py +313 -0
  8. package/agents/tool_agent.py +248 -0
  9. package/chatbot/__init__.py +5 -0
  10. package/chatbot/base.py +124 -0
  11. package/chatbot/discord_bot.py +146 -0
  12. package/chatbot/feishu_bot.py +548 -0
  13. package/chatbot/manager.py +164 -0
  14. package/chatbot/qq_bot.py +189 -0
  15. package/chatbot/telegram_bot.py +167 -0
  16. package/chatbot/wechat_bot.py +558 -0
  17. package/communication/__init__.py +66 -0
  18. package/communication/channel.py +576 -0
  19. package/communication/crypto.py +347 -0
  20. package/communication/manager.py +397 -0
  21. package/communication/peer.py +156 -0
  22. package/config.py +464 -0
  23. package/core/__init__.py +10 -0
  24. package/core/config_broadcast.py +276 -0
  25. package/core/llm.py +878 -0
  26. package/core/logger.py +241 -0
  27. package/core/task_queue.py +362 -0
  28. package/core/utils.py +184 -0
  29. package/executor/__init__.py +4 -0
  30. package/executor/__pycache__/engine.cpython-312.pyc +0 -0
  31. package/executor/engine.py +1215 -0
  32. package/groups/__init__.py +15 -0
  33. package/groups/manager.py +724 -0
  34. package/knowledge/__init__.py +4 -0
  35. package/knowledge/rag.py +444 -0
  36. package/main.py +801 -0
  37. package/memory/__init__.py +4 -0
  38. package/memory/manager.py +840 -0
  39. package/organization/__init__.py +4 -0
  40. package/organization/manager.py +350 -0
  41. package/package.json +58 -0
  42. package/requirements.txt +59 -0
  43. package/setup.py +40 -0
  44. package/skills/ASR/LICENSE.txt +21 -0
  45. package/skills/ASR/SKILL.md +580 -0
  46. package/skills/ASR/scripts/asr.ts +27 -0
  47. package/skills/LLM/LICENSE.txt +21 -0
  48. package/skills/LLM/SKILL.md +856 -0
  49. package/skills/LLM/scripts/chat.ts +32 -0
  50. package/skills/TTS/LICENSE.txt +21 -0
  51. package/skills/TTS/SKILL.md +735 -0
  52. package/skills/TTS/tts.ts +25 -0
  53. package/skills/VLM/LICENSE.txt +21 -0
  54. package/skills/VLM/SKILL.md +588 -0
  55. package/skills/VLM/scripts/vlm.ts +57 -0
  56. package/skills/__init__.py +5 -0
  57. package/skills/agent-browser/SKILL.md +328 -0
  58. package/skills/ai-news-collectors/SKILL.md +157 -0
  59. package/skills/ai-news-collectors/_meta.json +6 -0
  60. package/skills/ai-news-collectors/references/sources.md +128 -0
  61. package/skills/aminer-open-academic/SKILL.md +312 -0
  62. package/skills/aminer-open-academic/_meta.json +6 -0
  63. package/skills/aminer-open-academic/evals/evals.json +46 -0
  64. package/skills/aminer-open-academic/references/api-catalog.md +1032 -0
  65. package/skills/aminer-open-academic/scripts/__pycache__/aminer_client.cpython-312.pyc +0 -0
  66. package/skills/aminer-open-academic/scripts/aminer_client.py +875 -0
  67. package/skills/auto-target-tracker/SKILL.md +317 -0
  68. package/skills/base.py +147 -0
  69. package/skills/blog-writer/2024-02-17-radical-transparency-sales.md +35 -0
  70. package/skills/blog-writer/2024-02-17-raycast-spotlight-superpowers.md +33 -0
  71. package/skills/blog-writer/2024-02-17-short-form-content-marketing.md +47 -0
  72. package/skills/blog-writer/2024-02-17-typing-speed-benefits.md +33 -0
  73. package/skills/blog-writer/2024-03-14-effective-ai-prompts.md +55 -0
  74. package/skills/blog-writer/2024-11-08-ai-revolutionizing-entry-level-sales.md +43 -0
  75. package/skills/blog-writer/2025-11-12-why-ai-art-is-useless.md +49 -0
  76. package/skills/blog-writer/README.md +2 -0
  77. package/skills/blog-writer/SKILL.md +158 -0
  78. package/skills/blog-writer/__pycache__/manage_examples.cpython-312.pyc +0 -0
  79. package/skills/blog-writer/_meta.json +6 -0
  80. package/skills/blog-writer/manage_examples.py +90 -0
  81. package/skills/blog-writer/style-guide.md +160 -0
  82. package/skills/browser_skill.py +146 -0
  83. package/skills/coding-agent/SKILL.md +120 -0
  84. package/skills/coding-agent/_meta.json +6 -0
  85. package/skills/coding-agent/criteria.md +48 -0
  86. package/skills/coding-agent/execution.md +42 -0
  87. package/skills/coding-agent/memory-template.md +38 -0
  88. package/skills/coding-agent/planning.md +31 -0
  89. package/skills/coding-agent/state.md +60 -0
  90. package/skills/coding-agent/verification.md +39 -0
  91. package/skills/content-strategy/SKILL.md +181 -0
  92. package/skills/content-strategy/_meta.json +6 -0
  93. package/skills/contentanalysis/ExtractWisdom/SKILL.md +229 -0
  94. package/skills/contentanalysis/ExtractWisdom/Workflows/Extract.md +60 -0
  95. package/skills/contentanalysis/SKILL.md +14 -0
  96. package/skills/docx/CHANGELOG.md +85 -0
  97. package/skills/docx/LICENSE.txt +30 -0
  98. package/skills/docx/SKILL.md +455 -0
  99. package/skills/docx/docx-js.md +681 -0
  100. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  101. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  102. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  103. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  104. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  105. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  106. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  107. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  108. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  109. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  110. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  111. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  112. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  113. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  114. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  115. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  116. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  117. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  118. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  119. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  120. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  121. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  122. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  123. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  124. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  125. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  126. package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  127. package/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  128. package/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  129. package/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  130. package/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  131. package/skills/docx/ooxml/schemas/mce/mc.xsd +75 -0
  132. package/skills/docx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
  133. package/skills/docx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
  134. package/skills/docx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
  135. package/skills/docx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
  136. package/skills/docx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
  137. package/skills/docx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  138. package/skills/docx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
  139. package/skills/docx/ooxml/scripts/__pycache__/pack.cpython-312.pyc +0 -0
  140. package/skills/docx/ooxml/scripts/__pycache__/unpack.cpython-312.pyc +0 -0
  141. package/skills/docx/ooxml/scripts/__pycache__/validate.cpython-312.pyc +0 -0
  142. package/skills/docx/ooxml/scripts/pack.py +159 -0
  143. package/skills/docx/ooxml/scripts/unpack.py +29 -0
  144. package/skills/docx/ooxml/scripts/validate.py +69 -0
  145. package/skills/docx/ooxml/scripts/validation/__init__.py +15 -0
  146. package/skills/docx/ooxml/scripts/validation/__pycache__/__init__.cpython-312.pyc +0 -0
  147. package/skills/docx/ooxml/scripts/validation/__pycache__/base.cpython-312.pyc +0 -0
  148. package/skills/docx/ooxml/scripts/validation/__pycache__/docx.cpython-312.pyc +0 -0
  149. package/skills/docx/ooxml/scripts/validation/__pycache__/pptx.cpython-312.pyc +0 -0
  150. package/skills/docx/ooxml/scripts/validation/__pycache__/redlining.cpython-312.pyc +0 -0
  151. package/skills/docx/ooxml/scripts/validation/base.py +951 -0
  152. package/skills/docx/ooxml/scripts/validation/docx.py +274 -0
  153. package/skills/docx/ooxml/scripts/validation/pptx.py +315 -0
  154. package/skills/docx/ooxml/scripts/validation/redlining.py +279 -0
  155. package/skills/docx/ooxml.md +615 -0
  156. package/skills/docx/scripts/__init__.py +1 -0
  157. package/skills/docx/scripts/__pycache__/__init__.cpython-312.pyc +0 -0
  158. package/skills/docx/scripts/__pycache__/add_toc_placeholders.cpython-312.pyc +0 -0
  159. package/skills/docx/scripts/__pycache__/document.cpython-312.pyc +0 -0
  160. package/skills/docx/scripts/__pycache__/utilities.cpython-312.pyc +0 -0
  161. package/skills/docx/scripts/add_toc_placeholders.py +220 -0
  162. package/skills/docx/scripts/document.py +1302 -0
  163. package/skills/docx/scripts/templates/comments.xml +3 -0
  164. package/skills/docx/scripts/templates/commentsExtended.xml +3 -0
  165. package/skills/docx/scripts/templates/commentsExtensible.xml +3 -0
  166. package/skills/docx/scripts/templates/commentsIds.xml +3 -0
  167. package/skills/docx/scripts/templates/people.xml +3 -0
  168. package/skills/docx/scripts/utilities.py +374 -0
  169. package/skills/dream-interpreter/SKILL.md +88 -0
  170. package/skills/dream-interpreter/assets/example_asset.txt +24 -0
  171. package/skills/dream-interpreter/references/api_reference.md +34 -0
  172. package/skills/dream-interpreter/references/interpretation-guide.md +83 -0
  173. package/skills/dream-interpreter/references/output-schema.md +65 -0
  174. package/skills/dream-interpreter/references/questioning-strategy.md +62 -0
  175. package/skills/dream-interpreter/references/visual-mapping.md +81 -0
  176. package/skills/dream-interpreter/scripts/__pycache__/example.cpython-312.pyc +0 -0
  177. package/skills/dream-interpreter/scripts/example.py +19 -0
  178. package/skills/dream-interpreter/skill.json +7 -0
  179. package/skills/file_skill.py +246 -0
  180. package/skills/finance/Finance_API_Doc.md +445 -0
  181. package/skills/finance/SKILL.md +53 -0
  182. package/skills/fullstack-dev/SKILL.md +205 -0
  183. package/skills/get-fortune-analysis/SKILL.md +370 -0
  184. package/skills/get-fortune-analysis/lunar_python.py +91 -0
  185. package/skills/gift-evaluator/SKILL.md +83 -0
  186. package/skills/gift-evaluator/__pycache__/html_tools.cpython-312.pyc +0 -0
  187. package/skills/gift-evaluator/html_tools.py +268 -0
  188. package/skills/image-edit/LICENSE.txt +21 -0
  189. package/skills/image-edit/SKILL.md +896 -0
  190. package/skills/image-edit/scripts/image-edit.ts +36 -0
  191. package/skills/image-generation/LICENSE.txt +21 -0
  192. package/skills/image-generation/SKILL.md +583 -0
  193. package/skills/image-generation/scripts/image-generation.ts +28 -0
  194. package/skills/image-understand/LICENSE.txt +21 -0
  195. package/skills/image-understand/SKILL.md +855 -0
  196. package/skills/image-understand/scripts/image-understand.ts +41 -0
  197. package/skills/interview-designer/README.md +70 -0
  198. package/skills/interview-designer/SKILL.md +53 -0
  199. package/skills/interview-designer/_meta.json +6 -0
  200. package/skills/interview-designer/references/design_rationale.md +43 -0
  201. package/skills/interview-designer/templates/interview_guide_template.md +62 -0
  202. package/skills/market-research-reports/SKILL.md +901 -0
  203. package/skills/market-research-reports/assets/FORMATTING_GUIDE.md +428 -0
  204. package/skills/market-research-reports/assets/market_report_template.tex +1380 -0
  205. package/skills/market-research-reports/assets/market_research.sty +564 -0
  206. package/skills/market-research-reports/references/data_analysis_patterns.md +548 -0
  207. package/skills/market-research-reports/references/report_structure_guide.md +999 -0
  208. package/skills/market-research-reports/references/visual_generation_guide.md +1077 -0
  209. package/skills/market-research-reports/scripts/__pycache__/generate_market_visuals.cpython-312.pyc +0 -0
  210. package/skills/market-research-reports/scripts/generate_market_visuals.py +529 -0
  211. package/skills/marketing-mode/README.md +49 -0
  212. package/skills/marketing-mode/SKILL.md +693 -0
  213. package/skills/marketing-mode/_meta.json +6 -0
  214. package/skills/marketing-mode/mode-prompt.md +39 -0
  215. package/skills/marketing-mode/skill.json +51 -0
  216. package/skills/mindfulness-meditation/SKILL.md +65 -0
  217. package/skills/mindfulness-meditation/_meta.json +6 -0
  218. package/skills/multi-search-engine/CHANGELOG.md +15 -0
  219. package/skills/multi-search-engine/CHANNELLOG.md +48 -0
  220. package/skills/multi-search-engine/SKILL.md +78 -0
  221. package/skills/multi-search-engine/_meta.json +6 -0
  222. package/skills/multi-search-engine/config.json +14 -0
  223. package/skills/multi-search-engine/metadata.json +7 -0
  224. package/skills/multi-search-engine/references/international-search.md +651 -0
  225. package/skills/pdf/LICENSE.txt +30 -0
  226. package/skills/pdf/SKILL.md +1534 -0
  227. package/skills/pdf/forms.md +205 -0
  228. package/skills/pdf/reference.md +765 -0
  229. package/skills/pdf/scripts/__pycache__/add_zai_metadata.cpython-312.pyc +0 -0
  230. package/skills/pdf/scripts/__pycache__/check_bounding_boxes.cpython-312.pyc +0 -0
  231. package/skills/pdf/scripts/__pycache__/check_bounding_boxes_test.cpython-312.pyc +0 -0
  232. package/skills/pdf/scripts/__pycache__/check_fillable_fields.cpython-312.pyc +0 -0
  233. package/skills/pdf/scripts/__pycache__/convert_pdf_to_images.cpython-312.pyc +0 -0
  234. package/skills/pdf/scripts/__pycache__/create_validation_image.cpython-312.pyc +0 -0
  235. package/skills/pdf/scripts/__pycache__/extract_form_field_info.cpython-312.pyc +0 -0
  236. package/skills/pdf/scripts/__pycache__/fill_fillable_fields.cpython-312.pyc +0 -0
  237. package/skills/pdf/scripts/__pycache__/fill_pdf_form_with_annotations.cpython-312.pyc +0 -0
  238. package/skills/pdf/scripts/__pycache__/sanitize_code.cpython-312.pyc +0 -0
  239. package/skills/pdf/scripts/add_zai_metadata.py +172 -0
  240. package/skills/pdf/scripts/check_bounding_boxes.py +70 -0
  241. package/skills/pdf/scripts/check_bounding_boxes_test.py +226 -0
  242. package/skills/pdf/scripts/check_fillable_fields.py +12 -0
  243. package/skills/pdf/scripts/convert_pdf_to_images.py +35 -0
  244. package/skills/pdf/scripts/create_validation_image.py +41 -0
  245. package/skills/pdf/scripts/extract_form_field_info.py +152 -0
  246. package/skills/pdf/scripts/fill_fillable_fields.py +114 -0
  247. package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +108 -0
  248. package/skills/pdf/scripts/sanitize_code.py +110 -0
  249. package/skills/podcast-generate/LICENSE.txt +21 -0
  250. package/skills/podcast-generate/SKILL.md +198 -0
  251. package/skills/podcast-generate/generate.ts +661 -0
  252. package/skills/podcast-generate/package.json +30 -0
  253. package/skills/podcast-generate/readme.md +177 -0
  254. package/skills/podcast-generate/test_data/segments.jsonl +3 -0
  255. package/skills/podcast-generate/tsconfig.json +26 -0
  256. package/skills/pptx/LICENSE.txt +30 -0
  257. package/skills/pptx/SKILL.md +507 -0
  258. package/skills/pptx/html2pptx.md +625 -0
  259. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  260. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  261. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  262. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  263. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  264. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  265. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  266. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  267. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  268. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  269. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  270. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  271. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  272. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  273. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  274. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  275. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  276. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  277. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  278. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  279. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  280. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  281. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  282. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  283. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  284. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  285. package/skills/pptx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  286. package/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  287. package/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  288. package/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  289. package/skills/pptx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  290. package/skills/pptx/ooxml/schemas/mce/mc.xsd +75 -0
  291. package/skills/pptx/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
  292. package/skills/pptx/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
  293. package/skills/pptx/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
  294. package/skills/pptx/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
  295. package/skills/pptx/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
  296. package/skills/pptx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  297. package/skills/pptx/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
  298. package/skills/pptx/ooxml/scripts/__pycache__/pack.cpython-312.pyc +0 -0
  299. package/skills/pptx/ooxml/scripts/__pycache__/unpack.cpython-312.pyc +0 -0
  300. package/skills/pptx/ooxml/scripts/__pycache__/validate.cpython-312.pyc +0 -0
  301. package/skills/pptx/ooxml/scripts/pack.py +159 -0
  302. package/skills/pptx/ooxml/scripts/unpack.py +29 -0
  303. package/skills/pptx/ooxml/scripts/validate.py +69 -0
  304. package/skills/pptx/ooxml/scripts/validation/__init__.py +15 -0
  305. package/skills/pptx/ooxml/scripts/validation/__pycache__/__init__.cpython-312.pyc +0 -0
  306. package/skills/pptx/ooxml/scripts/validation/__pycache__/base.cpython-312.pyc +0 -0
  307. package/skills/pptx/ooxml/scripts/validation/__pycache__/docx.cpython-312.pyc +0 -0
  308. package/skills/pptx/ooxml/scripts/validation/__pycache__/pptx.cpython-312.pyc +0 -0
  309. package/skills/pptx/ooxml/scripts/validation/__pycache__/redlining.cpython-312.pyc +0 -0
  310. package/skills/pptx/ooxml/scripts/validation/base.py +951 -0
  311. package/skills/pptx/ooxml/scripts/validation/docx.py +274 -0
  312. package/skills/pptx/ooxml/scripts/validation/pptx.py +315 -0
  313. package/skills/pptx/ooxml/scripts/validation/redlining.py +279 -0
  314. package/skills/pptx/ooxml.md +427 -0
  315. package/skills/pptx/scripts/__pycache__/inventory.cpython-312.pyc +0 -0
  316. package/skills/pptx/scripts/__pycache__/inventory.cpython-313.pyc +0 -0
  317. package/skills/pptx/scripts/__pycache__/rearrange.cpython-312.pyc +0 -0
  318. package/skills/pptx/scripts/__pycache__/replace.cpython-312.pyc +0 -0
  319. package/skills/pptx/scripts/__pycache__/thumbnail.cpython-312.pyc +0 -0
  320. package/skills/pptx/scripts/html2pptx.js +1044 -0
  321. package/skills/pptx/scripts/inventory.py +1020 -0
  322. package/skills/pptx/scripts/rearrange.py +231 -0
  323. package/skills/pptx/scripts/replace.py +385 -0
  324. package/skills/pptx/scripts/thumbnail.py +450 -0
  325. package/skills/qingyan-research/SKILL.md +294 -0
  326. package/skills/qingyan-research/__pycache__/generate_html.cpython-312.pyc +0 -0
  327. package/skills/qingyan-research/generate_html.py +33 -0
  328. package/skills/registry.py +344 -0
  329. package/skills/search_skill.py +228 -0
  330. package/skills/seo-content-writer/SKILL.md +661 -0
  331. package/skills/seo-content-writer/_meta.json +6 -0
  332. package/skills/seo-content-writer/references/content-structure-templates.md +875 -0
  333. package/skills/seo-content-writer/references/title-formulas.md +339 -0
  334. package/skills/skill-creator/LICENSE.txt +202 -0
  335. package/skills/skill-creator/SKILL.md +485 -0
  336. package/skills/skill-creator/agents/analyzer.md +274 -0
  337. package/skills/skill-creator/agents/comparator.md +202 -0
  338. package/skills/skill-creator/agents/grader.md +223 -0
  339. package/skills/skill-creator/assets/eval_review.html +146 -0
  340. package/skills/skill-creator/eval-viewer/__pycache__/generate_review.cpython-312.pyc +0 -0
  341. package/skills/skill-creator/eval-viewer/generate_review.py +471 -0
  342. package/skills/skill-creator/eval-viewer/viewer.html +1325 -0
  343. package/skills/skill-creator/references/schemas.md +430 -0
  344. package/skills/skill-creator/scripts/__init__.py +0 -0
  345. package/skills/skill-creator/scripts/__pycache__/__init__.cpython-312.pyc +0 -0
  346. package/skills/skill-creator/scripts/__pycache__/aggregate_benchmark.cpython-312.pyc +0 -0
  347. package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-312.pyc +0 -0
  348. package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-312.pyc +0 -0
  349. package/skills/skill-creator/scripts/__pycache__/package_skill.cpython-312.pyc +0 -0
  350. package/skills/skill-creator/scripts/__pycache__/quick_validate.cpython-312.pyc +0 -0
  351. package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-312.pyc +0 -0
  352. package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-312.pyc +0 -0
  353. package/skills/skill-creator/scripts/__pycache__/utils.cpython-312.pyc +0 -0
  354. package/skills/skill-creator/scripts/aggregate_benchmark.py +401 -0
  355. package/skills/skill-creator/scripts/generate_report.py +326 -0
  356. package/skills/skill-creator/scripts/improve_description.py +236 -0
  357. package/skills/skill-creator/scripts/package_skill.py +136 -0
  358. package/skills/skill-creator/scripts/quick_validate.py +103 -0
  359. package/skills/skill-creator/scripts/run_eval.py +310 -0
  360. package/skills/skill-creator/scripts/run_loop.py +328 -0
  361. package/skills/skill-creator/scripts/utils.py +47 -0
  362. package/skills/skill-finder-cn/SKILL.md +66 -0
  363. package/skills/skill-finder-cn/_meta.json +6 -0
  364. package/skills/skill-finder-cn/package.json +5 -0
  365. package/skills/skill-finder-cn/scripts/search.sh +15 -0
  366. package/skills/skill-vetter/SKILL.md +137 -0
  367. package/skills/stock-analysis-skill/SKILL.md +156 -0
  368. package/skills/stock-analysis-skill/package.json +21 -0
  369. package/skills/stock-analysis-skill/src/analyzer.ts +264 -0
  370. package/skills/stock-analysis-skill/src/dataFetcher.ts +130 -0
  371. package/skills/stock-analysis-skill/src/dividend.ts +226 -0
  372. package/skills/stock-analysis-skill/src/index.ts +327 -0
  373. package/skills/stock-analysis-skill/src/rumorScanner.ts +200 -0
  374. package/skills/stock-analysis-skill/src/types.ts +167 -0
  375. package/skills/stock-analysis-skill/src/watchlist.ts +292 -0
  376. package/skills/stock-analysis-skill/tsconfig.json +15 -0
  377. package/skills/storyboard-manager/SKILL.md +532 -0
  378. package/skills/storyboard-manager/index.js +9 -0
  379. package/skills/storyboard-manager/package.json +11 -0
  380. package/skills/storyboard-manager/references/character_development.md +232 -0
  381. package/skills/storyboard-manager/references/story_structures.md +148 -0
  382. package/skills/storyboard-manager/scripts/__pycache__/consistency_checker.cpython-312.pyc +0 -0
  383. package/skills/storyboard-manager/scripts/__pycache__/timeline_tracker.cpython-312.pyc +0 -0
  384. package/skills/storyboard-manager/scripts/consistency_checker.py +391 -0
  385. package/skills/storyboard-manager/scripts/timeline_tracker.py +352 -0
  386. package/skills/system_skill.py +249 -0
  387. package/skills/ui-ux-pro-max/SKILL.md +43 -0
  388. package/skills/ui-ux-pro-max/_meta.json +6 -0
  389. package/skills/ui-ux-pro-max/assets/data/charts.csv +26 -0
  390. package/skills/ui-ux-pro-max/assets/data/colors.csv +97 -0
  391. package/skills/ui-ux-pro-max/assets/data/icons.csv +101 -0
  392. package/skills/ui-ux-pro-max/assets/data/landing.csv +31 -0
  393. package/skills/ui-ux-pro-max/assets/data/products.csv +97 -0
  394. package/skills/ui-ux-pro-max/assets/data/react-performance.csv +45 -0
  395. package/skills/ui-ux-pro-max/assets/data/stacks/astro.csv +54 -0
  396. package/skills/ui-ux-pro-max/assets/data/stacks/flutter.csv +53 -0
  397. package/skills/ui-ux-pro-max/assets/data/stacks/html-tailwind.csv +56 -0
  398. package/skills/ui-ux-pro-max/assets/data/stacks/jetpack-compose.csv +53 -0
  399. package/skills/ui-ux-pro-max/assets/data/stacks/nextjs.csv +53 -0
  400. package/skills/ui-ux-pro-max/assets/data/stacks/nuxt-ui.csv +51 -0
  401. package/skills/ui-ux-pro-max/assets/data/stacks/nuxtjs.csv +59 -0
  402. package/skills/ui-ux-pro-max/assets/data/stacks/react-native.csv +52 -0
  403. package/skills/ui-ux-pro-max/assets/data/stacks/react.csv +54 -0
  404. package/skills/ui-ux-pro-max/assets/data/stacks/shadcn.csv +61 -0
  405. package/skills/ui-ux-pro-max/assets/data/stacks/svelte.csv +54 -0
  406. package/skills/ui-ux-pro-max/assets/data/stacks/swiftui.csv +51 -0
  407. package/skills/ui-ux-pro-max/assets/data/stacks/vue.csv +50 -0
  408. package/skills/ui-ux-pro-max/assets/data/styles.csv +68 -0
  409. package/skills/ui-ux-pro-max/assets/data/typography.csv +58 -0
  410. package/skills/ui-ux-pro-max/assets/data/ui-reasoning.csv +101 -0
  411. package/skills/ui-ux-pro-max/assets/data/ux-guidelines.csv +100 -0
  412. package/skills/ui-ux-pro-max/assets/data/web-interface.csv +31 -0
  413. package/skills/ui-ux-pro-max/data/charts.csv +26 -0
  414. package/skills/ui-ux-pro-max/data/colors.csv +97 -0
  415. package/skills/ui-ux-pro-max/data/icons.csv +101 -0
  416. package/skills/ui-ux-pro-max/data/landing.csv +31 -0
  417. package/skills/ui-ux-pro-max/data/products.csv +97 -0
  418. package/skills/ui-ux-pro-max/data/react-performance.csv +45 -0
  419. package/skills/ui-ux-pro-max/data/stacks/astro.csv +54 -0
  420. package/skills/ui-ux-pro-max/data/stacks/flutter.csv +53 -0
  421. package/skills/ui-ux-pro-max/data/stacks/html-tailwind.csv +56 -0
  422. package/skills/ui-ux-pro-max/data/stacks/jetpack-compose.csv +53 -0
  423. package/skills/ui-ux-pro-max/data/stacks/nextjs.csv +53 -0
  424. package/skills/ui-ux-pro-max/data/stacks/nuxt-ui.csv +51 -0
  425. package/skills/ui-ux-pro-max/data/stacks/nuxtjs.csv +59 -0
  426. package/skills/ui-ux-pro-max/data/stacks/react-native.csv +52 -0
  427. package/skills/ui-ux-pro-max/data/stacks/react.csv +54 -0
  428. package/skills/ui-ux-pro-max/data/stacks/shadcn.csv +61 -0
  429. package/skills/ui-ux-pro-max/data/stacks/svelte.csv +54 -0
  430. package/skills/ui-ux-pro-max/data/stacks/swiftui.csv +51 -0
  431. package/skills/ui-ux-pro-max/data/stacks/vue.csv +50 -0
  432. package/skills/ui-ux-pro-max/data/styles.csv +68 -0
  433. package/skills/ui-ux-pro-max/data/typography.csv +58 -0
  434. package/skills/ui-ux-pro-max/data/ui-reasoning.csv +101 -0
  435. package/skills/ui-ux-pro-max/data/ux-guidelines.csv +100 -0
  436. package/skills/ui-ux-pro-max/data/web-interface.csv +31 -0
  437. package/skills/ui-ux-pro-max/references/upstream-README.md +488 -0
  438. package/skills/ui-ux-pro-max/references/upstream-skill-content.md +288 -0
  439. package/skills/ui-ux-pro-max/scripts/__init__.py +0 -0
  440. package/skills/ui-ux-pro-max/scripts/__pycache__/__init__.cpython-312.pyc +0 -0
  441. package/skills/ui-ux-pro-max/scripts/__pycache__/core.cpython-312.pyc +0 -0
  442. package/skills/ui-ux-pro-max/scripts/__pycache__/design_system.cpython-312.pyc +0 -0
  443. package/skills/ui-ux-pro-max/scripts/__pycache__/search.cpython-312.pyc +0 -0
  444. package/skills/ui-ux-pro-max/scripts/core.py +253 -0
  445. package/skills/ui-ux-pro-max/scripts/design_system.py +1071 -0
  446. package/skills/ui-ux-pro-max/scripts/search.py +111 -0
  447. package/skills/video-generation/LICENSE.txt +21 -0
  448. package/skills/video-generation/SKILL.md +1082 -0
  449. package/skills/video-generation/scripts/video.ts +168 -0
  450. package/skills/video-understand/LICENSE.txt +21 -0
  451. package/skills/video-understand/SKILL.md +916 -0
  452. package/skills/video-understand/scripts/video-understand.ts +41 -0
  453. package/skills/visual-design-foundations/SKILL.md +318 -0
  454. package/skills/visual-design-foundations/references/color-systems.md +417 -0
  455. package/skills/visual-design-foundations/references/spacing-iconography.md +425 -0
  456. package/skills/visual-design-foundations/references/typography-systems.md +432 -0
  457. package/skills/web-reader/LICENSE.txt +21 -0
  458. package/skills/web-reader/SKILL.md +1140 -0
  459. package/skills/web-reader/scripts/web-reader.ts +37 -0
  460. package/skills/web-search/LICENSE.txt +21 -0
  461. package/skills/web-search/SKILL.md +912 -0
  462. package/skills/web-search/scripts/web_search.ts +44 -0
  463. package/skills/web-shader-extractor/SKILL.md +145 -0
  464. package/skills/web-shader-extractor/references/config-extraction.md +50 -0
  465. package/skills/web-shader-extractor/references/encoded-definitions.md +53 -0
  466. package/skills/web-shader-extractor/references/extraction-workflow.md +61 -0
  467. package/skills/web-shader-extractor/references/porting-strategy.md +164 -0
  468. package/skills/web-shader-extractor/references/shader-injection.md +126 -0
  469. package/skills/web-shader-extractor/references/shaders-com.md +190 -0
  470. package/skills/web-shader-extractor/references/tech-signatures.md +54 -0
  471. package/skills/web-shader-extractor/references/tsl-extraction.md +41 -0
  472. package/skills/web-shader-extractor/references/unicorn-studio.md +353 -0
  473. package/skills/web-shader-extractor/scripts/fetch-rendered-dom.mjs +153 -0
  474. package/skills/web-shader-extractor/scripts/scan-bundle.sh +76 -0
  475. package/skills/writing-plans/SKILL.md +116 -0
  476. package/skills/writing-plans/_meta.json +6 -0
  477. package/skills/xlsx/LICENSE.txt +30 -0
  478. package/skills/xlsx/SKILL.md +496 -0
  479. package/skills/xlsx/__pycache__/recalc.cpython-312.pyc +0 -0
  480. package/skills/xlsx/recalc.py +178 -0
  481. package/start.sh +36 -0
  482. package/web/__init__.py +1 -0
  483. package/web/__pycache__/api_server.cpython-312.pyc +0 -0
  484. package/web/api_server.py +2043 -0
  485. package/web/ui/chat.html +3235 -0
  486. package/web/ui/index.html +458 -0
@@ -0,0 +1,855 @@
1
+ ---
2
+ name: image-understand
3
+ description: Implement specialized image understanding capabilities using the z-ai-web-dev-sdk. Use this skill when the user needs to analyze static images, extract visual information, perform OCR, detect objects, classify images, or understand visual content. Optimized for PNG, JPEG, GIF, WebP, and BMP formats.
4
+ license: MIT
5
+ ---
6
+
7
+ # Image Understanding Skill
8
+
9
+ This skill provides specialized image understanding functionality using the z-ai-web-dev-sdk package, enabling AI models to analyze, describe, and extract information from static images.
10
+
11
+ ## Skills Path
12
+
13
+ **Skill Location**: `{project_path}/skills/image-understand`
14
+
15
+ This skill is located at the above path in your project.
16
+
17
+ **Reference Scripts**: Example test scripts are available in the `{Skill Location}/scripts/` directory for quick testing and reference. See `{Skill Location}/scripts/image-understand.ts` for a working example.
18
+
19
+ ## Overview
20
+
21
+ Image Understanding focuses specifically on static image analysis, providing capabilities for:
22
+ - Image description and scene understanding
23
+ - Object detection and recognition
24
+ - OCR (Optical Character Recognition) and text extraction
25
+ - Image classification and categorization
26
+ - Visual content analysis
27
+ - Quality assessment
28
+ - Accessibility (alt text generation)
29
+
30
+ **IMPORTANT**: z-ai-web-dev-sdk MUST be used in backend code only. Never use it in client-side code.
31
+
32
+ ## Prerequisites
33
+
34
+ The z-ai-web-dev-sdk package is already installed. Import it as shown in the examples below.
35
+
36
+ ## CLI Usage (For Simple Tasks)
37
+
38
+ For quick image analysis tasks, you can use the z-ai CLI instead of writing code. This is ideal for simple image descriptions, testing, or automation.
39
+
40
+ ### Basic Image Analysis
41
+
42
+ ```bash
43
+ # Describe an image from URL
44
+ z-ai vision --prompt "What's in this image?" --image "https://example.com/photo.jpg"
45
+
46
+ # Using short options
47
+ z-ai vision -p "Describe this image" -i "https://example.com/image.png"
48
+ ```
49
+
50
+ ### Analyze Local Images
51
+
52
+ ```bash
53
+ # Analyze a local image file
54
+ z-ai vision -p "What objects are in this photo?" -i "./photo.jpg"
55
+
56
+ # Save response to file
57
+ z-ai vision -p "Describe the scene" -i "./landscape.png" -o description.json
58
+ ```
59
+
60
+ ### Multiple Images Comparison
61
+
62
+ ```bash
63
+ # Compare multiple images
64
+ z-ai vision \
65
+ -p "Compare these two images and highlight the differences" \
66
+ -i "./photo1.jpg" \
67
+ -i "./photo2.jpg" \
68
+ -o comparison.json
69
+
70
+ # Analyze a series of images
71
+ z-ai vision \
72
+ --prompt "What patterns do you see across these images?" \
73
+ --image "https://example.com/img1.jpg" \
74
+ --image "https://example.com/img2.jpg" \
75
+ --image "https://example.com/img3.jpg"
76
+ ```
77
+
78
+ ### Advanced Analysis with Thinking
79
+
80
+ ```bash
81
+ # Enable chain-of-thought reasoning for complex tasks
82
+ z-ai vision \
83
+ -p "Count all people in this image and describe what each person is doing" \
84
+ -i "./crowd.jpg" \
85
+ --thinking \
86
+ -o analysis.json
87
+
88
+ # Complex object detection with reasoning
89
+ z-ai vision \
90
+ -p "Identify all safety hazards in this workplace image" \
91
+ -i "./workplace.jpg" \
92
+ --thinking
93
+ ```
94
+
95
+ ### Streaming Output
96
+
97
+ ```bash
98
+ # Stream the analysis in real-time
99
+ z-ai vision -p "Provide a detailed description" -i "./photo.jpg" --stream
100
+ ```
101
+
102
+ ### CLI Parameters
103
+
104
+ - `--prompt, -p <text>`: **Required** - Question or instruction about the image(s)
105
+ - `--image, -i <URL or path>`: Optional - Image URL or local file path (can be used multiple times)
106
+ - `--thinking, -t`: Optional - Enable chain-of-thought reasoning (default: disabled)
107
+ - `--output, -o <path>`: Optional - Output file path (JSON format)
108
+ - `--stream`: Optional - Stream the response in real-time
109
+
110
+ ### Supported Image Formats
111
+
112
+ - PNG (.png) - Best for diagrams, screenshots, graphics with transparency
113
+ - JPEG (.jpg, .jpeg) - Best for photos and complex images
114
+ - GIF (.gif) - Supports both static and animated images
115
+ - WebP (.webp) - Modern format with good compression
116
+ - BMP (.bmp) - Uncompressed bitmap format
117
+
118
+ ### When to Use CLI vs SDK
119
+
120
+ **Use CLI for:**
121
+ - Quick image analysis or descriptions
122
+ - One-off OCR tasks
123
+ - Testing image understanding capabilities
124
+ - Simple batch processing scripts
125
+ - Generating alt text for accessibility
126
+
127
+ **Use SDK for:**
128
+ - Multi-turn conversations about images
129
+ - Complex image processing pipelines
130
+ - Production applications with error handling
131
+ - Custom integration with your application logic
132
+ - Batch processing with custom business logic
133
+
134
+ ## Recommended Approach
135
+
136
+ For better performance and reliability, use base64 encoding to pass images to the model instead of image URLs.
137
+
138
+ ## Basic Image Understanding Implementation
139
+
140
+ ### Single Image Analysis
141
+
142
+ ```javascript
143
+ import ZAI from 'z-ai-web-dev-sdk';
144
+
145
+ async function analyzeImage(imageUrl, prompt) {
146
+ const zai = await ZAI.create();
147
+
148
+ const response = await zai.chat.completions.createVision({
149
+ messages: [
150
+ {
151
+ role: 'user',
152
+ content: [
153
+ {
154
+ type: 'text',
155
+ text: prompt
156
+ },
157
+ {
158
+ type: 'image_url',
159
+ image_url: {
160
+ url: imageUrl
161
+ }
162
+ }
163
+ ]
164
+ }
165
+ ],
166
+ thinking: { type: 'disabled' }
167
+ });
168
+
169
+ return response.choices[0]?.message?.content;
170
+ }
171
+
172
+ // Usage examples
173
+ const description = await analyzeImage(
174
+ 'https://example.com/landscape.jpg',
175
+ 'Describe this landscape in detail, including colors, lighting, and mood'
176
+ );
177
+
178
+ const objectDetection = await analyzeImage(
179
+ 'https://example.com/room.jpg',
180
+ 'List all objects visible in this room'
181
+ );
182
+ ```
183
+
184
+ ### Multiple Images Comparison
185
+
186
+ ```javascript
187
+ import ZAI from 'z-ai-web-dev-sdk';
188
+
189
+ async function compareImages(imageUrls, question) {
190
+ const zai = await ZAI.create();
191
+
192
+ const content = [
193
+ {
194
+ type: 'text',
195
+ text: question
196
+ },
197
+ ...imageUrls.map(url => ({
198
+ type: 'image_url',
199
+ image_url: { url }
200
+ }))
201
+ ];
202
+
203
+ const response = await zai.chat.completions.createVision({
204
+ messages: [
205
+ {
206
+ role: 'user',
207
+ content: content
208
+ }
209
+ ],
210
+ thinking: { type: 'disabled' }
211
+ });
212
+
213
+ return response.choices[0]?.message?.content;
214
+ }
215
+
216
+ // Usage
217
+ const comparison = await compareImages(
218
+ [
219
+ 'https://example.com/before.jpg',
220
+ 'https://example.com/after.jpg'
221
+ ],
222
+ 'What are the key differences between these before and after images?'
223
+ );
224
+ ```
225
+
226
+ ### Base64 Image Support (Recommended)
227
+
228
+ ```javascript
229
+ import ZAI from 'z-ai-web-dev-sdk';
230
+ import fs from 'fs';
231
+ import path from 'path';
232
+
233
+ async function analyzeLocalImage(imagePath, prompt) {
234
+ const zai = await ZAI.create();
235
+
236
+ // Read image file and convert to base64
237
+ const imageBuffer = fs.readFileSync(imagePath);
238
+ const base64Image = imageBuffer.toString('base64');
239
+
240
+ // Determine MIME type based on file extension
241
+ const ext = path.extname(imagePath).toLowerCase();
242
+ const mimeTypes = {
243
+ '.png': 'image/png',
244
+ '.jpg': 'image/jpeg',
245
+ '.jpeg': 'image/jpeg',
246
+ '.gif': 'image/gif',
247
+ '.webp': 'image/webp',
248
+ '.bmp': 'image/bmp'
249
+ };
250
+ const mimeType = mimeTypes[ext] || 'image/jpeg';
251
+
252
+ const response = await zai.chat.completions.createVision({
253
+ messages: [
254
+ {
255
+ role: 'user',
256
+ content: [
257
+ {
258
+ type: 'text',
259
+ text: prompt
260
+ },
261
+ {
262
+ type: 'image_url',
263
+ image_url: {
264
+ url: `data:${mimeType};base64,${base64Image}`
265
+ }
266
+ }
267
+ ]
268
+ }
269
+ ],
270
+ thinking: { type: 'disabled' }
271
+ });
272
+
273
+ return response.choices[0]?.message?.content;
274
+ }
275
+
276
+ // Usage
277
+ const result = await analyzeLocalImage(
278
+ './product-photo.jpg',
279
+ 'Analyze this product image for e-commerce listing'
280
+ );
281
+ ```
282
+
283
+ ## Advanced Use Cases
284
+
285
+ ### OCR and Text Extraction
286
+
287
+ ```javascript
288
+ import ZAI from 'z-ai-web-dev-sdk';
289
+
290
+ async function extractText(imageUrl, options = {}) {
291
+ const zai = await ZAI.create();
292
+
293
+ const prompt = options.preserveLayout
294
+ ? 'Extract all text from this image. Preserve the exact layout, formatting, and structure.'
295
+ : 'Extract all visible text from this image.';
296
+
297
+ const response = await zai.chat.completions.createVision({
298
+ messages: [
299
+ {
300
+ role: 'user',
301
+ content: [
302
+ { type: 'text', text: prompt },
303
+ { type: 'image_url', image_url: { url: imageUrl } }
304
+ ]
305
+ }
306
+ ],
307
+ thinking: { type: 'disabled' }
308
+ });
309
+
310
+ return response.choices[0]?.message?.content;
311
+ }
312
+
313
+ // Usage examples
314
+ const receiptText = await extractText(
315
+ 'https://example.com/receipt.jpg',
316
+ { preserveLayout: true }
317
+ );
318
+
319
+ const businessCardInfo = await extractText(
320
+ 'https://example.com/business-card.jpg'
321
+ );
322
+ ```
323
+
324
+ ### Object Detection and Counting
325
+
326
+ ```javascript
327
+ import ZAI from 'z-ai-web-dev-sdk';
328
+
329
+ async function detectObjects(imageUrl, objectType) {
330
+ const zai = await ZAI.create();
331
+
332
+ const prompt = objectType
333
+ ? `Count and locate all ${objectType} in this image. Provide their positions and describe each one.`
334
+ : 'Detect and list all objects in this image with their approximate locations.';
335
+
336
+ const response = await zai.chat.completions.createVision({
337
+ messages: [
338
+ {
339
+ role: 'user',
340
+ content: [
341
+ { type: 'text', text: prompt },
342
+ { type: 'image_url', image_url: { url: imageUrl } }
343
+ ]
344
+ }
345
+ ],
346
+ thinking: { type: 'enabled' } // Enable thinking for complex counting
347
+ });
348
+
349
+ return response.choices[0]?.message?.content;
350
+ }
351
+
352
+ // Usage
353
+ const peopleCount = await detectObjects(
354
+ 'https://example.com/crowd.jpg',
355
+ 'people'
356
+ );
357
+
358
+ const allObjects = await detectObjects(
359
+ 'https://example.com/room.jpg'
360
+ );
361
+ ```
362
+
363
+ ### Image Classification and Tagging
364
+
365
+ ```javascript
366
+ import ZAI from 'z-ai-web-dev-sdk';
367
+
368
+ async function classifyAndTag(imageUrl) {
369
+ const zai = await ZAI.create();
370
+
371
+ const prompt = `Analyze this image and provide a comprehensive classification:
372
+ 1. Primary category (e.g., nature, urban, portrait, product)
373
+ 2. Subject matter (main focus of the image)
374
+ 3. Style or mood (e.g., professional, casual, artistic, vintage)
375
+ 4. Color palette description
376
+ 5. Suggested tags (10-15 keywords, comma-separated)
377
+
378
+ Format your response as structured JSON.`;
379
+
380
+ const response = await zai.chat.completions.createVision({
381
+ messages: [
382
+ {
383
+ role: 'user',
384
+ content: [
385
+ { type: 'text', text: prompt },
386
+ { type: 'image_url', image_url: { url: imageUrl } }
387
+ ]
388
+ }
389
+ ],
390
+ thinking: { type: 'disabled' }
391
+ });
392
+
393
+ const content = response.choices[0]?.message?.content;
394
+
395
+ try {
396
+ return JSON.parse(content);
397
+ } catch (e) {
398
+ return { rawResponse: content };
399
+ }
400
+ }
401
+
402
+ // Usage
403
+ const classification = await classifyAndTag(
404
+ 'https://example.com/photo.jpg'
405
+ );
406
+ console.log('Tags:', classification.tags);
407
+ ```
408
+
409
+ ### Quality Assessment
410
+
411
+ ```javascript
412
+ import ZAI from 'z-ai-web-dev-sdk';
413
+
414
+ async function assessImageQuality(imageUrl) {
415
+ const zai = await ZAI.create();
416
+
417
+ const prompt = `Assess the technical quality of this image:
418
+ 1. Sharpness and focus (1-10)
419
+ 2. Exposure and brightness (1-10)
420
+ 3. Color balance (1-10)
421
+ 4. Composition (1-10)
422
+ 5. Any technical issues (blur, noise, artifacts, etc.)
423
+ 6. Overall quality rating (1-10)
424
+ 7. Suggestions for improvement
425
+
426
+ Provide specific feedback for each criterion.`;
427
+
428
+ const response = await zai.chat.completions.createVision({
429
+ messages: [
430
+ {
431
+ role: 'user',
432
+ content: [
433
+ { type: 'text', text: prompt },
434
+ { type: 'image_url', image_url: { url: imageUrl } }
435
+ ]
436
+ }
437
+ ],
438
+ thinking: { type: 'disabled' }
439
+ });
440
+
441
+ return response.choices[0]?.message?.content;
442
+ }
443
+ ```
444
+
445
+ ### Accessibility - Alt Text Generation
446
+
447
+ ```javascript
448
+ import ZAI from 'z-ai-web-dev-sdk';
449
+
450
+ async function generateAltText(imageUrl, context = '') {
451
+ const zai = await ZAI.create();
452
+
453
+ const prompt = context
454
+ ? `Generate concise, descriptive alt text for this image. Context: ${context}. Focus on the most important visual elements that convey the image's purpose.`
455
+ : 'Generate concise, descriptive alt text for this image suitable for screen readers. Focus on key visual elements.';
456
+
457
+ const response = await zai.chat.completions.createVision({
458
+ messages: [
459
+ {
460
+ role: 'user',
461
+ content: [
462
+ { type: 'text', text: prompt },
463
+ { type: 'image_url', image_url: { url: imageUrl } }
464
+ ]
465
+ }
466
+ ],
467
+ thinking: { type: 'disabled' }
468
+ });
469
+
470
+ return response.choices[0]?.message?.content;
471
+ }
472
+
473
+ // Usage
474
+ const altText = await generateAltText(
475
+ 'https://example.com/hero-image.jpg',
476
+ 'Website hero section for a tech startup'
477
+ );
478
+ ```
479
+
480
+ ### Scene Understanding
481
+
482
+ ```javascript
483
+ import ZAI from 'z-ai-web-dev-sdk';
484
+
485
+ async function understandScene(imageUrl) {
486
+ const zai = await ZAI.create();
487
+
488
+ const prompt = `Provide a comprehensive scene analysis:
489
+ 1. Setting/location type (indoor/outdoor, specific place)
490
+ 2. Time of day and lighting conditions
491
+ 3. Weather (if applicable)
492
+ 4. People present (number, activities, interactions)
493
+ 5. Key objects and their arrangement
494
+ 6. Overall atmosphere and mood
495
+ 7. Notable details or interesting elements`;
496
+
497
+ const response = await zai.chat.completions.createVision({
498
+ messages: [
499
+ {
500
+ role: 'user',
501
+ content: [
502
+ { type: 'text', text: prompt },
503
+ { type: 'image_url', image_url: { url: imageUrl } }
504
+ ]
505
+ }
506
+ ],
507
+ thinking: { type: 'disabled' }
508
+ });
509
+
510
+ return response.choices[0]?.message?.content;
511
+ }
512
+ ```
513
+
514
+ ## Batch Processing
515
+
516
+ ### Process Multiple Images
517
+
518
+ ```javascript
519
+ import ZAI from 'z-ai-web-dev-sdk';
520
+
521
+ class ImageBatchProcessor {
522
+ constructor() {
523
+ this.zai = null;
524
+ }
525
+
526
+ async initialize() {
527
+ this.zai = await ZAI.create();
528
+ }
529
+
530
+ async processImage(imageUrl, prompt) {
531
+ const response = await this.zai.chat.completions.createVision({
532
+ messages: [
533
+ {
534
+ role: 'user',
535
+ content: [
536
+ { type: 'text', text: prompt },
537
+ { type: 'image_url', image_url: { url: imageUrl } }
538
+ ]
539
+ }
540
+ ],
541
+ thinking: { type: 'disabled' }
542
+ });
543
+
544
+ return response.choices[0]?.message?.content;
545
+ }
546
+
547
+ async processBatch(imageUrls, prompt) {
548
+ const results = [];
549
+
550
+ for (const imageUrl of imageUrls) {
551
+ try {
552
+ const result = await this.processImage(imageUrl, prompt);
553
+ results.push({ imageUrl, success: true, result });
554
+ } catch (error) {
555
+ results.push({
556
+ imageUrl,
557
+ success: false,
558
+ error: error.message
559
+ });
560
+ }
561
+ }
562
+
563
+ return results;
564
+ }
565
+ }
566
+
567
+ // Usage
568
+ const processor = new ImageBatchProcessor();
569
+ await processor.initialize();
570
+
571
+ const images = [
572
+ 'https://example.com/img1.jpg',
573
+ 'https://example.com/img2.jpg',
574
+ 'https://example.com/img3.jpg'
575
+ ];
576
+
577
+ const results = await processor.processBatch(
578
+ images,
579
+ 'Generate a short description suitable for social media'
580
+ );
581
+ ```
582
+
583
+ ## Best Practices
584
+
585
+ ### 1. Image Quality and Preparation
586
+ - Use high-resolution images for better analysis accuracy
587
+ - Ensure images are well-lit and properly exposed
588
+ - For OCR, ensure text is clear and readable
589
+ - Optimize file size to balance quality and performance
590
+ - Supported formats: PNG (best for text/diagrams), JPEG (best for photos), WebP, GIF, BMP
591
+
592
+ ### 2. Prompt Engineering for Images
593
+ - Be specific about what information you need
594
+ - Mention the type of image (photo, diagram, screenshot, etc.)
595
+ - For complex tasks, break down into specific questions
596
+ - Use structured prompts for JSON output
597
+ - Include context when relevant
598
+
599
+ ### 3. Error Handling
600
+
601
+ ```javascript
602
+ async function safeImageAnalysis(imageUrl, prompt) {
603
+ try {
604
+ const zai = await ZAI.create();
605
+
606
+ const response = await zai.chat.completions.createVision({
607
+ messages: [
608
+ {
609
+ role: 'user',
610
+ content: [
611
+ { type: 'text', text: prompt },
612
+ { type: 'image_url', image_url: { url: imageUrl } }
613
+ ]
614
+ }
615
+ ],
616
+ thinking: { type: 'disabled' }
617
+ });
618
+
619
+ return {
620
+ success: true,
621
+ content: response.choices[0]?.message?.content
622
+ };
623
+ } catch (error) {
624
+ console.error('Image analysis error:', error);
625
+ return {
626
+ success: false,
627
+ error: error.message
628
+ };
629
+ }
630
+ }
631
+ ```
632
+
633
+ ### 4. Performance Optimization
634
+ - Cache SDK instance for batch processing
635
+ - Use base64 encoding for local images
636
+ - Implement request throttling for large batches
637
+ - Consider image preprocessing (resize, compress) for large files
638
+ - Use appropriate thinking mode (disabled for simple tasks, enabled for complex reasoning)
639
+
640
+ ### 5. Security Considerations
641
+ - Validate image URLs before processing
642
+ - Implement rate limiting for public APIs
643
+ - Sanitize user-provided image data
644
+ - Never expose SDK credentials in client-side code
645
+ - Implement content moderation for user-uploaded images
646
+
647
+ ## Common Use Cases
648
+
649
+ 1. **E-commerce Product Analysis**: Analyze product images, extract features, generate descriptions
650
+ 2. **Document Processing**: Extract text from receipts, invoices, forms, business cards
651
+ 3. **Content Moderation**: Detect inappropriate content, verify image compliance
652
+ 4. **Quality Control**: Identify defects, assess product quality in manufacturing
653
+ 5. **Accessibility**: Generate alt text for images automatically
654
+ 6. **Image Cataloging**: Auto-tag and categorize image libraries
655
+ 7. **Visual Search**: Understand and index images for search functionality
656
+ 8. **Medical Imaging**: Preliminary analysis with appropriate disclaimers
657
+ 9. **Real Estate**: Analyze property photos, extract features
658
+ 10. **Social Media**: Generate captions, hashtags, and descriptions
659
+
660
+ ## Integration Examples
661
+
662
+ ### Express.js API Endpoint
663
+
664
+ ```javascript
665
+ import express from 'express';
666
+ import ZAI from 'z-ai-web-dev-sdk';
667
+ import multer from 'multer';
668
+
669
+ const app = express();
670
+ const upload = multer({ storage: multer.memoryStorage() });
671
+
672
+ let zaiInstance;
673
+
674
+ async function initZAI() {
675
+ zaiInstance = await ZAI.create();
676
+ }
677
+
678
+ // Analyze image from URL
679
+ app.post('/api/analyze-image', express.json(), async (req, res) => {
680
+ try {
681
+ const { imageUrl, prompt } = req.body;
682
+
683
+ if (!imageUrl || !prompt) {
684
+ return res.status(400).json({
685
+ error: 'imageUrl and prompt are required'
686
+ });
687
+ }
688
+
689
+ const response = await zaiInstance.chat.completions.createVision({
690
+ messages: [
691
+ {
692
+ role: 'user',
693
+ content: [
694
+ { type: 'text', text: prompt },
695
+ { type: 'image_url', image_url: { url: imageUrl } }
696
+ ]
697
+ }
698
+ ],
699
+ thinking: { type: 'disabled' }
700
+ });
701
+
702
+ res.json({
703
+ success: true,
704
+ analysis: response.choices[0]?.message?.content
705
+ });
706
+ } catch (error) {
707
+ res.status(500).json({
708
+ success: false,
709
+ error: error.message
710
+ });
711
+ }
712
+ });
713
+
714
+ // Analyze uploaded image file
715
+ app.post('/api/analyze-upload', upload.single('image'), async (req, res) => {
716
+ try {
717
+ const { prompt } = req.body;
718
+ const imageFile = req.file;
719
+
720
+ if (!imageFile || !prompt) {
721
+ return res.status(400).json({
722
+ error: 'image file and prompt are required'
723
+ });
724
+ }
725
+
726
+ // Convert to base64
727
+ const base64Image = imageFile.buffer.toString('base64');
728
+ const mimeType = imageFile.mimetype;
729
+
730
+ const response = await zaiInstance.chat.completions.createVision({
731
+ messages: [
732
+ {
733
+ role: 'user',
734
+ content: [
735
+ { type: 'text', text: prompt },
736
+ {
737
+ type: 'image_url',
738
+ image_url: {
739
+ url: `data:${mimeType};base64,${base64Image}`
740
+ }
741
+ }
742
+ ]
743
+ }
744
+ ],
745
+ thinking: { type: 'disabled' }
746
+ });
747
+
748
+ res.json({
749
+ success: true,
750
+ analysis: response.choices[0]?.message?.content
751
+ });
752
+ } catch (error) {
753
+ res.status(500).json({
754
+ success: false,
755
+ error: error.message
756
+ });
757
+ }
758
+ });
759
+
760
+ initZAI().then(() => {
761
+ app.listen(3000, () => {
762
+ console.log('Image understanding API running on port 3000');
763
+ });
764
+ });
765
+ ```
766
+
767
+ ### Next.js API Route
768
+
769
+ ```javascript
770
+ // pages/api/image-understand.js
771
+ import ZAI from 'z-ai-web-dev-sdk';
772
+
773
+ let zaiInstance = null;
774
+
775
+ async function getZAI() {
776
+ if (!zaiInstance) {
777
+ zaiInstance = await ZAI.create();
778
+ }
779
+ return zaiInstance;
780
+ }
781
+
782
+ export default async function handler(req, res) {
783
+ if (req.method !== 'POST') {
784
+ return res.status(405).json({ error: 'Method not allowed' });
785
+ }
786
+
787
+ try {
788
+ const { imageUrl, prompt } = req.body;
789
+
790
+ if (!imageUrl || !prompt) {
791
+ return res.status(400).json({
792
+ error: 'imageUrl and prompt are required'
793
+ });
794
+ }
795
+
796
+ const zai = await getZAI();
797
+
798
+ const response = await zai.chat.completions.createVision({
799
+ messages: [
800
+ {
801
+ role: 'user',
802
+ content: [
803
+ { type: 'text', text: prompt },
804
+ { type: 'image_url', image_url: { url: imageUrl } }
805
+ ]
806
+ }
807
+ ],
808
+ thinking: { type: 'disabled' }
809
+ });
810
+
811
+ res.status(200).json({
812
+ success: true,
813
+ analysis: response.choices[0]?.message?.content
814
+ });
815
+ } catch (error) {
816
+ console.error('Error:', error);
817
+ res.status(500).json({
818
+ success: false,
819
+ error: error.message
820
+ });
821
+ }
822
+ }
823
+ ```
824
+
825
+ ## Troubleshooting
826
+
827
+ **Issue**: "SDK must be used in backend"
828
+ - **Solution**: Ensure z-ai-web-dev-sdk is only imported and used in server-side code, never in client/browser code
829
+
830
+ **Issue**: Image not loading or being analyzed
831
+ - **Solution**: Verify that the image URL is accessible, returns the correct MIME type, and points to an image in a supported format
832
+
833
+ **Issue**: Poor OCR accuracy
834
+ - **Solution**: Ensure text is clear and readable, increase image resolution, ensure proper lighting and contrast
835
+
836
+ **Issue**: Inaccurate object detection or counting
837
+ - **Solution**: Enable thinking mode for complex counting tasks, use high-resolution images, provide specific prompts
838
+
839
+ **Issue**: Slow response times
840
+ - **Solution**: Optimize image size (resize before upload), use base64 for local images, and cache the SDK instance for batch processing
841
+
842
+ **Issue**: Base64 encoding fails
843
+ - **Solution**: Verify that the file path is correct, check file permissions, and ensure the MIME type matches the file extension
844
+
845
+ ## Remember
846
+
847
+ - Always use z-ai-web-dev-sdk in backend code only
848
+ - The SDK is already installed - import as shown in examples
849
+ - Use `image_url` content type for static images
850
+ - Base64 encoding is recommended for better performance
851
+ - Structure prompts clearly for best results
852
+ - Enable thinking mode for complex reasoning tasks (counting, detailed analysis)
853
+ - Handle errors gracefully in production
854
+ - Validate and sanitize user inputs
855
+ - Consider privacy and security when processing user images