@lobehub/chat 1.67.1 → 1.68.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/.env.example +4 -0
  2. package/CHANGELOG.md +58 -0
  3. package/Dockerfile +2 -0
  4. package/Dockerfile.database +2 -0
  5. package/README.md +3 -2
  6. package/README.zh-CN.md +1 -1
  7. package/changelog/v1.json +21 -0
  8. package/docs/self-hosting/advanced/auth.mdx +6 -5
  9. package/docs/self-hosting/advanced/auth.zh-CN.mdx +6 -5
  10. package/docs/self-hosting/environment-variables/model-provider.mdx +16 -0
  11. package/docs/self-hosting/environment-variables/model-provider.zh-CN.mdx +16 -0
  12. package/docs/usage/providers/ppio.mdx +57 -0
  13. package/docs/usage/providers/ppio.zh-CN.mdx +55 -0
  14. package/locales/ar/models.json +3 -0
  15. package/locales/ar/plugin.json +1 -1
  16. package/locales/bg-BG/models.json +3 -0
  17. package/locales/bg-BG/plugin.json +1 -1
  18. package/locales/de-DE/models.json +3 -0
  19. package/locales/de-DE/plugin.json +1 -1
  20. package/locales/en-US/models.json +3 -0
  21. package/locales/en-US/plugin.json +1 -1
  22. package/locales/en-US/providers.json +3 -0
  23. package/locales/es-ES/models.json +3 -0
  24. package/locales/es-ES/plugin.json +1 -1
  25. package/locales/fa-IR/models.json +3 -0
  26. package/locales/fa-IR/plugin.json +1 -1
  27. package/locales/fr-FR/models.json +3 -0
  28. package/locales/fr-FR/plugin.json +1 -1
  29. package/locales/it-IT/models.json +3 -0
  30. package/locales/it-IT/plugin.json +1 -1
  31. package/locales/ja-JP/models.json +3 -0
  32. package/locales/ja-JP/plugin.json +1 -1
  33. package/locales/ko-KR/models.json +3 -0
  34. package/locales/ko-KR/plugin.json +1 -1
  35. package/locales/nl-NL/models.json +3 -0
  36. package/locales/nl-NL/plugin.json +1 -1
  37. package/locales/pl-PL/models.json +3 -0
  38. package/locales/pl-PL/plugin.json +1 -1
  39. package/locales/pt-BR/models.json +3 -0
  40. package/locales/pt-BR/plugin.json +1 -1
  41. package/locales/ru-RU/models.json +3 -0
  42. package/locales/ru-RU/plugin.json +1 -1
  43. package/locales/tr-TR/models.json +3 -0
  44. package/locales/tr-TR/plugin.json +1 -1
  45. package/locales/vi-VN/models.json +3 -0
  46. package/locales/vi-VN/plugin.json +1 -1
  47. package/locales/zh-CN/models.json +3 -0
  48. package/locales/zh-CN/plugin.json +1 -1
  49. package/locales/zh-CN/providers.json +4 -0
  50. package/locales/zh-TW/models.json +3 -0
  51. package/locales/zh-TW/plugin.json +1 -1
  52. package/package.json +5 -5
  53. package/packages/web-crawler/src/__test__/crawler.test.ts +176 -0
  54. package/packages/web-crawler/src/crawler.ts +12 -6
  55. package/packages/web-crawler/src/type.ts +3 -0
  56. package/packages/web-crawler/src/urlRules.ts +11 -0
  57. package/packages/web-crawler/src/utils/appUrlRules.test.ts +76 -0
  58. package/packages/web-crawler/src/utils/appUrlRules.ts +3 -0
  59. package/src/app/[variants]/(main)/settings/llm/ProviderList/providers.tsx +2 -0
  60. package/src/config/aiModels/index.ts +3 -0
  61. package/src/config/aiModels/ppio.ts +276 -0
  62. package/src/config/llm.ts +6 -0
  63. package/src/config/modelProviders/index.ts +4 -0
  64. package/src/config/modelProviders/ppio.ts +249 -0
  65. package/src/libs/agent-runtime/AgentRuntime.ts +7 -0
  66. package/src/libs/agent-runtime/ppio/__snapshots__/index.test.ts.snap +26 -0
  67. package/src/libs/agent-runtime/ppio/fixtures/models.json +42 -0
  68. package/src/libs/agent-runtime/ppio/index.test.ts +264 -0
  69. package/src/libs/agent-runtime/ppio/index.ts +51 -0
  70. package/src/libs/agent-runtime/ppio/type.ts +12 -0
  71. package/src/libs/agent-runtime/types/type.ts +1 -0
  72. package/src/libs/agent-runtime/utils/anthropicHelpers.ts +2 -2
  73. package/src/locales/default/plugin.ts +1 -1
  74. package/src/server/routers/tools/__test__/search.test.ts +146 -0
  75. package/src/server/routers/tools/search.ts +1 -1
  76. package/src/store/chat/slices/builtinTool/actions/searXNG.test.ts +67 -0
  77. package/src/store/chat/slices/builtinTool/actions/searXNG.ts +2 -1
  78. package/src/store/tool/slices/builtin/selectors.test.ts +12 -0
  79. package/src/store/tool/slices/builtin/selectors.ts +4 -1
  80. package/src/tools/web-browsing/Portal/PageContent/index.tsx +13 -7
  81. package/src/tools/web-browsing/const.ts +2 -0
  82. package/src/types/user/settings/keyVaults.ts +1 -0
@@ -145,7 +145,7 @@
145
145
  "detail": {
146
146
  "preview": "미리보기",
147
147
  "raw": "원본 텍스트",
148
- "tooLong": "텍스트 내용이 너무 깁니다. 대화 맥락은 처음 10000자만 유지되며, 초과 부분은 대화 맥락에 포함되지 않습니다."
148
+ "tooLong": "텍스트 내용이 너무 깁니다. 대화 맥락은 앞의 {{characters}}자만 유지되며, 초과 부분은 대화 맥락에 포함되지 않습니다."
149
149
  },
150
150
  "meta": {
151
151
  "crawler": "크롤링 모드",
@@ -953,6 +953,9 @@
953
953
  "gpt-4-vision-preview": {
954
954
  "description": "Het nieuwste GPT-4 Turbo-model heeft visuele functies. Nu kunnen visuele verzoeken worden gedaan met behulp van JSON-indeling en functieaanroepen. GPT-4 Turbo is een verbeterde versie die kosteneffectieve ondersteuning biedt voor multimodale taken. Het vindt een balans tussen nauwkeurigheid en efficiëntie, geschikt voor toepassingen die realtime interactie vereisen."
955
955
  },
956
+ "gpt-4.5-preview": {
957
+ "description": "De onderzoekspreview van GPT-4.5, ons grootste en krachtigste GPT-model tot nu toe. Het heeft een uitgebreide wereldkennis en kan de intenties van gebruikers beter begrijpen, waardoor het uitblinkt in creatieve taken en autonome planning. GPT-4.5 accepteert tekst- en afbeeldingsinvoer en genereert tekstuitvoer (inclusief gestructureerde uitvoer). Het ondersteunt belangrijke ontwikkelaarsfuncties zoals functieaanroepen, batch-API's en streaminguitvoer. In taken die creativiteit, open denken en dialoog vereisen (zoals schrijven, leren of het verkennen van nieuwe ideeën), presteert GPT-4.5 bijzonder goed. De kennis is bijgewerkt tot oktober 2023."
958
+ },
956
959
  "gpt-4o": {
957
960
  "description": "ChatGPT-4o is een dynamisch model dat in realtime wordt bijgewerkt om de meest actuele versie te behouden. Het combineert krachtige taalbegrip- en generatiecapaciteiten, geschikt voor grootschalige toepassingsscenario's, waaronder klantenservice, onderwijs en technische ondersteuning."
958
961
  },
@@ -145,7 +145,7 @@
145
145
  "detail": {
146
146
  "preview": "Voorbeeld",
147
147
  "raw": "Oorspronkelijke tekst",
148
- "tooLong": "De tekstinhoud is te lang, de gesprekscontext behoudt alleen de eerste 10000 tekens, het gedeelte daarboven wordt niet meegerekend in de gesprekscontext"
148
+ "tooLong": "De tekstinhoud is te lang, de gesprekscontext houdt alleen de eerste {{characters}} tekens vast, het overschot wordt niet meegerekend in de gesprekscontext"
149
149
  },
150
150
  "meta": {
151
151
  "crawler": "Crawler-modus",
@@ -953,6 +953,9 @@
953
953
  "gpt-4-vision-preview": {
954
954
  "description": "Najnowszy model GPT-4 Turbo posiada funkcje wizualne. Teraz zapytania wizualne mogą być obsługiwane za pomocą formatu JSON i wywołań funkcji. GPT-4 Turbo to ulepszona wersja, która oferuje opłacalne wsparcie dla zadań multimodalnych. Znajduje równowagę między dokładnością a wydajnością, co czyni go odpowiednim do aplikacji wymagających interakcji w czasie rzeczywistym."
955
955
  },
956
+ "gpt-4.5-preview": {
957
+ "description": "Wersja badawcza GPT-4.5, która jest naszym największym i najpotężniejszym modelem GPT do tej pory. Posiada szeroką wiedzę o świecie i lepiej rozumie intencje użytkowników, co sprawia, że doskonale radzi sobie w zadaniach kreatywnych i autonomicznym planowaniu. GPT-4.5 akceptuje tekstowe i graficzne wejścia oraz generuje wyjścia tekstowe (w tym wyjścia strukturalne). Wspiera kluczowe funkcje dla deweloperów, takie jak wywołania funkcji, API wsadowe i strumieniowe wyjścia. W zadaniach wymagających kreatywności, otwartego myślenia i dialogu (takich jak pisanie, nauka czy odkrywanie nowych pomysłów), GPT-4.5 sprawdza się szczególnie dobrze. Data graniczna wiedzy to październik 2023."
958
+ },
956
959
  "gpt-4o": {
957
960
  "description": "ChatGPT-4o to dynamiczny model, który jest na bieżąco aktualizowany, aby utrzymać najnowszą wersję. Łączy potężne zdolności rozumienia i generowania języka, co czyni go odpowiednim do zastosowań na dużą skalę, w tym obsługi klienta, edukacji i wsparcia technicznego."
958
961
  },
@@ -145,7 +145,7 @@
145
145
  "detail": {
146
146
  "preview": "Podgląd",
147
147
  "raw": "Tekst źródłowy",
148
- "tooLong": "Treść tekstu jest zbyt długa, kontekst rozmowy zachowuje tylko pierwsze 10000 znaków, a nadmiar nie jest uwzględniany w kontekście rozmowy"
148
+ "tooLong": "Treść tekstu jest zbyt długa, kontekst rozmowy zachowuje tylko pierwsze {{characters}} znaków, a nadmiar nie jest uwzględniany w kontekście rozmowy"
149
149
  },
150
150
  "meta": {
151
151
  "crawler": "Tryb przeszukiwania",
@@ -953,6 +953,9 @@
953
953
  "gpt-4-vision-preview": {
954
954
  "description": "O mais recente modelo GPT-4 Turbo possui funcionalidades visuais. Agora, solicitações visuais podem ser feitas usando o modo JSON e chamadas de função. O GPT-4 Turbo é uma versão aprimorada, oferecendo suporte econômico para tarefas multimodais. Ele encontra um equilíbrio entre precisão e eficiência, adequado para aplicações que requerem interação em tempo real."
955
955
  },
956
+ "gpt-4.5-preview": {
957
+ "description": "Versão de pesquisa do GPT-4.5, que é o nosso maior e mais poderoso modelo GPT até agora. Ele possui um amplo conhecimento sobre o mundo e consegue entender melhor a intenção do usuário, destacando-se em tarefas criativas e planejamento autônomo. O GPT-4.5 aceita entradas de texto e imagem, gerando saídas de texto (incluindo saídas estruturadas). Suporta recursos essenciais para desenvolvedores, como chamadas de função, API em lote e saída em fluxo. O GPT-4.5 se destaca especialmente em tarefas que requerem criatividade, pensamento aberto e diálogo (como escrita, aprendizado ou exploração de novas ideias). A data limite do conhecimento é outubro de 2023."
958
+ },
956
959
  "gpt-4o": {
957
960
  "description": "O ChatGPT-4o é um modelo dinâmico, atualizado em tempo real para manter a versão mais atual. Ele combina uma poderosa capacidade de compreensão e geração de linguagem, adequado para cenários de aplicação em larga escala, incluindo atendimento ao cliente, educação e suporte técnico."
958
961
  },
@@ -145,7 +145,7 @@
145
145
  "detail": {
146
146
  "preview": "Prévia",
147
147
  "raw": "Texto original",
148
- "tooLong": "O conteúdo do texto é muito longo, o contexto da conversa manterá apenas os primeiros 10000 caracteres, e a parte excedente não será considerada no contexto da conversa"
148
+ "tooLong": "O conteúdo do texto é muito longo, o contexto da conversa manterá apenas os primeiros {{characters}} caracteres, e a parte excedente não será considerada no contexto da conversa"
149
149
  },
150
150
  "meta": {
151
151
  "crawler": "Modo de Rastreamento",
@@ -953,6 +953,9 @@
953
953
  "gpt-4-vision-preview": {
954
954
  "description": "Последняя модель GPT-4 Turbo обладает визуальными функциями. Теперь визуальные запросы могут использовать JSON-формат и вызовы функций. GPT-4 Turbo — это улучшенная версия, обеспечивающая экономически эффективную поддержку для мультимодальных задач. Она находит баланс между точностью и эффективностью, подходя для приложений, требующих взаимодействия в реальном времени."
955
955
  },
956
+ "gpt-4.5-preview": {
957
+ "description": "Предварительная версия исследования GPT-4.5, это наша самая большая и мощная модель GPT на сегодняшний день. Она обладает обширными знаниями о мире и лучше понимает намерения пользователей, что делает её выдающейся в творческих задачах и автономном планировании. GPT-4.5 принимает текстовые и графические входные данные и генерирует текстовый вывод (включая структурированный вывод). Поддерживает ключевые функции для разработчиков, такие как вызовы функций, пакетный API и потоковый вывод. В задачах, требующих креативного, открытого мышления и диалога (таких как написание, обучение или исследование новых идей), GPT-4.5 особенно эффективен. Дата окончания знаний - октябрь 2023 года."
958
+ },
956
959
  "gpt-4o": {
957
960
  "description": "ChatGPT-4o — это динамическая модель, которая обновляется в реальном времени, чтобы оставаться актуальной. Она сочетает в себе мощное понимание языка и генерацию, подходя для масштабных приложений, включая обслуживание клиентов, образование и техническую поддержку."
958
961
  },
@@ -145,7 +145,7 @@
145
145
  "detail": {
146
146
  "preview": "Предварительный просмотр",
147
147
  "raw": "Исходный текст",
148
- "tooLong": "Содержимое текста слишком длинное, контекст диалога сохраняет только первые 10000 символов, превышающая часть не учитывается в контексте диалога"
148
+ "tooLong": "Содержимое текста слишком длинное, в контексте диалога сохраняются только первые {{characters}} символов, а остальная часть не учитывается в контексте разговора"
149
149
  },
150
150
  "meta": {
151
151
  "crawler": "Режим обхода",
@@ -953,6 +953,9 @@
953
953
  "gpt-4-vision-preview": {
954
954
  "description": "En son GPT-4 Turbo modeli görsel işlevselliğe sahiptir. Artık görsel talepler JSON formatı ve fonksiyon çağrıları ile işlenebilir. GPT-4 Turbo, çok modlu görevler için maliyet etkin bir destek sunan geliştirilmiş bir versiyondur. Doğruluk ve verimlilik arasında bir denge sağlar, gerçek zamanlı etkileşim gerektiren uygulama senaryoları için uygundur."
955
955
  },
956
+ "gpt-4.5-preview": {
957
+ "description": "GPT-4.5'in araştırma önizleme sürümü, şimdiye kadar geliştirdiğimiz en büyük ve en güçlü GPT modelidir. Geniş bir dünya bilgisine sahip olup, kullanıcı niyetlerini daha iyi anlayarak yaratıcı görevler ve bağımsız planlama konularında mükemmel bir performans sergilemektedir. GPT-4.5, metin ve görsel girdi alabilir ve metin çıktısı (yapılandırılmış çıktı dahil) üretebilir. Fonksiyon çağrıları, toplu API ve akış çıktısı gibi önemli geliştirici özelliklerini destekler. Yaratıcılık, açık düşünme ve diyalog gerektiren görevlerde (örneğin yazma, öğrenme veya yeni fikirler keşfetme) GPT-4.5 özellikle başarılıdır. Bilgi kesim tarihi Ekim 2023'tür."
958
+ },
956
959
  "gpt-4o": {
957
960
  "description": "ChatGPT-4o, güncel versiyonunu korumak için gerçek zamanlı olarak güncellenen dinamik bir modeldir. Güçlü dil anlama ve üretme yeteneklerini birleştirir, müşteri hizmetleri, eğitim ve teknik destek gibi geniş ölçekli uygulama senaryoları için uygundur."
958
961
  },
@@ -145,7 +145,7 @@
145
145
  "detail": {
146
146
  "preview": "Önizleme",
147
147
  "raw": "Ham metin",
148
- "tooLong": "Metin içeriği çok uzun, diyalog bağlamında yalnızca ilk 10000 karakter saklanır, fazlası bağlamda dikkate alınmaz"
148
+ "tooLong": "Metin içeriği çok uzun, diyalog bağlamında yalnızca ilk {{characters}} karakter saklanacak, fazlası diyalog bağlamına dahil edilmeyecek."
149
149
  },
150
150
  "meta": {
151
151
  "crawler": "Tarayıcı Modu",
@@ -953,6 +953,9 @@
953
953
  "gpt-4-vision-preview": {
954
954
  "description": "Mô hình GPT-4 Turbo mới nhất có chức năng hình ảnh. Hiện tại, các yêu cầu hình ảnh có thể sử dụng chế độ JSON và gọi hàm. GPT-4 Turbo là một phiên bản nâng cao, cung cấp hỗ trợ chi phí hiệu quả cho các nhiệm vụ đa phương tiện. Nó tìm thấy sự cân bằng giữa độ chính xác và hiệu quả, phù hợp cho các ứng dụng cần tương tác theo thời gian thực."
955
955
  },
956
+ "gpt-4.5-preview": {
957
+ "description": "Bản nghiên cứu preview của GPT-4.5, đây là mô hình GPT lớn nhất và mạnh mẽ nhất mà chúng tôi từng phát triển. Nó sở hữu kiến thức rộng lớn về thế giới và có khả năng hiểu ý định của người dùng tốt hơn, giúp nó thể hiện xuất sắc trong các nhiệm vụ sáng tạo và lập kế hoạch tự động. GPT-4.5 có thể chấp nhận đầu vào văn bản và hình ảnh, và tạo ra đầu ra văn bản (bao gồm cả đầu ra có cấu trúc). Hỗ trợ các tính năng quan trọng cho nhà phát triển như gọi hàm, API theo lô và đầu ra theo luồng. Trong các nhiệm vụ cần sự sáng tạo, tư duy mở và đối thoại (như viết lách, học tập hoặc khám phá ý tưởng mới), GPT-4.5 thể hiện đặc biệt xuất sắc. Thời điểm cắt đứt kiến thức là tháng 10 năm 2023."
958
+ },
956
959
  "gpt-4o": {
957
960
  "description": "ChatGPT-4o là một mô hình động, được cập nhật theo thời gian thực để giữ phiên bản mới nhất. Nó kết hợp khả năng hiểu và sinh ngôn ngữ mạnh mẽ, phù hợp cho các ứng dụng quy mô lớn, bao gồm dịch vụ khách hàng, giáo dục và hỗ trợ kỹ thuật."
958
961
  },
@@ -145,7 +145,7 @@
145
145
  "detail": {
146
146
  "preview": "Xem trước",
147
147
  "raw": "Văn bản gốc",
148
- "tooLong": "Nội dung văn bản quá dài, ngữ cảnh cuộc trò chuyện chỉ giữ lại 10000 ký tự đầu tiên, phần vượt quá sẽ không được tính vào ngữ cảnh cuộc trò chuyện"
148
+ "tooLong": "Nội dung văn bản quá dài, chỉ giữ lại {{characters}} ký tự đầu tiên trong ngữ cảnh cuộc trò chuyện, phần vượt quá sẽ không được tính vào ngữ cảnh cuộc trò chuyện"
149
149
  },
150
150
  "meta": {
151
151
  "crawler": "Chế độ thu thập",
@@ -953,6 +953,9 @@
953
953
  "gpt-4-vision-preview": {
954
954
  "description": "GPT-4 视觉预览版,专为图像分析和处理任务设计。"
955
955
  },
956
+ "gpt-4.5-preview": {
957
+ "description": "GPT-4.5 的研究预览版,它是我们迄今为止最大、最强大的 GPT 模型。它拥有广泛的世界知识,并能更好地理解用户意图,使其在创造性任务和自主规划方面表现出色。GPT-4.5 可接受文本和图像输入,并生成文本输出(包括结构化输出)。支持关键的开发者功能,如函数调用、批量 API 和流式输出。在需要创造性、开放式思考和对话的任务(如写作、学习或探索新想法)中,GPT-4.5 表现尤为出色。知识截止日期为 2023 年 10 月。"
958
+ },
956
959
  "gpt-4o": {
957
960
  "description": "ChatGPT-4o 是一款动态模型,实时更新以保持当前最新版本。它结合了强大的语言理解与生成能力,适合于大规模应用场景,包括客户服务、教育和技术支持。"
958
961
  },
@@ -145,7 +145,7 @@
145
145
  "detail": {
146
146
  "preview": "预览",
147
147
  "raw": "原始文本",
148
- "tooLong": "文本内容过长,对话上下文仅保留前 10000 字符,超过部分不计入会话上下文"
148
+ "tooLong": "文本内容过长,对话上下文仅保留前 {{characters}} 字符,超过部分不计入会话上下文"
149
149
  },
150
150
  "meta": {
151
151
  "crawler": "抓取模式",
@@ -139,5 +139,9 @@
139
139
  },
140
140
  "zhipu": {
141
141
  "description": "智谱 AI 提供多模态与语言模型的开放平台,支持广泛的AI应用场景,包括文本处理、图像理解与编程辅助等。"
142
+ },
143
+ "ppio": {
144
+ "description": "PPIO 派欧云提供稳定、高性价比的开源模型 API 服务,支持 DeepSeek 全系列、Llama、Qwen 等行业领先大模型。"
142
145
  }
143
146
  }
147
+
@@ -953,6 +953,9 @@
953
953
  "gpt-4-vision-preview": {
954
954
  "description": "最新的GPT-4 Turbo模型具備視覺功能。現在,視覺請求可以使用JSON模式和函數調用。GPT-4 Turbo是一個增強版本,為多模態任務提供成本效益高的支持。它在準確性和效率之間找到平衡,適合需要進行實時交互的應用程序場景。"
955
955
  },
956
+ "gpt-4.5-preview": {
957
+ "description": "GPT-4.5 的研究預覽版,它是我們迄今為止最大、最強大的 GPT 模型。它擁有廣泛的世界知識,並能更好地理解用戶意圖,使其在創造性任務和自主規劃方面表現出色。GPT-4.5 可接受文本和圖像輸入,並生成文本輸出(包括結構化輸出)。支持關鍵的開發者功能,如函數調用、批量 API 和串流輸出。在需要創造性、開放式思考和對話的任務(如寫作、學習或探索新想法)中,GPT-4.5 表現尤為出色。知識截止日期為 2023 年 10 月。"
958
+ },
956
959
  "gpt-4o": {
957
960
  "description": "ChatGPT-4o是一款動態模型,實時更新以保持當前最新版本。它結合了強大的語言理解與生成能力,適合於大規模應用場景,包括客戶服務、教育和技術支持。"
958
961
  },
@@ -145,7 +145,7 @@
145
145
  "detail": {
146
146
  "preview": "預覽",
147
147
  "raw": "原始文本",
148
- "tooLong": "文本內容過長,對話上下文僅保留前 10000 字元,超過部分不計入會話上下文"
148
+ "tooLong": "文本內容過長,對話上下文僅保留前 {{characters}} 字元,超過部分不計入會話上下文"
149
149
  },
150
150
  "meta": {
151
151
  "crawler": "抓取模式",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lobehub/chat",
3
- "version": "1.67.1",
3
+ "version": "1.68.0",
4
4
  "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
5
5
  "keywords": [
6
6
  "framework",
@@ -30,11 +30,11 @@
30
30
  ],
31
31
  "scripts": {
32
32
  "build": "next build",
33
- "build:analyze": "ANALYZE=true next build",
34
- "build:docker": "DOCKER=true next build && npm run build-sitemap",
35
33
  "postbuild": "npm run build-sitemap && npm run build-migrate-db",
36
34
  "build-migrate-db": "bun run db:migrate",
37
35
  "build-sitemap": "tsx ./scripts/buildSitemapIndex/index.ts",
36
+ "build:analyze": "ANALYZE=true next build",
37
+ "build:docker": "DOCKER=true next build && npm run build-sitemap",
38
38
  "db:generate": "drizzle-kit generate && npm run db:generate-client",
39
39
  "db:generate-client": "tsx ./scripts/migrateClientDB/compile-migrations.ts",
40
40
  "db:migrate": "MIGRATION_DB=1 tsx ./scripts/migrateServerDB/index.ts",
@@ -62,11 +62,11 @@
62
62
  "start": "next start -p 3210",
63
63
  "stylelint": "stylelint \"src/**/*.{js,jsx,ts,tsx}\" --fix",
64
64
  "test": "npm run test-app && npm run test-server",
65
- "test:update": "vitest -u",
66
65
  "test-app": "vitest run --config vitest.config.ts",
67
66
  "test-app:coverage": "vitest run --config vitest.config.ts --coverage",
68
67
  "test-server": "vitest run --config vitest.server.config.ts",
69
68
  "test-server:coverage": "vitest run --config vitest.server.config.ts --coverage",
69
+ "test:update": "vitest -u",
70
70
  "type-check": "tsc --noEmit",
71
71
  "webhook:ngrok": "ngrok http http://localhost:3011",
72
72
  "workflow:cdn": "tsx ./scripts/cdnWorkflow/index.ts",
@@ -107,7 +107,7 @@
107
107
  "dependencies": {
108
108
  "@ant-design/icons": "^5.5.2",
109
109
  "@ant-design/pro-components": "^2.8.3",
110
- "@anthropic-ai/sdk": "^0.37.0",
110
+ "@anthropic-ai/sdk": "^0.39.0",
111
111
  "@auth/core": "^0.38.0",
112
112
  "@aws-sdk/client-bedrock-runtime": "^3.723.0",
113
113
  "@aws-sdk/client-s3": "^3.723.0",
@@ -0,0 +1,176 @@
1
+ import { describe, expect, it, vi } from 'vitest';
2
+
3
+ import { Crawler } from '../crawler';
4
+
5
+ // Move mocks outside of test cases to avoid hoisting issues
6
+ vi.mock('../crawImpl', () => ({
7
+ crawlImpls: {
8
+ naive: vi.fn(),
9
+ jina: vi.fn(),
10
+ browserless: vi.fn(),
11
+ },
12
+ }));
13
+
14
+ vi.mock('../utils/appUrlRules', () => ({
15
+ applyUrlRules: vi.fn().mockReturnValue({
16
+ transformedUrl: 'https://example.com',
17
+ filterOptions: {},
18
+ }),
19
+ }));
20
+
21
+ describe('Crawler', () => {
22
+ const crawler = new Crawler();
23
+
24
+ it('should crawl successfully with default impls', async () => {
25
+ const mockResult = {
26
+ content: 'test content',
27
+ contentType: 'text' as const,
28
+ url: 'https://example.com',
29
+ };
30
+
31
+ const { crawlImpls } = await import('../crawImpl');
32
+ vi.mocked(crawlImpls.naive).mockResolvedValue(mockResult);
33
+
34
+ const result = await crawler.crawl({
35
+ url: 'https://example.com',
36
+ });
37
+
38
+ expect(result).toEqual({
39
+ crawler: 'naive',
40
+ data: mockResult,
41
+ originalUrl: 'https://example.com',
42
+ transformedUrl: undefined,
43
+ });
44
+ });
45
+
46
+ it('should use user provided impls', async () => {
47
+ const mockResult = {
48
+ content: 'test content',
49
+ contentType: 'text' as const,
50
+ url: 'https://example.com',
51
+ };
52
+
53
+ const { crawlImpls } = await import('../crawImpl');
54
+ vi.mocked(crawlImpls.jina).mockResolvedValue(mockResult);
55
+
56
+ const result = await crawler.crawl({
57
+ impls: ['jina'],
58
+ url: 'https://example.com',
59
+ });
60
+
61
+ expect(result).toEqual({
62
+ crawler: 'jina',
63
+ data: mockResult,
64
+ originalUrl: 'https://example.com',
65
+ transformedUrl: undefined,
66
+ });
67
+ });
68
+
69
+ it('should handle crawl errors', async () => {
70
+ const mockError = new Error('Crawl failed');
71
+ mockError.name = 'CrawlError';
72
+
73
+ const { crawlImpls } = await import('../crawImpl');
74
+ vi.mocked(crawlImpls.naive).mockRejectedValue(mockError);
75
+ vi.mocked(crawlImpls.jina).mockRejectedValue(mockError);
76
+ vi.mocked(crawlImpls.browserless).mockRejectedValue(mockError);
77
+
78
+ const result = await crawler.crawl({
79
+ url: 'https://example.com',
80
+ });
81
+
82
+ expect(result).toEqual({
83
+ content: 'Fail to crawl the page. Error type: CrawlError, error message: Crawl failed',
84
+ errorMessage: 'Crawl failed',
85
+ errorType: 'CrawlError',
86
+ originalUrl: 'https://example.com',
87
+ transformedUrl: undefined,
88
+ });
89
+ });
90
+
91
+ it('should handle transformed urls', async () => {
92
+ const mockResult = {
93
+ content: 'test content',
94
+ contentType: 'text' as const,
95
+ url: 'https://transformed.example.com',
96
+ };
97
+
98
+ const { crawlImpls } = await import('../crawImpl');
99
+ vi.mocked(crawlImpls.naive).mockResolvedValue(mockResult);
100
+
101
+ const { applyUrlRules } = await import('../utils/appUrlRules');
102
+ vi.mocked(applyUrlRules).mockReturnValue({
103
+ transformedUrl: 'https://transformed.example.com',
104
+ filterOptions: {},
105
+ });
106
+
107
+ const result = await crawler.crawl({
108
+ url: 'https://example.com',
109
+ });
110
+
111
+ expect(result).toEqual({
112
+ crawler: 'naive',
113
+ data: mockResult,
114
+ originalUrl: 'https://example.com',
115
+ transformedUrl: 'https://transformed.example.com',
116
+ });
117
+ });
118
+
119
+ it('should merge filter options correctly', async () => {
120
+ const mockResult = {
121
+ content: 'test content',
122
+ contentType: 'text' as const,
123
+ url: 'https://example.com',
124
+ };
125
+
126
+ const { crawlImpls } = await import('../crawImpl');
127
+ const mockCrawlImpl = vi.mocked(crawlImpls.naive).mockResolvedValue(mockResult);
128
+
129
+ const { applyUrlRules } = await import('../utils/appUrlRules');
130
+ vi.mocked(applyUrlRules).mockReturnValue({
131
+ transformedUrl: 'https://example.com',
132
+ filterOptions: { pureText: true },
133
+ });
134
+
135
+ await crawler.crawl({
136
+ url: 'https://example.com',
137
+ filterOptions: { enableReadability: true },
138
+ });
139
+
140
+ expect(mockCrawlImpl).toHaveBeenCalledWith('https://example.com', {
141
+ filterOptions: {
142
+ pureText: true,
143
+ enableReadability: true,
144
+ },
145
+ });
146
+ });
147
+
148
+ it('should use rule impls when provided', async () => {
149
+ const mockResult = {
150
+ content: 'test content',
151
+ contentType: 'text' as const,
152
+ url: 'https://example.com',
153
+ };
154
+
155
+ const { crawlImpls } = await import('../crawImpl');
156
+ vi.mocked(crawlImpls.jina).mockResolvedValue(mockResult);
157
+
158
+ const { applyUrlRules } = await import('../utils/appUrlRules');
159
+ vi.mocked(applyUrlRules).mockReturnValue({
160
+ transformedUrl: 'https://example.com',
161
+ filterOptions: {},
162
+ impls: ['jina'],
163
+ });
164
+
165
+ const result = await crawler.crawl({
166
+ url: 'https://example.com',
167
+ });
168
+
169
+ expect(result).toEqual({
170
+ crawler: 'jina',
171
+ data: mockResult,
172
+ originalUrl: 'https://example.com',
173
+ transformedUrl: undefined,
174
+ });
175
+ });
176
+ });
@@ -12,15 +12,19 @@ export class Crawler {
12
12
  */
13
13
  async crawl({
14
14
  url,
15
- impls,
15
+ impls: userImpls,
16
16
  filterOptions: userFilterOptions,
17
17
  }: {
18
18
  filterOptions?: CrawlUrlRule['filterOptions'];
19
- impls?: string[];
19
+ impls?: CrawlImplType[];
20
20
  url: string;
21
21
  }) {
22
22
  // 应用URL规则
23
- const { transformedUrl, filterOptions: ruleFilterOptions } = applyUrlRules(url, crawUrlRules);
23
+ const {
24
+ transformedUrl,
25
+ filterOptions: ruleFilterOptions,
26
+ impls: ruleImpls,
27
+ } = applyUrlRules(url, crawUrlRules);
24
28
 
25
29
  // 合并用户提供的过滤选项和规则中的过滤选项,用户选项优先
26
30
  const mergedFilterOptions = {
@@ -30,9 +34,11 @@ export class Crawler {
30
34
 
31
35
  let finalError: Error | undefined;
32
36
 
33
- const finalImpls = impls
34
- ? (impls.filter((impl) => Object.keys(crawlImpls).includes(impl)) as CrawlImplType[])
35
- : this.impls;
37
+ const systemImpls = (ruleImpls ?? this.impls) as CrawlImplType[];
38
+
39
+ const finalImpls = userImpls
40
+ ? (userImpls.filter((impl) => Object.keys(crawlImpls).includes(impl)) as CrawlImplType[])
41
+ : systemImpls;
36
42
 
37
43
  // 按照内置的实现顺序依次尝试
38
44
  for (const impl of finalImpls) {
@@ -21,6 +21,8 @@ export interface FilterOptions {
21
21
  pureText?: boolean;
22
22
  }
23
23
 
24
+ type CrawlImplType = 'naive' | 'jina' | 'browserless';
25
+
24
26
  type CrawlImplParams<T> = T & {
25
27
  filterOptions: FilterOptions;
26
28
  };
@@ -33,6 +35,7 @@ export type CrawlImpl<Params = object> = (
33
35
  export interface CrawlUrlRule {
34
36
  // 内容过滤配置(可选)
35
37
  filterOptions?: FilterOptions;
38
+ impls?: CrawlImplType[];
36
39
  // 是否使用正则表达式匹配(默认为glob模式)
37
40
  isRegex?: boolean;
38
41
  // URL匹配模式,支持glob模式或正则表达式
@@ -16,6 +16,17 @@ export const crawUrlRules: CrawlUrlRule[] = [
16
16
  // GitHub discussion
17
17
  urlPattern: 'https://github.com/(.*)/discussions/(.*)',
18
18
  },
19
+
20
+ // 所有 PDF 都用 jina
21
+ {
22
+ impls: ['jina'],
23
+ urlPattern: 'https://(.*).pdf',
24
+ },
25
+ // 知乎有爬虫防护,使用 jina
26
+ {
27
+ impls: ['jina'],
28
+ urlPattern: 'https://zhuanlan.zhihu.com(.*)',
29
+ },
19
30
  {
20
31
  // Medium 文章转换为 Scribe.rip
21
32
  urlPattern: 'https://medium.com/(.*)',
@@ -1,6 +1,7 @@
1
1
  import { applyUrlRules } from './appUrlRules';
2
2
 
3
3
  describe('applyUrlRules', () => {
4
+ // @gru-agent github file rules 不要改
4
5
  it('github file rules', () => {
5
6
  const result = applyUrlRules(
6
7
  'https://github.com/lobehub/chat-plugin-web-crawler/blob/main/api/v1/_utils.ts',
@@ -23,4 +24,79 @@ describe('applyUrlRules', () => {
23
24
  'https://github.com/lobehub/chat-plugin-web-crawler/raw/refs/heads/main/api/v1/_utils.ts',
24
25
  });
25
26
  });
27
+
28
+ it('should return original url when no rules match', () => {
29
+ const result = applyUrlRules('https://example.com', [
30
+ {
31
+ urlPattern: 'https://github.com/.*',
32
+ },
33
+ ]);
34
+
35
+ expect(result).toEqual({
36
+ transformedUrl: 'https://example.com',
37
+ });
38
+ });
39
+
40
+ it('should return original url with filter options when rule matches without transform', () => {
41
+ const result = applyUrlRules('https://example.com', [
42
+ {
43
+ filterOptions: { pureText: true },
44
+ urlPattern: 'https://example.com',
45
+ },
46
+ ]);
47
+
48
+ expect(result).toEqual({
49
+ filterOptions: { pureText: true },
50
+ transformedUrl: 'https://example.com',
51
+ });
52
+ });
53
+
54
+ it('should apply first matching rule when multiple rules match', () => {
55
+ const result = applyUrlRules('https://example.com/test', [
56
+ {
57
+ filterOptions: { pureText: true },
58
+ urlPattern: 'https://example.com/(.*)',
59
+ urlTransform: 'https://example.com/transformed/$1',
60
+ },
61
+ {
62
+ filterOptions: { enableReadability: true },
63
+ urlPattern: 'https://example.com/.*',
64
+ urlTransform: 'https://example.com/other',
65
+ },
66
+ ]);
67
+
68
+ expect(result).toEqual({
69
+ filterOptions: { pureText: true },
70
+ transformedUrl: 'https://example.com/transformed/test',
71
+ });
72
+ });
73
+
74
+ it('should handle special characters in URLs and patterns', () => {
75
+ const result = applyUrlRules('https://example.com/path?q=1&b=2#hash', [
76
+ {
77
+ urlPattern: 'https://example.com/([^?#]+)[?#]?.*',
78
+ urlTransform: 'https://example.com/clean/$1',
79
+ },
80
+ ]);
81
+
82
+ expect(result).toEqual({
83
+ transformedUrl: 'https://example.com/clean/path',
84
+ });
85
+ });
86
+
87
+ it('should handle impls in rules', () => {
88
+ const result = applyUrlRules('https://example.com', [
89
+ {
90
+ filterOptions: { pureText: true },
91
+ impls: ['naive', 'browserless'],
92
+ urlPattern: 'https://example.com',
93
+ },
94
+ ]);
95
+
96
+ expect(result).toEqual({
97
+ filterOptions: { pureText: true },
98
+ impls: ['naive', 'browserless'],
99
+ transformedUrl: 'https://example.com',
100
+ });
101
+ });
26
102
  });
@@ -5,6 +5,7 @@ export const applyUrlRules = (
5
5
  urlRules: CrawlUrlRule[],
6
6
  ): {
7
7
  filterOptions?: CrawlUrlRule['filterOptions'];
8
+ impls?: string[];
8
9
  transformedUrl: string;
9
10
  } => {
10
11
  for (const rule of urlRules) {
@@ -23,12 +24,14 @@ export const applyUrlRules = (
23
24
 
24
25
  return {
25
26
  filterOptions: rule.filterOptions,
27
+ impls: rule.impls,
26
28
  transformedUrl,
27
29
  };
28
30
  } else {
29
31
  // 没有转换规则但匹配了模式,只返回过滤选项
30
32
  return {
31
33
  filterOptions: rule.filterOptions,
34
+ impls: rule.impls,
32
35
  transformedUrl: url,
33
36
  };
34
37
  }
@@ -21,6 +21,7 @@ import {
21
21
  NvidiaProviderCard,
22
22
  OpenRouterProviderCard,
23
23
  PerplexityProviderCard,
24
+ PPIOProviderCard,
24
25
  QwenProviderCard,
25
26
  SambaNovaProviderCard,
26
27
  SenseNovaProviderCard,
@@ -98,6 +99,7 @@ export const useProviderList = (): ProviderItem[] => {
98
99
  SiliconCloudProviderCard,
99
100
  HigressProviderCard,
100
101
  GiteeAIProviderCard,
102
+ PPIOProviderCard,
101
103
  ],
102
104
  [
103
105
  AzureProvider,
@@ -30,6 +30,7 @@ import { default as ollama } from './ollama';
30
30
  import { default as openai } from './openai';
31
31
  import { default as openrouter } from './openrouter';
32
32
  import { default as perplexity } from './perplexity';
33
+ import { default as ppio } from './ppio';
33
34
  import { default as qwen } from './qwen';
34
35
  import { default as sambanova } from './sambanova';
35
36
  import { default as sensenova } from './sensenova';
@@ -98,6 +99,7 @@ export const LOBE_DEFAULT_MODEL_LIST = buildDefaultModelList({
98
99
  openai,
99
100
  openrouter,
100
101
  perplexity,
102
+ ppio,
101
103
  qwen,
102
104
  sambanova,
103
105
  sensenova,
@@ -147,6 +149,7 @@ export { default as ollama } from './ollama';
147
149
  export { default as openai } from './openai';
148
150
  export { default as openrouter } from './openrouter';
149
151
  export { default as perplexity } from './perplexity';
152
+ export { default as ppio } from './ppio';
150
153
  export { default as qwen } from './qwen';
151
154
  export { default as sambanova } from './sambanova';
152
155
  export { default as sensenova } from './sensenova';