entari-plugin-hyw 3.4.2__py3-none-any.whl → 3.5.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of entari-plugin-hyw might be problematic. Click here for more details.
- entari_plugin_hyw/__init__.py +78 -158
- entari_plugin_hyw/assets/card-dist/index.html +396 -0
- entari_plugin_hyw/assets/card-dist/logos/anthropic.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/cerebras.svg +9 -0
- entari_plugin_hyw/assets/card-dist/logos/deepseek.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/gemini.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/google.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/grok.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/huggingface.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/microsoft.svg +15 -0
- entari_plugin_hyw/assets/card-dist/logos/minimax.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/mistral.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/nvida.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/openai.svg +1 -0
- entari_plugin_hyw/assets/card-dist/logos/openrouter.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/perplexity.svg +24 -0
- entari_plugin_hyw/assets/card-dist/logos/qwen.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/xai.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/xiaomi.png +0 -0
- entari_plugin_hyw/assets/card-dist/logos/zai.png +0 -0
- entari_plugin_hyw/assets/card-dist/vite.svg +1 -0
- entari_plugin_hyw/card-ui/.gitignore +24 -0
- entari_plugin_hyw/card-ui/README.md +5 -0
- entari_plugin_hyw/card-ui/index.html +16 -0
- entari_plugin_hyw/card-ui/package-lock.json +2342 -0
- entari_plugin_hyw/card-ui/package.json +31 -0
- entari_plugin_hyw/card-ui/public/logos/anthropic.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/cerebras.svg +9 -0
- entari_plugin_hyw/card-ui/public/logos/deepseek.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/gemini.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/google.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/grok.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/huggingface.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/microsoft.svg +15 -0
- entari_plugin_hyw/card-ui/public/logos/minimax.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/mistral.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/nvida.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/openai.svg +1 -0
- entari_plugin_hyw/card-ui/public/logos/openrouter.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/perplexity.svg +24 -0
- entari_plugin_hyw/card-ui/public/logos/qwen.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/xai.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/xiaomi.png +0 -0
- entari_plugin_hyw/card-ui/public/logos/zai.png +0 -0
- entari_plugin_hyw/card-ui/public/vite.svg +1 -0
- entari_plugin_hyw/card-ui/src/App.vue +410 -0
- entari_plugin_hyw/card-ui/src/assets/vue.svg +1 -0
- entari_plugin_hyw/card-ui/src/components/HelloWorld.vue +41 -0
- entari_plugin_hyw/card-ui/src/components/MarkdownContent.vue +385 -0
- entari_plugin_hyw/card-ui/src/components/SectionCard.vue +41 -0
- entari_plugin_hyw/card-ui/src/components/StageCard.vue +183 -0
- entari_plugin_hyw/card-ui/src/main.ts +5 -0
- entari_plugin_hyw/card-ui/src/style.css +8 -0
- entari_plugin_hyw/card-ui/src/test_regex.js +103 -0
- entari_plugin_hyw/card-ui/src/types.ts +52 -0
- entari_plugin_hyw/card-ui/tsconfig.app.json +16 -0
- entari_plugin_hyw/card-ui/tsconfig.json +7 -0
- entari_plugin_hyw/card-ui/tsconfig.node.json +26 -0
- entari_plugin_hyw/card-ui/vite.config.ts +16 -0
- entari_plugin_hyw/{core/history.py → history.py} +25 -1
- entari_plugin_hyw/image_cache.py +283 -0
- entari_plugin_hyw/{utils/misc.py → misc.py} +0 -3
- entari_plugin_hyw/{core/pipeline.py → pipeline.py} +236 -86
- entari_plugin_hyw/{utils/prompts_cn.py → prompts.py} +10 -25
- entari_plugin_hyw/render_vue.py +314 -0
- entari_plugin_hyw/{utils/search.py → search.py} +227 -10
- {entari_plugin_hyw-3.4.2.dist-info → entari_plugin_hyw-3.5.0rc2.dist-info}/METADATA +5 -2
- entari_plugin_hyw-3.5.0rc2.dist-info/RECORD +88 -0
- entari_plugin_hyw/assets/libs/highlight.css +0 -10
- entari_plugin_hyw/assets/libs/highlight.js +0 -1213
- entari_plugin_hyw/assets/libs/katex-auto-render.js +0 -1
- entari_plugin_hyw/assets/libs/katex.css +0 -1
- entari_plugin_hyw/assets/libs/katex.js +0 -1
- entari_plugin_hyw/assets/libs/tailwind.css +0 -1
- entari_plugin_hyw/assets/package-lock.json +0 -953
- entari_plugin_hyw/assets/package.json +0 -16
- entari_plugin_hyw/assets/tailwind.config.js +0 -12
- entari_plugin_hyw/assets/tailwind.input.css +0 -235
- entari_plugin_hyw/assets/template.html +0 -157
- entari_plugin_hyw/assets/template.html.bak +0 -157
- entari_plugin_hyw/assets/template.j2 +0 -400
- entari_plugin_hyw/core/__init__.py +0 -0
- entari_plugin_hyw/core/config.py +0 -38
- entari_plugin_hyw/core/hyw.py +0 -48
- entari_plugin_hyw/core/render.py +0 -630
- entari_plugin_hyw/utils/__init__.py +0 -2
- entari_plugin_hyw/utils/browser.py +0 -40
- entari_plugin_hyw/utils/playwright_tool.py +0 -36
- entari_plugin_hyw/utils/prompts.py +0 -119
- entari_plugin_hyw-3.4.2.dist-info/RECORD +0 -49
- {entari_plugin_hyw-3.4.2.dist-info → entari_plugin_hyw-3.5.0rc2.dist-info}/WHEEL +0 -0
- {entari_plugin_hyw-3.4.2.dist-info → entari_plugin_hyw-3.5.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
|
|
2
|
+
/**
 * Drop any text that precedes the first level-1 Markdown heading.
 *
 * A heading is a line that starts with a single `#` followed by whitespace.
 * When no such line exists the input is returned unchanged.
 *
 * @param {string} text Raw Markdown.
 * @returns {string} The input starting at the first `# ` line, or the input itself.
 */
const stripPrefixBeforeH1 = (text) => {
  const headingStart = text.search(/^#\s+/m)
  return headingStart === -1 ? text : text.slice(headingStart)
}
|
|
9
|
+
|
|
10
|
+
/**
 * Remove the indentation shared by every non-blank line of `text`.
 *
 * Blank (whitespace-only) lines are ignored when measuring the common indent
 * and are emitted as empty strings in the result. If there is no common
 * indent (or no non-blank line at all) the input is returned untouched.
 *
 * @param {string} text Multi-line text.
 * @returns {string} The de-indented text.
 */
const dedent = (text) => {
  const rows = text.split('\n')
  const isBlank = (row) => row.trim() === ''

  // Smallest leading-whitespace width among the non-blank rows.
  const commonIndent = rows
    .filter((row) => !isBlank(row))
    .map((row) => row.length - row.trimStart().length)
    .reduce((smallest, width) => Math.min(smallest, width), Infinity)

  if (!Number.isFinite(commonIndent) || commonIndent === 0) return text

  return rows
    .map((row) => (isBlank(row) ? '' : row.substring(commonIndent)))
    .join('\n')
}
|
|
27
|
+
|
|
28
|
+
/**
 * Split a Markdown document into an ordered list of sections.
 *
 * Pre-processing: text before the first H1 is dropped, the first H1 line is
 * removed, and everything from a trailing "References" / "Citations" /
 * "Sources" heading (or bold label) onwards is cut.
 *
 * The remaining text is scanned for fenced code blocks, runs of pipe-table
 * rows and `<summary>…</summary>` elements. Each of those becomes a
 * `{ type: 'card', title, content, contentType, language }` entry; the text
 * in between becomes `{ type: 'markdown', content }` entries (trimmed, and
 * skipped when empty).
 *
 * @param {string} rawMd Raw Markdown.
 * @returns {Array<object>} Sections in document order; `[]` for falsy input.
 */
const parse = (rawMd) => {
  if (!rawMd) return []

  // fenced code | consecutive table rows | <summary> element
  const blockPattern = /(```[\s\S]*?```|((?:^|\n)\|[^\n]*\|(?:\n\|[^\n]*\|)*)|<summary>[\s\S]*?<\/summary>)/
  const cardTitles = { code: 'Code', summary: 'Summary', table: 'Table' }

  const body = stripPrefixBeforeH1(rawMd)
    .replace(/^#\s+.+$/m, '')
    .replace(/(?:^|\n)\s*(?:#{1,3}|\*\*)\s*(?:References|Citations|Sources)[\s\S]*$/i, '')
    .trim()

  const sections = []
  const pushMarkdown = (chunk) => {
    const trimmed = chunk.trim()
    if (trimmed) sections.push({ type: 'markdown', content: trimmed })
  }

  let rest = body
  while (rest) {
    const hit = rest.match(blockPattern)
    if (!hit) {
      pushMarkdown(rest)
      break
    }

    const raw = hit[0]
    pushMarkdown(rest.substring(0, hit.index))

    // A table match may start with the newline that precedes its first row,
    // hence the trim() before testing for '|'.
    let kind = null
    if (raw.startsWith('```')) kind = 'code'
    else if (raw.startsWith('<summary>')) kind = 'summary'
    else if (raw.trim().startsWith('|')) kind = 'table'

    if (kind === null) {
      sections.push({ type: 'markdown', content: raw })
    } else {
      let cardBody = raw.trim()
      let language = ''

      if (kind === 'code') {
        const fence = raw.match(/^```(\w+)/)
        if (fence && fence[1]) language = fence[1]
      } else if (kind === 'summary') {
        cardBody = dedent(
          cardBody.replace(/^<summary>/, '').replace(/<\/summary>$/, '')
        )
      }

      sections.push({
        type: 'card',
        title: cardTitles[kind],
        content: cardBody,
        contentType: kind,
        language: language
      })
    }

    rest = rest.substring(hit.index + raw.length)
  }

  return sections
}
|
|
92
|
+
|
|
93
|
+
const test1 = `
|
|
94
|
+
# Title
|
|
95
|
+
|
|
96
|
+
<summary>
|
|
97
|
+
Indented text.
|
|
98
|
+
It might become code block.
|
|
99
|
+
</summary>
|
|
100
|
+
`
|
|
101
|
+
|
|
102
|
+
console.log("\n--- Test 2 (After Fix) ---")
|
|
103
|
+
console.log(JSON.stringify(parse(test1), null, 2))
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
// Type definitions for render data
|
|
2
|
+
// Python only passes raw data, all processing happens in frontend
|
|
3
|
+
|
|
4
|
+
/** Raw per-stage render data. */
export interface Stage {
  name: string
  model: string
  provider: string
  /** Icon identifier (e.g., "google", "openai"). */
  icon_name?: string
  /** Time in seconds (raw number). */
  time: number
  /** Cost in dollars (raw number). */
  cost: number
  references?: Reference[]
  image_references?: ImageReference[]
  crawled_pages?: CrawledPage[]
}
|
|
15
|
+
|
|
16
|
+
/** A titled link. */
export interface Reference {
  title: string
  url: string
}
|
|
20
|
+
|
|
21
|
+
/** A titled image link with an optional thumbnail. */
export interface ImageReference {
  title: string
  url: string
  thumbnail?: string
}
|
|
26
|
+
|
|
27
|
+
/** A crawled page entry; structurally the same fields as `Reference`. */
export interface CrawledPage {
  title: string
  url: string
}
|
|
31
|
+
|
|
32
|
+
/** Optional timing figures; units are not stated here (TODO confirm with the Python side). */
export interface Stats {
  total_time?: number
  vision_duration?: number
}
|
|
36
|
+
|
|
37
|
+
/** Boolean feature flags for a render. */
export interface Flags {
  has_vision: boolean
  has_search: boolean
}
|
|
41
|
+
|
|
42
|
+
// Raw data from Python - minimal processing
|
|
43
|
+
/** Top-level payload handed over from Python with minimal processing. */
export interface RenderData {
  /** Raw markdown content. */
  markdown: string
  stages: Stage[]
  /** All references for citation. */
  references: Reference[]
  page_references: Reference[]
  image_references: ImageReference[]
  stats: Stats
  total_time: number
  /** Configurable theme color (hex). */
  theme_color?: string
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"extends": "@vue/tsconfig/tsconfig.dom.json",
|
|
3
|
+
"compilerOptions": {
|
|
4
|
+
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
|
|
5
|
+
"types": ["vite/client"],
|
|
6
|
+
|
|
7
|
+
/* Linting */
|
|
8
|
+
"strict": true,
|
|
9
|
+
"noUnusedLocals": true,
|
|
10
|
+
"noUnusedParameters": true,
|
|
11
|
+
"erasableSyntaxOnly": true,
|
|
12
|
+
"noFallthroughCasesInSwitch": true,
|
|
13
|
+
"noUncheckedSideEffectImports": true
|
|
14
|
+
},
|
|
15
|
+
"include": ["src/**/*.ts", "src/**/*.tsx", "src/**/*.vue"]
|
|
16
|
+
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
|
|
4
|
+
"target": "ES2023",
|
|
5
|
+
"lib": ["ES2023"],
|
|
6
|
+
"module": "ESNext",
|
|
7
|
+
"types": ["node"],
|
|
8
|
+
"skipLibCheck": true,
|
|
9
|
+
|
|
10
|
+
/* Bundler mode */
|
|
11
|
+
"moduleResolution": "bundler",
|
|
12
|
+
"allowImportingTsExtensions": true,
|
|
13
|
+
"verbatimModuleSyntax": true,
|
|
14
|
+
"moduleDetection": "force",
|
|
15
|
+
"noEmit": true,
|
|
16
|
+
|
|
17
|
+
/* Linting */
|
|
18
|
+
"strict": true,
|
|
19
|
+
"noUnusedLocals": true,
|
|
20
|
+
"noUnusedParameters": true,
|
|
21
|
+
"erasableSyntaxOnly": true,
|
|
22
|
+
"noFallthroughCasesInSwitch": true,
|
|
23
|
+
"noUncheckedSideEffectImports": true
|
|
24
|
+
},
|
|
25
|
+
"include": ["vite.config.ts"]
|
|
26
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { defineConfig } from 'vite'
|
|
2
|
+
import vue from '@vitejs/plugin-vue'
|
|
3
|
+
import tailwindcss from '@tailwindcss/vite'
|
|
4
|
+
import { viteSingleFile } from 'vite-plugin-singlefile'
|
|
5
|
+
|
|
6
|
+
export default defineConfig({
|
|
7
|
+
plugins: [
|
|
8
|
+
vue(),
|
|
9
|
+
tailwindcss(),
|
|
10
|
+
viteSingleFile(),
|
|
11
|
+
],
|
|
12
|
+
build: {
|
|
13
|
+
outDir: '../assets/card-dist',
|
|
14
|
+
emptyOutDir: true,
|
|
15
|
+
},
|
|
16
|
+
})
|
|
@@ -79,13 +79,37 @@ class HistoryManager:
|
|
|
79
79
|
"""Save conversation history to disk"""
|
|
80
80
|
import os
|
|
81
81
|
import time
|
|
82
|
+
import re
|
|
82
83
|
|
|
83
84
|
if key not in self._history:
|
|
84
85
|
return
|
|
85
86
|
|
|
86
87
|
try:
|
|
87
88
|
os.makedirs(save_dir, exist_ok=True)
|
|
88
|
-
|
|
89
|
+
|
|
90
|
+
# Extract user's first message (question) for filename
|
|
91
|
+
user_question = ""
|
|
92
|
+
for msg in self._history[key]:
|
|
93
|
+
if msg.get("role") == "user":
|
|
94
|
+
content = msg.get("content", "")
|
|
95
|
+
# Handle content that might be a list (multimodal)
|
|
96
|
+
if isinstance(content, list):
|
|
97
|
+
for item in content:
|
|
98
|
+
if isinstance(item, dict) and item.get("type") == "text":
|
|
99
|
+
user_question = item.get("text", "")
|
|
100
|
+
break
|
|
101
|
+
else:
|
|
102
|
+
user_question = str(content)
|
|
103
|
+
break
|
|
104
|
+
|
|
105
|
+
# Clean and truncate question for filename (10 chars)
|
|
106
|
+
question_part = re.sub(r'[\\/:*?"<>|\n\r\t]', '', user_question)[:10].strip()
|
|
107
|
+
if not question_part:
|
|
108
|
+
question_part = "conversation"
|
|
109
|
+
|
|
110
|
+
# Format: YYYYMMDD_HHMMSS_question.md
|
|
111
|
+
time_str = time.strftime("%Y%m%d_%H%M%S", time.localtime())
|
|
112
|
+
filename = f"{save_dir}/{time_str}_{question_part}.md"
|
|
89
113
|
|
|
90
114
|
# Formatter
|
|
91
115
|
timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Image Caching Module for Pre-downloading Images
|
|
3
|
+
|
|
4
|
+
This module provides async image pre-download functionality to reduce render time.
|
|
5
|
+
Images are downloaded in the background when search results are obtained,
|
|
6
|
+
and cached as base64 data URLs for instant use during rendering.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import base64
|
|
11
|
+
import hashlib
|
|
12
|
+
from typing import Dict, List, Optional, Any
|
|
13
|
+
from loguru import logger
|
|
14
|
+
import httpx
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ImageCache:
    """
    Async image cache that pre-downloads images as base64 data URLs.

    Usage:
        cache = ImageCache()

        # Start pre-downloading images (non-blocking, needs a running loop)
        cache.start_prefetch(image_urls)

        # Later, get cached image (waits for a pending download by default)
        cached_url = await cache.get_cached(url)  # data:image/... or original URL

    Notes:
        * Failed or oversized downloads are remembered as ``None`` so they are
          not retried; lookups for them return the original URL.
        * The cache has no eviction; call ``clear()`` to release memory.
    """

    _USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"

    # (tokens, mime) pairs, checked in order against the Content-Type header
    # and then, dot-prefixed, against the URL.
    _MIME_HINTS = (
        (("jpeg", "jpg"), "image/jpeg"),
        (("png",), "image/png"),
        (("gif",), "image/gif"),
        (("webp",), "image/webp"),
        (("svg",), "image/svg+xml"),
    )

    def __init__(
        self,
        max_size_kb: int = 500,  # Max image size to cache (KB)
        timeout: float = 5.0,  # Download timeout per image
        max_concurrent: int = 6,  # Max concurrent downloads
    ):
        self.max_size_bytes = max_size_kb * 1024
        self.timeout = timeout
        self.max_concurrent = max_concurrent

        # Cache storage: url -> base64 data URL, or None if the download failed
        self._cache: Dict[str, Optional[str]] = {}
        # Pending downloads: url -> asyncio.Task
        self._pending: Dict[str, asyncio.Task] = {}
        # Semaphore limiting concurrent downloads
        self._semaphore = asyncio.Semaphore(max_concurrent)
        # Lock guarding cache/pending updates made from coroutines
        self._lock = asyncio.Lock()

    def start_prefetch(self, urls: List[str]) -> None:
        """
        Start pre-downloading images in the background (non-blocking).

        Must be called while an event loop is running, because it schedules
        tasks with ``asyncio.create_task``. URLs that are empty, not
        http(s), already cached or already being downloaded are skipped.

        Args:
            urls: List of image URLs to prefetch
        """
        if not httpx:
            logger.warning("ImageCache: httpx not installed, prefetch disabled")
            return

        for url in urls:
            if not url or not url.startswith(("http://", "https://")):
                continue
            if url in self._cache or url in self._pending:
                continue

            task = asyncio.create_task(self._download_image(url))
            self._pending[url] = task
            # Guarantees the entry is dropped even when the task is cancelled,
            # a path on which the coroutine's own cleanup never runs.
            task.add_done_callback(
                lambda done, key=url: self._forget_pending(key, done)
            )

    def _forget_pending(self, url: str, task: "asyncio.Task") -> None:
        """Drop ``url`` from the pending map if it still refers to ``task``."""
        if self._pending.get(url) is task:
            del self._pending[url]

    @classmethod
    def _guess_mime(cls, content_type: str, url: str) -> str:
        """Pick a MIME type from the Content-Type header, then the URL, else JPEG."""
        header = content_type.lower()
        for tokens, mime in cls._MIME_HINTS:
            if any(token in header for token in tokens):
                return mime

        url_lower = url.lower()
        for tokens, mime in cls._MIME_HINTS:
            if any("." + token in url_lower for token in tokens):
                return mime

        return "image/jpeg"  # Default fallback

    async def _fetch_data_url(self, url: str) -> Optional[str]:
        """
        Stream one image and encode it as a data URL.

        The body is read chunk by chunk so that a response larger than
        ``max_size_bytes`` is abandoned as soon as the limit is crossed
        instead of being buffered in full.

        Returns:
            Base64 data URL, or None when the image exceeds the size limit.

        Raises:
            Whatever httpx raises for transport errors, timeouts and non-2xx
            statuses; the caller turns those into a cached failure.
        """
        async with httpx.AsyncClient(timeout=self.timeout, follow_redirects=True) as client:
            async with client.stream(
                "GET", url, headers={"User-Agent": self._USER_AGENT}
            ) as resp:
                resp.raise_for_status()

                # Cheap early exit when the server announces the size.
                declared = resp.headers.get("content-length", "")
                if declared.isdigit() and int(declared) > self.max_size_bytes:
                    logger.debug(f"ImageCache: Skipping {url} (too large: {declared} bytes)")
                    return None

                chunks = []
                received = 0
                async for chunk in resp.aiter_bytes():
                    received += len(chunk)
                    if received > self.max_size_bytes:
                        logger.debug(
                            f"ImageCache: Skipping {url} "
                            f"(content too large: over {self.max_size_bytes} bytes)"
                        )
                        return None
                    chunks.append(chunk)

                content_type = resp.headers.get("content-type", "")

        content = b"".join(chunks)
        mime = self._guess_mime(content_type, url)
        b64 = base64.b64encode(content).decode("utf-8")
        logger.debug(f"ImageCache: Cached {url} ({len(content)} bytes)")
        return f"data:{mime};base64,{b64}"

    async def _download_image(self, url: str) -> Optional[str]:
        """
        Download a single image and record the outcome in the cache.

        Returns:
            Base64 data URL or None if failed/too large
        """
        data_url: Optional[str] = None
        async with self._semaphore:
            try:
                data_url = await self._fetch_data_url(url)
            except (asyncio.TimeoutError, httpx.TimeoutException):
                logger.debug(f"ImageCache: Timeout downloading {url}")
            except Exception as e:
                # Best effort: any failure just means "use the original URL".
                logger.debug(f"ImageCache: Failed to download {url}: {e}")

            async with self._lock:
                self._cache[url] = data_url
                self._pending.pop(url, None)

        return data_url

    async def get_cached(self, url: str, wait: bool = True, wait_timeout: float = 3.0) -> str:
        """
        Get cached image data URL, or original URL if not cached.

        Args:
            url: Original image URL
            wait: If True, wait for pending download to complete
            wait_timeout: Max time to wait for pending download

        Returns:
            Cached data URL or original URL
        """
        if not url:
            return url

        async with self._lock:
            if url in self._cache:
                # None marks a failed download: fall back to the original URL.
                return self._cache[url] or url
            pending_task = self._pending.get(url)

        if pending_task is None or not wait:
            return url

        try:
            # shield() keeps the download alive if this wait times out.
            await asyncio.wait_for(asyncio.shield(pending_task), timeout=wait_timeout)
        except asyncio.TimeoutError:
            logger.debug(f"ImageCache: Timeout waiting for {url}")
            return url
        except asyncio.CancelledError:
            # The download itself was cancelled (e.g. by clear()); that must
            # not look like a cancellation of the caller.
            if pending_task.cancelled():
                return url
            raise
        except Exception as e:
            logger.debug(f"ImageCache: Error waiting for {url}: {e}")
            return url

        async with self._lock:
            return self._cache.get(url) or url

    async def get_all_cached(self, urls: List[str], wait_timeout: float = 3.0) -> Dict[str, str]:
        """
        Get cached URLs for multiple images.

        Pending downloads for the requested URLs are awaited for at most
        ``wait_timeout`` seconds. Downloads that are still running after that
        are left running, so later lookups can still benefit from them.

        Args:
            urls: List of original URLs
            wait_timeout: Max time to wait for all pending downloads

        Returns:
            Dict mapping original URL to cached data URL (or original if not cached)
        """
        async with self._lock:
            waiting = [self._pending[url] for url in urls if url in self._pending]

        if waiting:
            # asyncio.wait (unlike wait_for(gather(...))) does not cancel the
            # tasks when the timeout elapses.
            _, unfinished = await asyncio.wait(waiting, timeout=wait_timeout)
            if unfinished:
                logger.debug("ImageCache: Timeout waiting for batch download")

        async with self._lock:
            return {url: self._cache.get(url) or url for url in urls}

    def get_stats(self) -> Dict[str, Any]:
        """Get cache statistics."""
        outcomes = list(self._cache.values())
        failed_count = sum(1 for value in outcomes if value is None)
        return {
            "cached": len(outcomes) - failed_count,
            "failed": failed_count,
            "pending": len(self._pending),
            "total": len(outcomes) + len(self._pending),
        }

    def clear(self) -> None:
        """Clear all cached data and cancel downloads that are still running."""
        self._cache.clear()
        for task in list(self._pending.values()):
            task.cancel()
        self._pending.clear()
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
# Global cache instance for reuse across requests
|
|
249
|
+
_global_cache: Optional[ImageCache] = None
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def get_image_cache() -> ImageCache:
    """Return the process-wide ImageCache, creating it on first use."""
    global _global_cache
    cache = _global_cache
    if cache is None:
        cache = _global_cache = ImageCache()
    return cache
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
async def prefetch_images(urls: List[str]) -> None:
    """
    Kick off background prefetching on the shared image cache.

    This only schedules the downloads; it does not wait for them.

    Args:
        urls: List of image URLs to prefetch
    """
    get_image_cache().start_prefetch(urls)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
async def get_cached_images(urls: List[str], wait_timeout: float = 3.0) -> Dict[str, str]:
    """
    Look up several images in the shared image cache.

    Args:
        urls: List of original URLs
        wait_timeout: Max time to wait

    Returns:
        Dict mapping original URL to cached data URL
    """
    shared_cache = get_image_cache()
    resolved = await shared_cache.get_all_cached(urls, wait_timeout=wait_timeout)
    return resolved
|