botrun-flow-lang 5.9.301__py3-none-any.whl → 5.10.82__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- botrun_flow_lang/api/auth_api.py +39 -39
- botrun_flow_lang/api/auth_utils.py +183 -183
- botrun_flow_lang/api/botrun_back_api.py +65 -65
- botrun_flow_lang/api/flow_api.py +3 -3
- botrun_flow_lang/api/hatch_api.py +481 -481
- botrun_flow_lang/api/langgraph_api.py +796 -796
- botrun_flow_lang/api/line_bot_api.py +1357 -1357
- botrun_flow_lang/api/model_api.py +300 -300
- botrun_flow_lang/api/rate_limit_api.py +32 -32
- botrun_flow_lang/api/routes.py +79 -79
- botrun_flow_lang/api/search_api.py +53 -53
- botrun_flow_lang/api/storage_api.py +316 -316
- botrun_flow_lang/api/subsidy_api.py +290 -290
- botrun_flow_lang/api/subsidy_api_system_prompt.txt +109 -109
- botrun_flow_lang/api/user_setting_api.py +70 -70
- botrun_flow_lang/api/version_api.py +31 -31
- botrun_flow_lang/api/youtube_api.py +26 -26
- botrun_flow_lang/constants.py +13 -13
- botrun_flow_lang/langgraph_agents/agents/agent_runner.py +174 -174
- botrun_flow_lang/langgraph_agents/agents/agent_tools/step_planner.py +77 -77
- botrun_flow_lang/langgraph_agents/agents/checkpointer/firestore_checkpointer.py +666 -666
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/GOV_RESEARCHER_PRD.md +192 -192
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_2_graph.py +1002 -1002
- botrun_flow_lang/langgraph_agents/agents/gov_researcher/gov_researcher_graph.py +822 -822
- botrun_flow_lang/langgraph_agents/agents/langgraph_react_agent.py +548 -542
- botrun_flow_lang/langgraph_agents/agents/search_agent_graph.py +864 -864
- botrun_flow_lang/langgraph_agents/agents/tools/__init__.py +4 -4
- botrun_flow_lang/langgraph_agents/agents/tools/gemini_code_execution.py +376 -376
- botrun_flow_lang/langgraph_agents/agents/util/gemini_grounding.py +66 -66
- botrun_flow_lang/langgraph_agents/agents/util/html_util.py +316 -316
- botrun_flow_lang/langgraph_agents/agents/util/img_util.py +294 -294
- botrun_flow_lang/langgraph_agents/agents/util/local_files.py +345 -345
- botrun_flow_lang/langgraph_agents/agents/util/mermaid_util.py +86 -86
- botrun_flow_lang/langgraph_agents/agents/util/model_utils.py +143 -143
- botrun_flow_lang/langgraph_agents/agents/util/pdf_analyzer.py +160 -160
- botrun_flow_lang/langgraph_agents/agents/util/perplexity_search.py +464 -464
- botrun_flow_lang/langgraph_agents/agents/util/plotly_util.py +59 -59
- botrun_flow_lang/langgraph_agents/agents/util/tavily_search.py +199 -199
- botrun_flow_lang/langgraph_agents/agents/util/youtube_util.py +90 -90
- botrun_flow_lang/langgraph_agents/cache/langgraph_botrun_cache.py +197 -197
- botrun_flow_lang/llm_agent/llm_agent.py +19 -19
- botrun_flow_lang/llm_agent/llm_agent_util.py +83 -83
- botrun_flow_lang/log/.gitignore +2 -2
- botrun_flow_lang/main.py +61 -61
- botrun_flow_lang/main_fast.py +51 -51
- botrun_flow_lang/mcp_server/__init__.py +10 -10
- botrun_flow_lang/mcp_server/default_mcp.py +711 -711
- botrun_flow_lang/models/nodes/utils.py +205 -205
- botrun_flow_lang/models/token_usage.py +34 -34
- botrun_flow_lang/requirements.txt +21 -21
- botrun_flow_lang/services/base/firestore_base.py +30 -30
- botrun_flow_lang/services/hatch/hatch_factory.py +11 -11
- botrun_flow_lang/services/hatch/hatch_fs_store.py +372 -372
- botrun_flow_lang/services/storage/storage_cs_store.py +202 -202
- botrun_flow_lang/services/storage/storage_factory.py +12 -12
- botrun_flow_lang/services/storage/storage_store.py +65 -65
- botrun_flow_lang/services/user_setting/user_setting_factory.py +9 -9
- botrun_flow_lang/services/user_setting/user_setting_fs_store.py +66 -66
- botrun_flow_lang/static/docs/tools/index.html +926 -926
- botrun_flow_lang/tests/api_functional_tests.py +1525 -1525
- botrun_flow_lang/tests/api_stress_test.py +357 -357
- botrun_flow_lang/tests/shared_hatch_tests.py +333 -333
- botrun_flow_lang/tests/test_botrun_app.py +46 -46
- botrun_flow_lang/tests/test_html_util.py +31 -31
- botrun_flow_lang/tests/test_img_analyzer.py +190 -190
- botrun_flow_lang/tests/test_img_util.py +39 -39
- botrun_flow_lang/tests/test_local_files.py +114 -114
- botrun_flow_lang/tests/test_mermaid_util.py +103 -103
- botrun_flow_lang/tests/test_pdf_analyzer.py +104 -104
- botrun_flow_lang/tests/test_plotly_util.py +151 -151
- botrun_flow_lang/tests/test_run_workflow_engine.py +65 -65
- botrun_flow_lang/tools/generate_docs.py +133 -133
- botrun_flow_lang/tools/templates/tools.html +153 -153
- botrun_flow_lang/utils/__init__.py +7 -7
- botrun_flow_lang/utils/botrun_logger.py +344 -344
- botrun_flow_lang/utils/clients/rate_limit_client.py +209 -209
- botrun_flow_lang/utils/clients/token_verify_client.py +153 -153
- botrun_flow_lang/utils/google_drive_utils.py +654 -654
- botrun_flow_lang/utils/langchain_utils.py +324 -324
- botrun_flow_lang/utils/yaml_utils.py +9 -9
- {botrun_flow_lang-5.9.301.dist-info → botrun_flow_lang-5.10.82.dist-info}/METADATA +2 -2
- botrun_flow_lang-5.10.82.dist-info/RECORD +99 -0
- botrun_flow_lang-5.9.301.dist-info/RECORD +0 -99
- {botrun_flow_lang-5.9.301.dist-info → botrun_flow_lang-5.10.82.dist-info}/WHEEL +0 -0
|
@@ -1,316 +1,316 @@
|
|
|
1
|
-
import os
|
|
2
|
-
from tempfile import NamedTemporaryFile
|
|
3
|
-
from typing import Dict, Any, Optional, Tuple
|
|
4
|
-
import re
|
|
5
|
-
from urllib.parse import urlparse, urlunparse, unquote, parse_qs, urlencode
|
|
6
|
-
import time
|
|
7
|
-
from io import BytesIO
|
|
8
|
-
import requests
|
|
9
|
-
|
|
10
|
-
from botrun_flow_lang.constants import MODIFY_GCS_HTML_MODEL
|
|
11
|
-
from .local_files import upload_html_and_get_public_url
|
|
12
|
-
from botrun_flow_lang.services.storage.storage_factory import storage_store_factory
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
async def generate_html_file(
    html_content: str,
    botrun_flow_lang_url: str,
    user_id: str,
    title: Optional[str] = None,
) -> str:
    """
    Write HTML content to a temporary file and upload it, returning its URL.

    The content may be a complete HTML document or a fragment:

    * If the content contains a ``<head ...>`` tag (any letter case, with or
      without attributes) and no ``<title>``, the given ``title`` is inserted
      right after the opening head tag.
    * If the content has no head tag and does not start with
      ``<!DOCTYPE html`` / ``<html``, it is wrapped in a minimal UTF-8 HTML
      document whose title is ``title`` (or ``"HTML Page"``).
    * Otherwise the content is uploaded unchanged.

    Args:
        html_content: HTML document or fragment to publish.
        botrun_flow_lang_url: Passed through to
            ``upload_html_and_get_public_url``.
        user_id: Passed through to ``upload_html_and_get_public_url``.
        title: Optional plain-text page title; it is HTML-escaped before being
            inserted and is ignored when the content already has a title.

    Returns:
        str: The value returned by ``upload_html_and_get_public_url``, or a
        message starting with ``"Error: "`` if anything fails. This function
        does not raise.
    """
    # Local import: `html` is not among this module's top-level imports.
    from html import escape

    temp_path = None
    try:
        leading = html_content.strip().lower()
        is_complete_html = leading.startswith(("<!doctype html", "<html"))

        # Match the opening head tag regardless of case/attributes, but not
        # look-alikes such as <header>.
        head_open = re.search(r"<head(?:\s[^>]*)?>", html_content, re.IGNORECASE)
        has_title = re.search(r"<title[\s>]", html_content, re.IGNORECASE) is not None
        safe_title = escape(title) if title else None

        if head_open:
            # Insert at the position of the tag that was actually found, so the
            # case-insensitive detection and the insertion cannot disagree.
            if safe_title and not has_title:
                insert_at = head_open.end()
                html_content = (
                    html_content[:insert_at]
                    + f"\n    <title>{safe_title}</title>"
                    + html_content[insert_at:]
                )
        elif not is_complete_html:
            # Bare fragment: wrap it in a basic HTML structure.
            page_title = safe_title if safe_title else "HTML Page"
            html_content = f"""
            <!DOCTYPE html>
            <html>
            <head>
                <meta charset="utf-8">
                <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
                <title>{page_title}</title>
                <style>
                    body {{
                        font-family: "Microsoft JhengHei", "微軟正黑體", "Heiti TC", "黑體-繁", sans-serif;
                    }}
                </style>
            </head>
            <body>
                {html_content}
            </body>
            </html>
            """

        # Write and CLOSE the file before uploading: an open NamedTemporaryFile
        # cannot be reopened or deleted by name on Windows.
        with NamedTemporaryFile(
            suffix=".html", mode="w", encoding="utf-8", delete=False
        ) as html_temp:
            temp_path = html_temp.name
            html_temp.write(html_content)

        return await upload_html_and_get_public_url(
            temp_path, botrun_flow_lang_url, user_id
        )
    except Exception as e:
        # Contract: report failures as an "Error: ..." string instead of raising.
        return f"Error: {str(e)}"
    finally:
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not turn a
                # successful upload into an error (or mask the real error).
                pass
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
# TODO: not finished yet -- in my test case the value stops increasing once it reaches 3
|
|
111
|
-
async def modify_gcs_html(
    html_url: str,
    modification_instruction: str,
) -> Tuple[bool, str, Optional[str]]:
    """
    Modify an HTML file stored in Google Cloud Storage with the help of Gemini.

    Steps:
      1. Validate the URL and split it into bucket / document path.
      2. Fetch the HTML (direct HTTP GET first, storage client as fallback).
      3. Ask the Gemini model named by ``MODIFY_GCS_HTML_MODEL`` for Python
         code defining ``modify_html(html) -> str``.
      4. Execute that code and call ``modify_html`` on the fetched HTML.
      5. Store the result back and return a cache-busting URL.

    Args:
        html_url: GCS URL pointing to an HTML file
            (format: https://storage.googleapis.com/[bucket-name]/[doc-path]).
            Any query string is ignored.
        modification_instruction: Natural language instruction describing the
            change to make.

    Returns:
        Tuple[bool, str, Optional[str]]: ``(success, url, error_message)``.
        On success ``url`` is the query-less input URL with a ``t=<unix time>``
        query parameter appended and ``error_message`` is ``None``. On failure
        ``url`` is the unmodified ``html_url`` and ``error_message`` describes
        the problem. This function does not raise.
    """
    try:
        # 1. Parse the GCS URL to extract bucket name and document path
        url_parts = urlparse(html_url)

        # Strip query parameters from the URL for processing
        clean_url = urlunparse(url_parts._replace(query=""))

        # Compare the host exactly: a prefix test would also accept hosts such
        # as "storage.googleapis.com.attacker.example".
        if url_parts.hostname != "storage.googleapis.com":
            return False, html_url, "Error: URL must be a Google Cloud Storage URL"

        # Path is "<bucket>/<document path>"; both parts are required.
        path_segments = url_parts.path.strip("/").split("/", 1)
        if len(path_segments) < 2:
            return False, html_url, "Error: Invalid GCS URL format"

        document_path = path_segments[1]

        # URL decode the document path to handle encoded characters like %40
        decoded_document_path = unquote(document_path)

        # 2. Fetch the HTML content from GCS
        try:
            # First try to get the HTML directly via the URL. The timeout keeps
            # a stalled connection from hanging this coroutine indefinitely.
            response = requests.get(clean_url, timeout=30)
            if response.status_code != 200:
                # If direct access fails, use the storage client.
                # NOTE(review): retrieval uses the raw (still URL-encoded) path
                # while the write-back below uses the decoded path -- confirm
                # which form the storage backend expects.
                storage = storage_store_factory()
                file_object = await storage.retrieve_file(document_path)
                if not file_object:
                    return (
                        False,
                        html_url,
                        "Error: Could not retrieve HTML file from GCS",
                    )
                # Explicitly decode with UTF-8 to properly handle non-ASCII characters
                html_content = file_object.getvalue().decode("utf-8")
            else:
                # Honour the charset from the Content-Type header (tolerating
                # quotes, whitespace and letter case); default to UTF-8.
                content_type = response.headers.get("content-type", "")
                charset_match = re.search(
                    r"charset=([^;]+)", content_type, re.IGNORECASE
                )
                if charset_match:
                    response.encoding = charset_match.group(1).strip().strip("\"'")
                else:
                    response.encoding = "utf-8"
                html_content = response.text
        except Exception as e:
            return False, html_url, f"Error retrieving HTML content: {str(e)}"

        # 3. Call Gemini API to generate Python code for HTML modification
        try:
            # Import here to avoid loading time and potential circular imports
            import google.generativeai as genai

            api_key = os.getenv("GEMINI_API_KEY", "")
            if not api_key:
                return (
                    False,
                    html_url,
                    "Error: GEMINI_API_KEY environment variable not set",
                )

            genai.configure(api_key=api_key)
            model = genai.GenerativeModel(MODIFY_GCS_HTML_MODEL)

            prompt = (
                "You are an expert HTML and Python developer.\n"
                "Your task is to modify an HTML document according to the following instruction:\n"
                f'"{modification_instruction}"\n'
                "\n"
                "Here is the HTML code to modify:\n"
                "```html\n"
                f"{html_content}\n"
                "```\n"
                "\n"
                "Please provide minimal Python code that makes these modifications to the HTML.\n"
                "Your code must:\n"
                "1. Use BeautifulSoup4 to parse and modify the HTML\n"
                "2. Return the modified HTML as a string\n"
                "3. Use a function called 'modify_html' that takes the original HTML as input and returns the modified HTML\n"
                "4. Only include essential code to make the exact change requested - no explanations or verbose comments\n"
                "5. Ensure you preserve the character encoding for non-ASCII characters\n"
                "6. Use BeautifulSoup with features='html.parser'\n"
                "\n"
                "Only provide the Python code, nothing else. Keep the code minimal and direct."
            )

            generated_code = model.generate_content(prompt).text

            # Unwrap the code if the model put it in a Markdown fence,
            # preferring an explicit ```python fence over a bare one.
            if "```python" in generated_code:
                fence_match = re.search(
                    r"```python(.*?)```", generated_code, re.DOTALL
                )
                if fence_match:
                    generated_code = fence_match.group(1).strip()
            elif "```" in generated_code:
                fence_match = re.search(r"```(.*?)```", generated_code, re.DOTALL)
                if fence_match:
                    generated_code = fence_match.group(1).strip()
        except Exception as e:
            return False, html_url, f"Error generating modification code: {str(e)}"

        # 4. Execute the generated Python code
        # SECURITY(review): exec() runs model-generated code with this
        # process's full privileges. The namespace below is NOT a sandbox; the
        # HTML and the instruction can both influence what code is produced.
        # Consider an isolated interpreter/process before exposing this to
        # untrusted input.
        try:
            from bs4 import BeautifulSoup

            namespace = {
                "original_html": html_content,
                "BeautifulSoup": BeautifulSoup,
            }
            exec(generated_code, namespace)

            modify_fn = namespace.get("modify_html")
            if not callable(modify_fn):
                return (
                    False,
                    html_url,
                    "Error: Generated code does not contain a modify_html function",
                )

            modified_html = modify_fn(html_content)

            if not modified_html or not isinstance(modified_html, str):
                return (
                    False,
                    html_url,
                    "Error: Generated code did not produce valid HTML",
                )

            # Check if the model actually made changes to the HTML
            if modified_html.strip() == html_content.strip():
                return (
                    False,
                    html_url,
                    "Error: The model didn't make any changes to the HTML. It might not understand how to perform the requested modification.",
                )
        except Exception as e:
            return False, html_url, f"Error executing generated code: {str(e)}"

        # 5. Update the original HTML file in GCS
        try:
            storage = storage_store_factory()
            # Explicitly encode with UTF-8 to preserve non-ASCII characters
            file_object = BytesIO(modified_html.encode("utf-8"))

            # Store the modified file back using the decoded path so special
            # characters like @ in the path are handled properly.
            success, _ = await storage.store_file(
                decoded_document_path,
                file_object,
                public=True,
                content_type="text/html; charset=utf-8",  # Explicitly set UTF-8 charset
            )

            if not success:
                return (
                    False,
                    html_url,
                    "Error: Failed to update the HTML file in GCS",
                )

            # Add timestamp as query parameter to the URL to bypass cache
            cache_buster = urlencode({"t": int(time.time())})
            final_url = urlunparse(urlparse(clean_url)._replace(query=cache_buster))

            return True, final_url, None
        except Exception as e:
            return False, html_url, f"Error updating HTML file: {str(e)}"

    except Exception as e:
        return False, html_url, f"Error: {str(e)}"
|
|
1
|
+
import os
|
|
2
|
+
from tempfile import NamedTemporaryFile
|
|
3
|
+
from typing import Dict, Any, Optional, Tuple
|
|
4
|
+
import re
|
|
5
|
+
from urllib.parse import urlparse, urlunparse, unquote, parse_qs, urlencode
|
|
6
|
+
import time
|
|
7
|
+
from io import BytesIO
|
|
8
|
+
import requests
|
|
9
|
+
|
|
10
|
+
from botrun_flow_lang.constants import MODIFY_GCS_HTML_MODEL
|
|
11
|
+
from .local_files import upload_html_and_get_public_url
|
|
12
|
+
from botrun_flow_lang.services.storage.storage_factory import storage_store_factory
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
async def generate_html_file(
    html_content: str,
    botrun_flow_lang_url: str,
    user_id: str,
    title: Optional[str] = None,
) -> str:
    """
    Write HTML content to a temporary file and upload it, returning its URL.

    The content may be a complete HTML document or a fragment:

    * If the content contains a ``<head ...>`` tag (any letter case, with or
      without attributes) and no ``<title>``, the given ``title`` is inserted
      right after the opening head tag.
    * If the content has no head tag and does not start with
      ``<!DOCTYPE html`` / ``<html``, it is wrapped in a minimal UTF-8 HTML
      document whose title is ``title`` (or ``"HTML Page"``).
    * Otherwise the content is uploaded unchanged.

    Args:
        html_content: HTML document or fragment to publish.
        botrun_flow_lang_url: Passed through to
            ``upload_html_and_get_public_url``.
        user_id: Passed through to ``upload_html_and_get_public_url``.
        title: Optional plain-text page title; it is HTML-escaped before being
            inserted and is ignored when the content already has a title.

    Returns:
        str: The value returned by ``upload_html_and_get_public_url``, or a
        message starting with ``"Error: "`` if anything fails. This function
        does not raise.
    """
    # Local import: `html` is not among this module's top-level imports.
    from html import escape

    temp_path = None
    try:
        leading = html_content.strip().lower()
        is_complete_html = leading.startswith(("<!doctype html", "<html"))

        # Match the opening head tag regardless of case/attributes, but not
        # look-alikes such as <header>.
        head_open = re.search(r"<head(?:\s[^>]*)?>", html_content, re.IGNORECASE)
        has_title = re.search(r"<title[\s>]", html_content, re.IGNORECASE) is not None
        safe_title = escape(title) if title else None

        if head_open:
            # Insert at the position of the tag that was actually found, so the
            # case-insensitive detection and the insertion cannot disagree.
            if safe_title and not has_title:
                insert_at = head_open.end()
                html_content = (
                    html_content[:insert_at]
                    + f"\n    <title>{safe_title}</title>"
                    + html_content[insert_at:]
                )
        elif not is_complete_html:
            # Bare fragment: wrap it in a basic HTML structure.
            page_title = safe_title if safe_title else "HTML Page"
            html_content = f"""
            <!DOCTYPE html>
            <html>
            <head>
                <meta charset="utf-8">
                <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
                <title>{page_title}</title>
                <style>
                    body {{
                        font-family: "Microsoft JhengHei", "微軟正黑體", "Heiti TC", "黑體-繁", sans-serif;
                    }}
                </style>
            </head>
            <body>
                {html_content}
            </body>
            </html>
            """

        # Write and CLOSE the file before uploading: an open NamedTemporaryFile
        # cannot be reopened or deleted by name on Windows.
        with NamedTemporaryFile(
            suffix=".html", mode="w", encoding="utf-8", delete=False
        ) as html_temp:
            temp_path = html_temp.name
            html_temp.write(html_content)

        return await upload_html_and_get_public_url(
            temp_path, botrun_flow_lang_url, user_id
        )
    except Exception as e:
        # Contract: report failures as an "Error: ..." string instead of raising.
        return f"Error: {str(e)}"
    finally:
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not turn a
                # successful upload into an error (or mask the real error).
                pass
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# TODO: not finished yet -- in my test case the value stops increasing once it reaches 3
|
|
111
|
+
async def modify_gcs_html(
    html_url: str,
    modification_instruction: str,
) -> Tuple[bool, str, Optional[str]]:
    """
    Modify an HTML file stored in Google Cloud Storage with the help of Gemini.

    Steps:
      1. Validate the URL and split it into bucket / document path.
      2. Fetch the HTML (direct HTTP GET first, storage client as fallback).
      3. Ask the Gemini model named by ``MODIFY_GCS_HTML_MODEL`` for Python
         code defining ``modify_html(html) -> str``.
      4. Execute that code and call ``modify_html`` on the fetched HTML.
      5. Store the result back and return a cache-busting URL.

    Args:
        html_url: GCS URL pointing to an HTML file
            (format: https://storage.googleapis.com/[bucket-name]/[doc-path]).
            Any query string is ignored.
        modification_instruction: Natural language instruction describing the
            change to make.

    Returns:
        Tuple[bool, str, Optional[str]]: ``(success, url, error_message)``.
        On success ``url`` is the query-less input URL with a ``t=<unix time>``
        query parameter appended and ``error_message`` is ``None``. On failure
        ``url`` is the unmodified ``html_url`` and ``error_message`` describes
        the problem. This function does not raise.
    """
    try:
        # 1. Parse the GCS URL to extract bucket name and document path
        url_parts = urlparse(html_url)

        # Strip query parameters from the URL for processing
        clean_url = urlunparse(url_parts._replace(query=""))

        # Compare the host exactly: a prefix test would also accept hosts such
        # as "storage.googleapis.com.attacker.example".
        if url_parts.hostname != "storage.googleapis.com":
            return False, html_url, "Error: URL must be a Google Cloud Storage URL"

        # Path is "<bucket>/<document path>"; both parts are required.
        path_segments = url_parts.path.strip("/").split("/", 1)
        if len(path_segments) < 2:
            return False, html_url, "Error: Invalid GCS URL format"

        document_path = path_segments[1]

        # URL decode the document path to handle encoded characters like %40
        decoded_document_path = unquote(document_path)

        # 2. Fetch the HTML content from GCS
        try:
            # First try to get the HTML directly via the URL. The timeout keeps
            # a stalled connection from hanging this coroutine indefinitely.
            response = requests.get(clean_url, timeout=30)
            if response.status_code != 200:
                # If direct access fails, use the storage client.
                # NOTE(review): retrieval uses the raw (still URL-encoded) path
                # while the write-back below uses the decoded path -- confirm
                # which form the storage backend expects.
                storage = storage_store_factory()
                file_object = await storage.retrieve_file(document_path)
                if not file_object:
                    return (
                        False,
                        html_url,
                        "Error: Could not retrieve HTML file from GCS",
                    )
                # Explicitly decode with UTF-8 to properly handle non-ASCII characters
                html_content = file_object.getvalue().decode("utf-8")
            else:
                # Honour the charset from the Content-Type header (tolerating
                # quotes, whitespace and letter case); default to UTF-8.
                content_type = response.headers.get("content-type", "")
                charset_match = re.search(
                    r"charset=([^;]+)", content_type, re.IGNORECASE
                )
                if charset_match:
                    response.encoding = charset_match.group(1).strip().strip("\"'")
                else:
                    response.encoding = "utf-8"
                html_content = response.text
        except Exception as e:
            return False, html_url, f"Error retrieving HTML content: {str(e)}"

        # 3. Call Gemini API to generate Python code for HTML modification
        try:
            # Import here to avoid loading time and potential circular imports
            import google.generativeai as genai

            api_key = os.getenv("GEMINI_API_KEY", "")
            if not api_key:
                return (
                    False,
                    html_url,
                    "Error: GEMINI_API_KEY environment variable not set",
                )

            genai.configure(api_key=api_key)
            model = genai.GenerativeModel(MODIFY_GCS_HTML_MODEL)

            prompt = (
                "You are an expert HTML and Python developer.\n"
                "Your task is to modify an HTML document according to the following instruction:\n"
                f'"{modification_instruction}"\n'
                "\n"
                "Here is the HTML code to modify:\n"
                "```html\n"
                f"{html_content}\n"
                "```\n"
                "\n"
                "Please provide minimal Python code that makes these modifications to the HTML.\n"
                "Your code must:\n"
                "1. Use BeautifulSoup4 to parse and modify the HTML\n"
                "2. Return the modified HTML as a string\n"
                "3. Use a function called 'modify_html' that takes the original HTML as input and returns the modified HTML\n"
                "4. Only include essential code to make the exact change requested - no explanations or verbose comments\n"
                "5. Ensure you preserve the character encoding for non-ASCII characters\n"
                "6. Use BeautifulSoup with features='html.parser'\n"
                "\n"
                "Only provide the Python code, nothing else. Keep the code minimal and direct."
            )

            generated_code = model.generate_content(prompt).text

            # Unwrap the code if the model put it in a Markdown fence,
            # preferring an explicit ```python fence over a bare one.
            if "```python" in generated_code:
                fence_match = re.search(
                    r"```python(.*?)```", generated_code, re.DOTALL
                )
                if fence_match:
                    generated_code = fence_match.group(1).strip()
            elif "```" in generated_code:
                fence_match = re.search(r"```(.*?)```", generated_code, re.DOTALL)
                if fence_match:
                    generated_code = fence_match.group(1).strip()
        except Exception as e:
            return False, html_url, f"Error generating modification code: {str(e)}"

        # 4. Execute the generated Python code
        # SECURITY(review): exec() runs model-generated code with this
        # process's full privileges. The namespace below is NOT a sandbox; the
        # HTML and the instruction can both influence what code is produced.
        # Consider an isolated interpreter/process before exposing this to
        # untrusted input.
        try:
            from bs4 import BeautifulSoup

            namespace = {
                "original_html": html_content,
                "BeautifulSoup": BeautifulSoup,
            }
            exec(generated_code, namespace)

            modify_fn = namespace.get("modify_html")
            if not callable(modify_fn):
                return (
                    False,
                    html_url,
                    "Error: Generated code does not contain a modify_html function",
                )

            modified_html = modify_fn(html_content)

            if not modified_html or not isinstance(modified_html, str):
                return (
                    False,
                    html_url,
                    "Error: Generated code did not produce valid HTML",
                )

            # Check if the model actually made changes to the HTML
            if modified_html.strip() == html_content.strip():
                return (
                    False,
                    html_url,
                    "Error: The model didn't make any changes to the HTML. It might not understand how to perform the requested modification.",
                )
        except Exception as e:
            return False, html_url, f"Error executing generated code: {str(e)}"

        # 5. Update the original HTML file in GCS
        try:
            storage = storage_store_factory()
            # Explicitly encode with UTF-8 to preserve non-ASCII characters
            file_object = BytesIO(modified_html.encode("utf-8"))

            # Store the modified file back using the decoded path so special
            # characters like @ in the path are handled properly.
            success, _ = await storage.store_file(
                decoded_document_path,
                file_object,
                public=True,
                content_type="text/html; charset=utf-8",  # Explicitly set UTF-8 charset
            )

            if not success:
                return (
                    False,
                    html_url,
                    "Error: Failed to update the HTML file in GCS",
                )

            # Add timestamp as query parameter to the URL to bypass cache
            cache_buster = urlencode({"t": int(time.time())})
            final_url = urlunparse(urlparse(clean_url)._replace(query=cache_buster))

            return True, final_url, None
        except Exception as e:
            return False, html_url, f"Error updating HTML file: {str(e)}"

    except Exception as e:
        return False, html_url, f"Error: {str(e)}"
|