flowllm 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowllm/__init__.py +21 -0
- flowllm/app.py +15 -0
- flowllm/client/__init__.py +25 -0
- flowllm/client/async_http_client.py +81 -0
- flowllm/client/http_client.py +81 -0
- flowllm/client/mcp_client.py +133 -0
- flowllm/client/sync_mcp_client.py +116 -0
- flowllm/config/__init__.py +1 -0
- flowllm/config/default.yaml +77 -0
- flowllm/config/empty.yaml +37 -0
- flowllm/config/pydantic_config_parser.py +242 -0
- flowllm/context/base_context.py +79 -0
- flowllm/context/flow_context.py +16 -0
- llmflow/op/prompt_mixin.py → flowllm/context/prompt_handler.py +25 -14
- flowllm/context/registry.py +30 -0
- flowllm/context/service_context.py +147 -0
- flowllm/embedding_model/__init__.py +1 -0
- {llmflow → flowllm}/embedding_model/base_embedding_model.py +93 -2
- {llmflow → flowllm}/embedding_model/openai_compatible_embedding_model.py +71 -13
- flowllm/flow/__init__.py +1 -0
- flowllm/flow/base_flow.py +72 -0
- flowllm/flow/base_tool_flow.py +15 -0
- flowllm/flow/gallery/__init__.py +8 -0
- flowllm/flow/gallery/cmd_flow.py +11 -0
- flowllm/flow/gallery/code_tool_flow.py +30 -0
- flowllm/flow/gallery/dashscope_search_tool_flow.py +34 -0
- flowllm/flow/gallery/deepsearch_tool_flow.py +39 -0
- flowllm/flow/gallery/expression_tool_flow.py +18 -0
- flowllm/flow/gallery/mock_tool_flow.py +67 -0
- flowllm/flow/gallery/tavily_search_tool_flow.py +30 -0
- flowllm/flow/gallery/terminate_tool_flow.py +30 -0
- flowllm/flow/parser/expression_parser.py +171 -0
- flowllm/llm/__init__.py +2 -0
- {llmflow → flowllm}/llm/base_llm.py +100 -18
- flowllm/llm/litellm_llm.py +455 -0
- flowllm/llm/openai_compatible_llm.py +439 -0
- flowllm/op/__init__.py +11 -0
- llmflow/op/react/react_v1_op.py → flowllm/op/agent/react_op.py +17 -22
- flowllm/op/akshare/__init__.py +3 -0
- flowllm/op/akshare/get_ak_a_code_op.py +108 -0
- flowllm/op/akshare/get_ak_a_code_prompt.yaml +21 -0
- flowllm/op/akshare/get_ak_a_info_op.py +140 -0
- flowllm/op/base_llm_op.py +64 -0
- flowllm/op/base_op.py +148 -0
- flowllm/op/base_ray_op.py +313 -0
- flowllm/op/code/__init__.py +1 -0
- flowllm/op/code/execute_code_op.py +42 -0
- flowllm/op/gallery/__init__.py +2 -0
- flowllm/op/gallery/mock_op.py +42 -0
- flowllm/op/gallery/terminate_op.py +29 -0
- flowllm/op/parallel_op.py +23 -0
- flowllm/op/search/__init__.py +3 -0
- flowllm/op/search/dashscope_deep_research_op.py +260 -0
- flowllm/op/search/dashscope_search_op.py +179 -0
- flowllm/op/search/dashscope_search_prompt.yaml +13 -0
- flowllm/op/search/tavily_search_op.py +102 -0
- flowllm/op/sequential_op.py +21 -0
- flowllm/schema/flow_request.py +12 -0
- flowllm/schema/flow_response.py +12 -0
- flowllm/schema/message.py +35 -0
- flowllm/schema/service_config.py +72 -0
- flowllm/schema/tool_call.py +118 -0
- {llmflow → flowllm}/schema/vector_node.py +1 -0
- flowllm/service/__init__.py +3 -0
- flowllm/service/base_service.py +68 -0
- flowllm/service/cmd_service.py +15 -0
- flowllm/service/http_service.py +79 -0
- flowllm/service/mcp_service.py +47 -0
- flowllm/storage/__init__.py +1 -0
- flowllm/storage/cache/__init__.py +1 -0
- flowllm/storage/cache/cache_data_handler.py +104 -0
- flowllm/storage/cache/data_cache.py +375 -0
- flowllm/storage/vector_store/__init__.py +3 -0
- flowllm/storage/vector_store/base_vector_store.py +44 -0
- {llmflow → flowllm/storage}/vector_store/chroma_vector_store.py +11 -10
- {llmflow → flowllm/storage}/vector_store/es_vector_store.py +11 -11
- llmflow/vector_store/file_vector_store.py → flowllm/storage/vector_store/local_vector_store.py +110 -11
- flowllm/utils/common_utils.py +52 -0
- flowllm/utils/fetch_url.py +117 -0
- flowllm/utils/llm_utils.py +28 -0
- flowllm/utils/ridge_v2.py +54 -0
- {llmflow → flowllm}/utils/timer.py +5 -4
- {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/METADATA +45 -388
- flowllm-0.1.2.dist-info/RECORD +99 -0
- flowllm-0.1.2.dist-info/entry_points.txt +2 -0
- {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/licenses/LICENSE +1 -1
- flowllm-0.1.2.dist-info/top_level.txt +1 -0
- flowllm-0.1.0.dist-info/RECORD +0 -66
- flowllm-0.1.0.dist-info/entry_points.txt +0 -3
- flowllm-0.1.0.dist-info/top_level.txt +0 -1
- llmflow/app.py +0 -53
- llmflow/config/config_parser.py +0 -80
- llmflow/config/mock_config.yaml +0 -58
- llmflow/embedding_model/__init__.py +0 -5
- llmflow/enumeration/agent_state.py +0 -8
- llmflow/llm/__init__.py +0 -5
- llmflow/llm/openai_compatible_llm.py +0 -283
- llmflow/mcp_server.py +0 -110
- llmflow/op/__init__.py +0 -10
- llmflow/op/base_op.py +0 -125
- llmflow/op/mock_op.py +0 -40
- llmflow/op/vector_store/__init__.py +0 -13
- llmflow/op/vector_store/recall_vector_store_op.py +0 -48
- llmflow/op/vector_store/update_vector_store_op.py +0 -28
- llmflow/op/vector_store/vector_store_action_op.py +0 -46
- llmflow/pipeline/pipeline.py +0 -94
- llmflow/pipeline/pipeline_context.py +0 -37
- llmflow/schema/app_config.py +0 -69
- llmflow/schema/experience.py +0 -144
- llmflow/schema/message.py +0 -68
- llmflow/schema/request.py +0 -32
- llmflow/schema/response.py +0 -29
- llmflow/service/__init__.py +0 -0
- llmflow/service/llmflow_service.py +0 -96
- llmflow/tool/__init__.py +0 -9
- llmflow/tool/base_tool.py +0 -80
- llmflow/tool/code_tool.py +0 -43
- llmflow/tool/dashscope_search_tool.py +0 -162
- llmflow/tool/mcp_tool.py +0 -77
- llmflow/tool/tavily_search_tool.py +0 -109
- llmflow/tool/terminate_tool.py +0 -23
- llmflow/utils/__init__.py +0 -0
- llmflow/utils/common_utils.py +0 -17
- llmflow/utils/file_handler.py +0 -25
- llmflow/utils/http_client.py +0 -156
- llmflow/utils/op_utils.py +0 -102
- llmflow/utils/registry.py +0 -33
- llmflow/vector_store/__init__.py +0 -7
- llmflow/vector_store/base_vector_store.py +0 -136
- {llmflow → flowllm/context}/__init__.py +0 -0
- {llmflow/config → flowllm/enumeration}/__init__.py +0 -0
- {llmflow → flowllm}/enumeration/chunk_enum.py +0 -0
- {llmflow → flowllm}/enumeration/http_enum.py +0 -0
- {llmflow → flowllm}/enumeration/role.py +0 -0
- {llmflow/enumeration → flowllm/flow/parser}/__init__.py +0 -0
- {llmflow/op/react → flowllm/op/agent}/__init__.py +0 -0
- /llmflow/op/react/react_v1_prompt.yaml → /flowllm/op/agent/react_prompt.yaml +0 -0
- {llmflow/pipeline → flowllm/schema}/__init__.py +0 -0
- {llmflow/schema → flowllm/utils}/__init__.py +0 -0
- {llmflow → flowllm}/utils/singleton.py +0 -0
- {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/WHEEL +0 -0
llmflow/vector_store/file_vector_store.py → flowllm/storage/vector_store/local_vector_store.py
RENAMED
@@ -1,18 +1,20 @@
+import fcntl
+import json
 import math
 from pathlib import Path
 from typing import List, Iterable

 from loguru import logger
 from pydantic import Field, model_validator
+from tqdm import tqdm

-from
-from
-from
-from llmflow.vector_store.base_vector_store import BaseVectorStore
+from flowllm.context.service_context import C
+from flowllm.schema.vector_node import VectorNode
+from flowllm.storage.vector_store.base_vector_store import BaseVectorStore


-@
-class
+@C.register_vector_store("local")
+class LocalVectorStore(BaseVectorStore):
     store_dir: str = Field(default="./file_vector_store")

     @model_validator(mode="after")
@@ -21,6 +23,55 @@ class FileVectorStore(BaseVectorStore):
         store_path.mkdir(parents=True, exist_ok=True)
         return self

+    @staticmethod
+    def _load_from_path(workspace_id: str, path: str | Path, callback_fn=None, **kwargs) -> Iterable[VectorNode]:
+        workspace_path = Path(path) / f"{workspace_id}.jsonl"
+        if not workspace_path.exists():
+            logger.warning(f"workspace_path={workspace_path} is not exists!")
+            return
+
+        with workspace_path.open() as f:
+            fcntl.flock(f, fcntl.LOCK_SH)
+            try:
+                for line in tqdm(f, desc="load from path"):
+                    if line.strip():
+                        node_dict = json.loads(line.strip())
+                        if callback_fn:
+                            node = callback_fn(node_dict)
+                        else:
+                            node = VectorNode(**node_dict, **kwargs)
+                        node.workspace_id = workspace_id
+                        yield node
+
+            finally:
+                fcntl.flock(f, fcntl.LOCK_UN)
+
+    @staticmethod
+    def _dump_to_path(nodes: Iterable[VectorNode], workspace_id: str, path: str | Path = "", callback_fn=None,
+                      ensure_ascii: bool = False, **kwargs):
+        dump_path: Path = Path(path)
+        dump_path.mkdir(parents=True, exist_ok=True)
+        dump_file = dump_path / f"{workspace_id}.jsonl"
+
+        count = 0
+        with dump_file.open("w") as f:
+            fcntl.flock(f, fcntl.LOCK_EX)
+            try:
+                for node in tqdm(nodes, desc="dump to path"):
+                    node.workspace_id = workspace_id
+                    if callback_fn:
+                        node_dict = callback_fn(node)
+                    else:
+                        node_dict = node.model_dump()
+                    assert isinstance(node_dict, dict)
+                    f.write(json.dumps(node_dict, ensure_ascii=ensure_ascii, **kwargs))
+                    f.write("\n")
+                    count += 1
+
+                return {"size": count}
+            finally:
+                fcntl.flock(f, fcntl.LOCK_UN)
+
     @property
     def store_path(self) -> Path:
         return Path(self.store_dir)
@@ -41,6 +92,54 @@ class FileVectorStore(BaseVectorStore):
         for i, node in enumerate(self._load_from_path(path=self.store_path, workspace_id=workspace_id, **kwargs)):
             yield node

+    def dump_workspace(self, workspace_id: str, path: str | Path = "", callback_fn=None, **kwargs):
+        if not self.exist_workspace(workspace_id=workspace_id, **kwargs):
+            logger.warning(f"workspace_id={workspace_id} is not exist!")
+            return {}
+
+        return self._dump_to_path(nodes=self._iter_workspace_nodes(workspace_id=workspace_id, **kwargs),
+                                  workspace_id=workspace_id,
+                                  path=path,
+                                  callback_fn=callback_fn,
+                                  **kwargs)
+
+    def load_workspace(self, workspace_id: str, path: str | Path = "", nodes: List[VectorNode] = None, callback_fn=None,
+                       **kwargs):
+        if self.exist_workspace(workspace_id, **kwargs):
+            self.delete_workspace(workspace_id=workspace_id, **kwargs)
+            logger.info(f"delete workspace_id={workspace_id}")
+
+        self.create_workspace(workspace_id=workspace_id, **kwargs)
+
+        all_nodes: List[VectorNode] = []
+        if nodes:
+            all_nodes.extend(nodes)
+        for node in self._load_from_path(path=path, workspace_id=workspace_id, callback_fn=callback_fn, **kwargs):
+            all_nodes.append(node)
+        self.insert(nodes=all_nodes, workspace_id=workspace_id, **kwargs)
+        return {"size": len(all_nodes)}
+
+    def copy_workspace(self, src_workspace_id: str, dest_workspace_id: str, **kwargs):
+        if not self.exist_workspace(workspace_id=src_workspace_id, **kwargs):
+            logger.warning(f"src_workspace_id={src_workspace_id} is not exist!")
+            return {}
+
+        if not self.exist_workspace(dest_workspace_id, **kwargs):
+            self.create_workspace(workspace_id=dest_workspace_id, **kwargs)
+
+        nodes = []
+        node_size = 0
+        for node in self._iter_workspace_nodes(workspace_id=src_workspace_id, **kwargs):
+            nodes.append(node)
+            node_size += 1
+            if len(nodes) >= self.batch_size:
+                self.insert(nodes=nodes, workspace_id=dest_workspace_id, **kwargs)
+                nodes.clear()
+
+        if nodes:
+            self.insert(nodes=nodes, workspace_id=dest_workspace_id, **kwargs)
+        return {"size": node_size}
+
     @staticmethod
     def calculate_similarity(query_vector: List[float], node_vector: List[float]):
         assert query_vector, f"query_vector is empty!"
@@ -104,14 +203,15 @@ class FileVectorStore(BaseVectorStore):
         self._dump_to_path(nodes=all_nodes, workspace_id=workspace_id, path=self.store_path, **kwargs)
         logger.info(f"delete workspace_id={workspace_id} before_size={before_size} after_size={after_size}")

-
 def main():
-    from
-
+    from flowllm.utils.common_utils import load_env
+    from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
+
+    load_env()

     embedding_model = OpenAICompatibleEmbeddingModel(dimensions=64, model_name="text-embedding-v4")
     workspace_id = "rag_nodes_index"
-    client =
+    client = LocalVectorStore(embedding_model=embedding_model)
     client.delete_workspace(workspace_id)
     client.create_workspace(workspace_id)

@@ -160,4 +260,3 @@ def main():

 if __name__ == "__main__":
     main()
-# launch with: python -m llmflow.storage.file_vector_store
flowllm/utils/common_utils.py
ADDED
@@ -0,0 +1,52 @@
+import os
+import re
+from pathlib import Path
+
+from loguru import logger
+
+
+def camel_to_snake(content: str) -> str:
+    """
+    BaseWorker -> base_worker
+    """
+    snake_str = re.sub(r'(?<!^)(?=[A-Z])', '_', content).lower()
+    return snake_str
+
+
+def snake_to_camel(content: str) -> str:
+    """
+    base_worker -> BaseWorker
+    """
+    camel_str = "".join(x.capitalize() for x in content.split("_"))
+    return camel_str
+
+
+def _load_env(path: Path):
+    with path.open() as f:
+        for line in f:
+            line = line.strip()
+            if line.startswith("#"):
+                continue
+
+            line_split = line.strip().split("=", 1)
+            if len(line_split) >= 2:
+                key = line_split[0].strip()
+                value = line_split[1].strip()
+                os.environ[key] = value
+
+
+def load_env(path: str | Path = None):
+    if path is not None:
+        path = Path(path)
+        if path.exists():
+            _load_env(path)
+
+    else:
+        for i in range(5):
+            path = Path("../" * i + ".env")
+            if path.exists():
+                logger.info(f"using path={path}")
+                _load_env(path)
+                return
+
+        raise FileNotFoundError(".env not found")
flowllm/utils/fetch_url.py
ADDED
@@ -0,0 +1,117 @@
+import random
+import time
+
+import requests
+import urllib3
+from bs4 import BeautifulSoup
+
+# Disable SSL warnings (optional, for handling insecure HTTPS)
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+
+def get_random_headers():
+    """Generate random headers to avoid detection"""
+    user_agents = [
+        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36',
+        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36',
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15',
+        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:89.0) Gecko/20100101 Firefox/89.0',
+        'Mozilla/5.0 (X11; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0',
+        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/91.0.864.59'
+    ]
+
+    accept_languages = [
+        'en-US,en;q=0.9',
+        'zh-CN,zh;q=0.9,en;q=0.8',
+        'en-GB,en;q=0.9',
+        'fr-FR,fr;q=0.9,en;q=0.8',
+        'de-DE,de;q=0.9,en;q=0.8'
+    ]
+
+    accept_encodings = [
+        'gzip, deflate, br',
+        'gzip, deflate',
+        'br, gzip, deflate'
+    ]
+
+    headers = {
+        'User-Agent': random.choice(user_agents),
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+        'Accept-Language': random.choice(accept_languages),
+        'Accept-Encoding': random.choice(accept_encodings),
+        'Connection': 'keep-alive',
+        'Upgrade-Insecure-Requests': '1',
+    }
+
+    # Randomly add some optional headers
+    if random.random() > 0.5:
+        headers['DNT'] = '1'
+    if random.random() > 0.7:
+        headers['Cache-Control'] = 'max-age=0'
+    if random.random() > 0.6:
+        headers['Sec-Fetch-Dest'] = 'document'
+        headers['Sec-Fetch-Mode'] = 'navigate'
+        headers['Sec-Fetch-Site'] = 'none'
+
+    return headers
+
+
+def fetch_webpage_text(url, min_delay=1, max_delay=3):
+    """
+    Fetch and extract text content from a webpage with randomization
+
+    Args:
+        url (str): The URL to fetch
+        min_delay (int): Minimum delay before request (seconds)
+        max_delay (int): Maximum delay before request (seconds)
+
+    Returns:
+        str: Extracted text content or error message
+    """
+    # Add random delay to avoid being detected as bot
+    delay = random.uniform(min_delay, max_delay)
+    time.sleep(delay)
+
+    headers = get_random_headers()
+
+    # Random timeout between 8-15 seconds
+    timeout = random.randint(8, 15)
+
+    try:
+        # Send request with random headers and timeout
+        response = requests.get(url, headers=headers, timeout=timeout, verify=False)
+        response.raise_for_status()  # Check if request was successful
+        response.encoding = response.apparent_encoding  # Auto-detect encoding
+
+        # Parse HTML using BeautifulSoup
+        soup = BeautifulSoup(response.text, 'html.parser')
+
+        # Remove script, style and navigation elements to avoid interference
+        for script_or_style in soup(['script', 'style', 'nav', 'footer', 'header']):
+            script_or_style.decompose()
+
+        # Extract text content
+        text = soup.get_text()
+
+        # Clean whitespace: remove extra blank lines and spaces
+        lines = (line.strip() for line in text.splitlines())
+        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
+        text = ' '.join(chunk for chunk in chunks if chunk)
+
+        return text
+
+    except requests.exceptions.RequestException as e:
+        return f"Request failed: {e}"
+    except Exception as e:
+        return f"Parsing failed: {e}"
+
+
+# Example usage
+if __name__ == "__main__":
+    url = "http://finance.eastmoney.com/a/202508133482756869.html"
+    text = fetch_webpage_text(url)
+    print(text)
flowllm/utils/llm_utils.py
ADDED
@@ -0,0 +1,28 @@
+from typing import List
+
+from flowllm.enumeration.role import Role
+from flowllm.schema.message import Message
+
+
+def merge_messages_content(messages: List[Message | dict]) -> str:
+    content_collector = []
+    for i, message in enumerate(messages):
+        if isinstance(message, dict):
+            message = Message(**message)
+
+        if message.role is Role.ASSISTANT:
+            line = f"### step.{i} role={message.role.value} content=\n{message.reasoning_content}\n\n{message.content}\n"
+            if message.tool_calls:
+                for tool_call in message.tool_calls:
+                    line += f" - tool call={tool_call.name}\n params={tool_call.arguments}\n"
+            content_collector.append(line)
+
+        elif message.role is Role.USER:
+            line = f"### step.{i} role={message.role.value} content=\n{message.content}\n"
+            content_collector.append(line)
+
+        elif message.role is Role.TOOL:
+            line = f"### step.{i} role={message.role.value} tool call result=\n{message.content}\n"
+            content_collector.append(line)
+
+    return "\n".join(content_collector)
flowllm/utils/ridge_v2.py
ADDED
@@ -0,0 +1,54 @@
+import numpy as np
+from sklearn.linear_model import Ridge
+from sklearn.preprocessing import StandardScaler
+
+
+class RidgeV2:
+
+    def __init__(self, bound: int = 4.6, use_ridge_v2=True, **kwargs):
+        self.bound: int = bound
+        self.use_ridge_v2: bool = use_ridge_v2
+        self.kwargs: dict = kwargs
+        self.model = Ridge(**self.kwargs)
+
+    def clear(self):
+        self.model = Ridge(**self.kwargs)
+        return self
+
+    def sigmoid(self, x):
+        x = np.asarray(x, dtype=float)
+        x = np.clip(x, -self.bound, self.bound)
+        return 1 / (1 + np.exp(-x))
+
+    def inv_sigmoid(self, p):
+        p = np.asarray(p, dtype=float)
+        p = np.clip(p, self.sigmoid(-self.bound), self.sigmoid(self.bound))
+        return np.log(p / (1 - p))
+
+    def fit(self, x, y, sample_weight=None):
+        if self.use_ridge_v2:
+            return self.model.fit(x, self.inv_sigmoid(y), sample_weight=sample_weight)
+        else:
+            return self.model.fit(x, y, sample_weight=sample_weight)
+
+    def predict(self, x):
+        if self.use_ridge_v2:
+            return self.sigmoid(self.model.predict(x))
+        else:
+            return self.model.predict(x)
+
+    def fit_and_predict(self,
+                        train_x_nd: np.ndarray,
+                        train_y_nd: np.ndarray,
+                        test_x_nd: np.ndarray,
+                        check_y: bool = True):
+        if check_y:
+            assert np.all((train_y_nd >= 0) & (train_y_nd <= 1))
+
+        scaler = StandardScaler()
+        scaler.fit(train_x_nd)
+        train_x_nd = scaler.transform(train_x_nd)
+        test_x_nd = scaler.transform(test_x_nd)
+        self.model.fit(train_x_nd, train_y_nd)
+        pred_y_nd = self.model.predict(test_x_nd)
+        return np.minimum(np.maximum(pred_y_nd, 0), 1)
llmflow/utils/timer.py → flowllm/utils/timer.py
RENAMED
@@ -1,9 +1,10 @@
 import time
+from typing import Optional

 from loguru import logger


-class Timer
+class Timer:
     def __init__(self, name: str, use_ms: bool = False, stack_level: int = 2):
         self.name: str = name
         self.use_ms: bool = use_ms
@@ -15,7 +16,7 @@ class Timer(object):

     def __enter__(self, *args, **kwargs):
         self.time_start = time.time()
-        logger.info(f"
+        logger.info(f"========== timer.{self.name} start ==========", stacklevel=self.stack_level)
         return self

     def __exit__(self, *args):
@@ -26,10 +27,10 @@ class Timer(object):
         else:
             time_str = f"{self.time_cost:.3f}s"

-        logger.info(f"
+        logger.info(f"========== timer.{self.name} end, time_cost={time_str} ==========", stacklevel=self.stack_level)


-def timer(name: str = None, use_ms: bool = False, stack_level: int = 2):
+def timer(name: Optional[str] = None, use_ms: bool = False, stack_level: int = 2):
     def decorator(func):
         def wrapper(*args, **kwargs):
             with Timer(name=name or func.__name__, use_ms=use_ms, stack_level=stack_level + 1):