flowllm 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowllm/__init__.py +15 -6
- flowllm/app.py +4 -14
- flowllm/client/__init__.py +25 -0
- flowllm/client/async_http_client.py +81 -0
- flowllm/client/http_client.py +81 -0
- flowllm/client/mcp_client.py +133 -0
- flowllm/client/sync_mcp_client.py +116 -0
- flowllm/config/__init__.py +1 -0
- flowllm/config/{default_config.yaml → default.yaml} +3 -8
- flowllm/config/empty.yaml +37 -0
- flowllm/config/pydantic_config_parser.py +17 -17
- flowllm/context/base_context.py +27 -7
- flowllm/context/flow_context.py +6 -18
- flowllm/context/registry.py +5 -1
- flowllm/context/service_context.py +81 -37
- flowllm/embedding_model/__init__.py +1 -1
- flowllm/embedding_model/base_embedding_model.py +91 -0
- flowllm/embedding_model/openai_compatible_embedding_model.py +63 -5
- flowllm/flow/__init__.py +1 -0
- flowllm/flow/base_flow.py +72 -0
- flowllm/flow/base_tool_flow.py +15 -0
- flowllm/flow/gallery/__init__.py +8 -0
- flowllm/flow/gallery/cmd_flow.py +11 -0
- flowllm/flow/gallery/code_tool_flow.py +30 -0
- flowllm/flow/gallery/dashscope_search_tool_flow.py +34 -0
- flowllm/flow/gallery/deepsearch_tool_flow.py +39 -0
- flowllm/flow/gallery/expression_tool_flow.py +18 -0
- flowllm/flow/gallery/mock_tool_flow.py +67 -0
- flowllm/flow/gallery/tavily_search_tool_flow.py +30 -0
- flowllm/flow/gallery/terminate_tool_flow.py +30 -0
- flowllm/flow/parser/__init__.py +0 -0
- flowllm/{flow_engine/simple_flow_engine.py → flow/parser/expression_parser.py} +25 -67
- flowllm/llm/__init__.py +2 -1
- flowllm/llm/base_llm.py +94 -4
- flowllm/llm/litellm_llm.py +455 -0
- flowllm/llm/openai_compatible_llm.py +205 -5
- flowllm/op/__init__.py +11 -3
- flowllm/op/agent/__init__.py +0 -0
- flowllm/op/agent/react_op.py +83 -0
- flowllm/op/agent/react_prompt.yaml +28 -0
- flowllm/op/akshare/__init__.py +3 -0
- flowllm/op/akshare/get_ak_a_code_op.py +14 -22
- flowllm/op/akshare/get_ak_a_info_op.py +17 -20
- flowllm/op/{llm_base_op.py → base_llm_op.py} +6 -5
- flowllm/op/base_op.py +14 -35
- flowllm/op/base_ray_op.py +313 -0
- flowllm/op/code/__init__.py +1 -0
- flowllm/op/code/execute_code_op.py +42 -0
- flowllm/op/gallery/__init__.py +2 -0
- flowllm/op/{mock_op.py → gallery/mock_op.py} +4 -4
- flowllm/op/gallery/terminate_op.py +29 -0
- flowllm/op/parallel_op.py +2 -9
- flowllm/op/search/__init__.py +3 -0
- flowllm/op/search/dashscope_deep_research_op.py +260 -0
- flowllm/op/search/dashscope_search_op.py +179 -0
- flowllm/op/search/dashscope_search_prompt.yaml +13 -0
- flowllm/op/search/tavily_search_op.py +102 -0
- flowllm/op/sequential_op.py +1 -9
- flowllm/schema/flow_request.py +12 -0
- flowllm/schema/service_config.py +12 -16
- flowllm/schema/tool_call.py +13 -5
- flowllm/schema/vector_node.py +1 -0
- flowllm/service/__init__.py +3 -2
- flowllm/service/base_service.py +50 -41
- flowllm/service/cmd_service.py +15 -0
- flowllm/service/http_service.py +34 -42
- flowllm/service/mcp_service.py +13 -11
- flowllm/storage/cache/__init__.py +1 -0
- flowllm/storage/cache/cache_data_handler.py +104 -0
- flowllm/{utils/dataframe_cache.py → storage/cache/data_cache.py} +136 -92
- flowllm/storage/vector_store/__init__.py +3 -3
- flowllm/storage/vector_store/es_vector_store.py +1 -2
- flowllm/storage/vector_store/local_vector_store.py +0 -1
- flowllm/utils/common_utils.py +9 -21
- flowllm/utils/fetch_url.py +16 -12
- flowllm/utils/llm_utils.py +28 -0
- flowllm/utils/ridge_v2.py +54 -0
- {flowllm-0.1.1.dist-info → flowllm-0.1.2.dist-info}/METADATA +43 -390
- flowllm-0.1.2.dist-info/RECORD +99 -0
- flowllm-0.1.2.dist-info/entry_points.txt +2 -0
- flowllm/flow_engine/__init__.py +0 -1
- flowllm/flow_engine/base_flow_engine.py +0 -34
- flowllm-0.1.1.dist-info/RECORD +0 -62
- flowllm-0.1.1.dist-info/entry_points.txt +0 -4
- {flowllm-0.1.1.dist-info → flowllm-0.1.2.dist-info}/WHEEL +0 -0
- {flowllm-0.1.1.dist-info → flowllm-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {flowllm-0.1.1.dist-info → flowllm-0.1.2.dist-info}/top_level.txt +0 -0
flowllm/{utils/dataframe_cache.py → storage/cache/data_cache.py} RENAMED

@@ -1,33 +1,65 @@
 """
-
+DataCache utility that supports multiple data types with local storage and data expiration functionality
 """

 import json
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Optional, Dict, Any
+from typing import Optional, Dict, Any, Union, Type

 import pandas as pd

+from flowllm.storage.cache.cache_data_handler import CacheDataHandler, DataFrameHandler, DictHandler

-class DataFrameCache:
+
+class DataCache:
     """
-
-
+    Generic data cache utility class
+
     Features:
-    - Support for
+    - Support for multiple data types (DataFrame, dict, and extensible for others)
     - Support for data expiration time settings
     - Automatic cleanup of expired data
     - Recording and managing update timestamps
+    - Type-specific storage formats (CSV for DataFrame, JSON for dict)
     """

-    def __init__(self, cache_dir: str = "
+    def __init__(self, cache_dir: str = "cache"):
         self.cache_dir = Path(cache_dir)
         self.cache_dir.mkdir(parents=True, exist_ok=True)
         self.metadata_file = self.cache_dir / "metadata.json"
         self.metadata = {}
+
+        # Register default handlers
+        self.handlers: Dict[Type, CacheDataHandler] = {
+            pd.DataFrame: DataFrameHandler(),
+            dict: DictHandler()
+        }
+
         self._load_metadata()

+    def register_handler(self, data_type: Type, handler: CacheDataHandler):
+        """
+        Register a custom data handler for a specific data type
+
+        Args:
+            data_type: The data type to handle
+            handler: The handler instance
+        """
+        self.handlers[data_type] = handler
+
+    def _get_handler(self, data_type: Type) -> CacheDataHandler:
+        """Get the appropriate handler for a data type"""
+        if data_type in self.handlers:
+            return self.handlers[data_type]
+
+        # Try to find a handler for parent classes
+        for registered_type, handler in self.handlers.items():
+            if issubclass(data_type, registered_type):
+                return handler
+
+        raise ValueError(f"No handler registered for data type: {data_type}")
+
     def _load_metadata(self):
         """Load metadata"""
         if self.metadata_file.exists():

@@ -39,9 +71,35 @@ class DataFrameCache:
         with open(self.metadata_file, "w") as f:
             json.dump(self.metadata, f, ensure_ascii=False, indent=2)

-    def _get_file_path(self, key: str) -> Path:
-        """Get data file path"""
-
+    def _get_file_path(self, key: str, data_type: Type = None) -> Path:
+        """Get data file path with appropriate extension"""
+        if data_type is None:
+            # Try to get extension from metadata
+            if key in self.metadata and 'data_type' in self.metadata[key]:
+                stored_type_name = self.metadata[key]['data_type']
+                if stored_type_name == 'DataFrame':
+                    extension = '.csv'
+                elif stored_type_name == 'dict':
+                    extension = '.json'
+                elif stored_type_name == 'str':
+                    extension = '.txt'
+                else:
+                    # Try to find extension from registered handlers
+                    extension = '.dat'  # Default extension
+                    for registered_type, handler in self.handlers.items():
+                        if registered_type.__name__ == stored_type_name:
+                            extension = handler.get_file_extension()
+                            break
+            else:
+                extension = '.dat'  # Default extension
+        else:
+            try:
+                handler = self._get_handler(data_type)
+                extension = handler.get_file_extension()
+            except ValueError:
+                extension = '.dat'  # Default extension
+
+        return self.cache_dir / f"{key}{extension}"

     def _is_expired(self, key: str) -> bool:
         """Check if data is expired"""

@@ -55,32 +113,27 @@ class DataFrameCache:
         expire_time = datetime.fromisoformat(expire_time_str)
         return datetime.now() > expire_time

-    def save(self, key: str,
-             **
+    def save(self, key: str, data: Union[pd.DataFrame, dict, Any], expire_hours: Optional[float] = None,
+             **handler_kwargs) -> bool:
         """
-        Save
-
+        Save data to cache
+
         Args:
             key: Cache key name
-
+            data: Data to save (DataFrame, dict, or other supported types)
             expire_hours: Expiration time in hours, None means never expires
-            **
-
+            **handler_kwargs: Additional parameters passed to the data handler
+
         Returns:
             bool: Whether save was successful
         """
         try:
-
+            data_type = type(data)
+            handler = self._get_handler(data_type)
+            file_path = self._get_file_path(key, data_type)

-            #
-
-                "index": False,
-                "encoding": "utf-8"
-            }
-            csv_params.update(csv_kwargs)
-
-            # Save CSV file
-            df.to_csv(file_path, **csv_params)
+            # Save data using appropriate handler
+            handler_metadata = handler.save(data, file_path, **handler_kwargs)

             # Update metadata
             current_time = datetime.now()

@@ -88,29 +141,28 @@ class DataFrameCache:
                 'created_time': current_time.isoformat(),
                 'updated_time': current_time.isoformat(),
                 'expire_time': (current_time + timedelta(hours=expire_hours)).isoformat() if expire_hours else None,
-                '
-
-                'column_count': len(df.columns)
+                'data_type': data_type.__name__,
+                **handler_metadata
             }

             self._save_metadata()
             return True

         except Exception as e:
-            print(f"Failed to save
+            print(f"Failed to save data: {e}")
             return False

-    def load(self, key: str, auto_clean_expired: bool = True, **
+    def load(self, key: str, auto_clean_expired: bool = True, **handler_kwargs) -> Optional[Any]:
         """
-        Load
-
+        Load data from cache
+
         Args:
             key: Cache key name
             auto_clean_expired: Whether to automatically clean expired data
-            **
-
+            **handler_kwargs: Additional parameters passed to the data handler
+
         Returns:
-            Optional[
+            Optional[Any]: Loaded data, returns None if not exists or expired
         """
         try:
             # Check if expired

@@ -124,34 +176,56 @@ class DataFrameCache:
             if not file_path.exists():
                 return None

-            #
-
-
-
-
+            # Get data type from metadata
+            if key not in self.metadata or 'data_type' not in self.metadata[key]:
+                print(f"No data type information found for key '{key}'")
+                return None
+
+            data_type_name = self.metadata[key]['data_type']
+
+            # Map type name back to actual type
+            if data_type_name == 'DataFrame':
+                data_type = pd.DataFrame
+            elif data_type_name == 'dict':
+                data_type = dict
+            elif data_type_name == 'str':
+                data_type = str
+            else:
+                # For other custom types, try to find a handler by checking registered types
+                data_type = None
+                for registered_type in self.handlers.keys():
+                    if registered_type.__name__ == data_type_name:
+                        data_type = registered_type
+                        break
+
+                if data_type is None:
+                    print(f"Unknown data type: {data_type_name}")
+                    return None

-
-
+            handler = self._get_handler(data_type)
+
+            # Load data using appropriate handler
+            data = handler.load(file_path, **handler_kwargs)

             # Update last access time
             if key in self.metadata:
                 self.metadata[key]['last_accessed'] = datetime.now().isoformat()
                 self._save_metadata()

-            return
+            return data

         except Exception as e:
-            print(f"Failed to load
+            print(f"Failed to load data: {e}")
             return None

     def exists(self, key: str, check_expired: bool = True) -> bool:
         """
         Check if cache exists
-
+
         Args:
             key: Cache key name
             check_expired: Whether to check expiration status
-
+
         Returns:
             bool: Whether cache exists and is not expired
         """

@@ -164,17 +238,17 @@ class DataFrameCache:
     def delete(self, key: str) -> bool:
         """
         Delete cache
-
+
         Args:
             key: Cache key name
-
+
         Returns:
             bool: Whether deletion was successful
         """
         try:
             file_path = self._get_file_path(key)

-            # Delete
+            # Delete data file
             if file_path.exists():
                 file_path.unlink()


@@ -192,7 +266,7 @@ class DataFrameCache:
     def clean_expired(self) -> int:
         """
         Clean all expired caches
-
+
         Returns:
             int: Number of cleaned caches
         """

@@ -212,10 +286,10 @@ class DataFrameCache:
     def get_info(self, key: str) -> Optional[Dict[str, Any]]:
         """
         Get cache information
-
+
         Args:
             key: Cache key name
-
+
         Returns:
             Optional[Dict]: Cache information including creation time, update time, expiration time, etc.
         """

@@ -232,10 +306,10 @@ class DataFrameCache:
     def list_all(self, include_expired: bool = False) -> Dict[str, Dict[str, Any]]:
         """
         List all caches
-
+
         Args:
             include_expired: Whether to include expired caches
-
+
         Returns:
             Dict: Information of all caches
         """

@@ -254,7 +328,7 @@ class DataFrameCache:
     def get_cache_stats(self) -> Dict[str, Any]:
         """
         Get cache statistics
-
+
         Returns:
             Dict: Cache statistics information
         """

@@ -280,14 +354,15 @@ class DataFrameCache:
     def clear_all(self) -> bool:
         """
         Clear all caches
-
+
         Returns:
             bool: Whether clearing was successful
         """
         try:
-            # Delete all CSV
-            for
-
+            # Delete all data files (CSV, JSON, and other supported formats)
+            for data_file in self.cache_dir.glob("*"):
+                if data_file.is_file() and data_file.name != "metadata.json":
+                    data_file.unlink()

             # Clear metadata
             self.metadata = {}

@@ -298,34 +373,3 @@ class DataFrameCache:
         except Exception as e:
             print(f"Failed to clear cache: {e}")
             return False
-
-
-# Create default instance
-default_cache = DataFrameCache()
-
-
-# Convenience functions
-def save_dataframe(key: str, df: pd.DataFrame, expire_hours: Optional[float] = None,
-                   **csv_kwargs) -> bool:
-    """Convenience function: Save DataFrame"""
-    return default_cache.save(key, df, expire_hours, **csv_kwargs)
-
-
-def load_dataframe(key: str, **csv_kwargs) -> Optional[pd.DataFrame]:
-    """Convenience function: Load DataFrame"""
-    return default_cache.load(key, **csv_kwargs)
-
-
-def dataframe_exists(key: str) -> bool:
-    """Convenience function: Check if DataFrame exists"""
-    return default_cache.exists(key)
-
-
-def delete_dataframe(key: str) -> bool:
-    """Convenience function: Delete DataFrame cache"""
-    return default_cache.delete(key)
-
-
-def clean_expired_dataframes() -> int:
-    """Convenience function: Clean expired DataFrame caches"""
-    return default_cache.clean_expired()
flowllm/storage/vector_store/__init__.py CHANGED

@@ -1,3 +1,3 @@
-from
-from
-from
+from .chroma_vector_store import ChromaVectorStore
+from .es_vector_store import EsVectorStore
+from .local_vector_store import LocalVectorStore
flowllm/storage/vector_store/es_vector_store.py CHANGED

@@ -13,7 +13,7 @@ from flowllm.storage.vector_store.local_vector_store import LocalVectorStore

 @C.register_vector_store("elasticsearch")
 class EsVectorStore(LocalVectorStore):
-    hosts: str | List[str] = Field(default_factory=lambda: os.getenv("
+    hosts: str | List[str] = Field(default_factory=lambda: os.getenv("FLOW_ES_HOSTS", "http://localhost:9200"))
     basic_auth: str | Tuple[str, str] | None = Field(default=None)
     retrieve_filters: List[dict] = []
     _client: Elasticsearch = PrivateAttr()

@@ -160,7 +160,6 @@ class EsVectorStore(LocalVectorStore):
         if refresh:
             self.refresh(workspace_id=workspace_id)

-
 def main():
     from flowllm.utils.common_utils import load_env
     from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
flowllm/storage/vector_store/local_vector_store.py CHANGED

@@ -203,7 +203,6 @@ class LocalVectorStore(BaseVectorStore):
         self._dump_to_path(nodes=all_nodes, workspace_id=workspace_id, path=self.store_path, **kwargs)
         logger.info(f"delete workspace_id={workspace_id} before_size={before_size} after_size={after_size}")

-
 def main():
     from flowllm.utils.common_utils import load_env
     from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
flowllm/utils/common_utils.py CHANGED

@@ -40,25 +40,13 @@ def load_env(path: str | Path = None):
         path = Path(path)
         if path.exists():
             _load_env(path)
-        else:
-            path1 = Path(".env")
-            path2 = Path("../.env")
-            path3 = Path("../../.env")
-            path4 = Path("../../../.env")
-            path5 = Path("../../../.env")
-
-            if path1.exists():
-                path = path1
-            elif path2.exists():
-                path = path2
-            elif path3.exists():
-                path = path3
-            elif path4.exists():
-                path = path4
-            elif path5.exists():
-                path = path5
-            else:
-                raise FileNotFoundError(".env not found")

-
-
+    else:
+        for i in range(5):
+            path = Path("../" * i + ".env")
+            if path.exists():
+                logger.info(f"using path={path}")
+                _load_env(path)
+                return
+
+        raise FileNotFoundError(".env not found")
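The new fallback loop probes the working directory and up to four ancestors, replacing the old hand-enumerated candidates (which duplicated `../../../.env` and never actually checked four levels up). The probe order, spelled out:

```python
from pathlib import Path

# Candidate paths checked by the new loop, in order
candidates = [Path("../" * i + ".env") for i in range(5)]
# -> .env, ../.env, ../../.env, ../../../.env, ../../../../.env
```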
flowllm/utils/fetch_url.py CHANGED

@@ -1,12 +1,14 @@
-import requests
-from bs4 import BeautifulSoup
-import urllib3
 import random
 import time

+import requests
+import urllib3
+from bs4 import BeautifulSoup
+
 # Disable SSL warnings (optional, for handling insecure HTTPS)
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

+
 def get_random_headers():
     """Generate random headers to avoid detection"""
     user_agents = [

@@ -21,7 +23,7 @@ def get_random_headers():
         'Mozilla/5.0 (X11; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0',
         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/91.0.864.59'
     ]
-
+
     accept_languages = [
         'en-US,en;q=0.9',
         'zh-CN,zh;q=0.9,en;q=0.8',

@@ -29,13 +31,13 @@ def get_random_headers():
         'fr-FR,fr;q=0.9,en;q=0.8',
         'de-DE,de;q=0.9,en;q=0.8'
     ]
-
+
     accept_encodings = [
         'gzip, deflate, br',
         'gzip, deflate',
         'br, gzip, deflate'
     ]
-
+
     headers = {
         'User-Agent': random.choice(user_agents),
         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',

@@ -44,7 +46,7 @@ def get_random_headers():
         'Connection': 'keep-alive',
         'Upgrade-Insecure-Requests': '1',
     }
-
+
     # Randomly add some optional headers
     if random.random() > 0.5:
         headers['DNT'] = '1'

@@ -54,9 +56,10 @@ def get_random_headers():
         headers['Sec-Fetch-Dest'] = 'document'
         headers['Sec-Fetch-Mode'] = 'navigate'
         headers['Sec-Fetch-Site'] = 'none'
-
+
     return headers

+
 def fetch_webpage_text(url, min_delay=1, max_delay=3):
     """
     Fetch and extract text content from a webpage with randomization

@@ -72,12 +75,12 @@ def fetch_webpage_text(url, min_delay=1, max_delay=3):
     # Add random delay to avoid being detected as bot
     delay = random.uniform(min_delay, max_delay)
     time.sleep(delay)
-
+
     headers = get_random_headers()
-
+
     # Random timeout between 8-15 seconds
     timeout = random.randint(8, 15)
-
+
     try:
         # Send request with random headers and timeout
         response = requests.get(url, headers=headers, timeout=timeout, verify=False)

@@ -106,8 +109,9 @@ def fetch_webpage_text(url, min_delay=1, max_delay=3):
     except Exception as e:
         return f"Parsing failed: {e}"

+
 # Example usage
 if __name__ == "__main__":
     url = "http://finance.eastmoney.com/a/202508133482756869.html"
     text = fetch_webpage_text(url)
-    print(text)
+    print(text)
flowllm/utils/llm_utils.py ADDED

@@ -0,0 +1,28 @@
+from typing import List
+
+from flowllm.enumeration.role import Role
+from flowllm.schema.message import Message
+
+
+def merge_messages_content(messages: List[Message | dict]) -> str:
+    content_collector = []
+    for i, message in enumerate(messages):
+        if isinstance(message, dict):
+            message = Message(**message)
+
+        if message.role is Role.ASSISTANT:
+            line = f"### step.{i} role={message.role.value} content=\n{message.reasoning_content}\n\n{message.content}\n"
+            if message.tool_calls:
+                for tool_call in message.tool_calls:
+                    line += f" - tool call={tool_call.name}\n params={tool_call.arguments}\n"
+            content_collector.append(line)
+
+        elif message.role is Role.USER:
+            line = f"### step.{i} role={message.role.value} content=\n{message.content}\n"
+            content_collector.append(line)
+
+        elif message.role is Role.TOOL:
+            line = f"### step.{i} role={message.role.value} tool call result=\n{message.content}\n"
+            content_collector.append(line)
+
+    return "\n".join(content_collector)
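A small sketch of the new helper; it assumes `Message` is a pydantic-style schema whose `role` field coerces from strings, as the `Message(**message)` branch implies:

```python
from flowllm.utils.llm_utils import merge_messages_content

# dict entries are coerced to Message(**entry) inside the helper
transcript = merge_messages_content([
    {"role": "user", "content": "Summarize today's A-share movers."},
    {"role": "tool", "content": "Top gainer: 600519 +3.2%"},
])
print(transcript)
# ### step.0 role=user content=
# Summarize today's A-share movers.
# ...
```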
flowllm/utils/ridge_v2.py ADDED

@@ -0,0 +1,54 @@
+import numpy as np
+from sklearn.linear_model import Ridge
+from sklearn.preprocessing import StandardScaler
+
+
+class RidgeV2:
+
+    def __init__(self, bound: int = 4.6, use_ridge_v2=True, **kwargs):
+        self.bound: int = bound
+        self.use_ridge_v2: bool = use_ridge_v2
+        self.kwargs: dict = kwargs
+        self.model = Ridge(**self.kwargs)
+
+    def clear(self):
+        self.model = Ridge(**self.kwargs)
+        return self
+
+    def sigmoid(self, x):
+        x = np.asarray(x, dtype=float)
+        x = np.clip(x, -self.bound, self.bound)
+        return 1 / (1 + np.exp(-x))
+
+    def inv_sigmoid(self, p):
+        p = np.asarray(p, dtype=float)
+        p = np.clip(p, self.sigmoid(-self.bound), self.sigmoid(self.bound))
+        return np.log(p / (1 - p))
+
+    def fit(self, x, y, sample_weight=None):
+        if self.use_ridge_v2:
+            return self.model.fit(x, self.inv_sigmoid(y), sample_weight=sample_weight)
+        else:
+            return self.model.fit(x, y, sample_weight=sample_weight)
+
+    def predict(self, x):
+        if self.use_ridge_v2:
+            return self.sigmoid(self.model.predict(x))
+        else:
+            return self.model.predict(x)
+
+    def fit_and_predict(self,
+                        train_x_nd: np.ndarray,
+                        train_y_nd: np.ndarray,
+                        test_x_nd: np.ndarray,
+                        check_y: bool = True):
+        if check_y:
+            assert np.all((train_y_nd >= 0) & (train_y_nd <= 1))
+
+        scaler = StandardScaler()
+        scaler.fit(train_x_nd)
+        train_x_nd = scaler.transform(train_x_nd)
+        test_x_nd = scaler.transform(test_x_nd)
+        self.model.fit(train_x_nd, train_y_nd)
+        pred_y_nd = self.model.predict(test_x_nd)
+        return np.minimum(np.maximum(pred_y_nd, 0), 1)
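`RidgeV2` fits ridge regression in logit space: targets in [0, 1] are mapped through the clipped `inv_sigmoid` before fitting, and predictions are squashed back through `sigmoid`. A quick sketch on synthetic data:

```python
import numpy as np

from flowllm.utils.ridge_v2 import RidgeV2

rng = np.random.default_rng(42)
x = rng.normal(size=(200, 3))
# Bounded targets in (0, 1), e.g. probabilities or normalized scores
y = 1.0 / (1.0 + np.exp(-(x @ np.array([0.8, -0.5, 0.2]))))

model = RidgeV2(alpha=1.0)  # extra kwargs are forwarded to sklearn's Ridge
model.fit(x, y)             # fits on inv_sigmoid(y) since use_ridge_v2=True
pred = model.predict(x)     # sigmoid of the linear output, back in (0, 1)
```

Note that `fit_and_predict` takes a different path: it standardizes features, fits the raw targets, and clips predictions to [0, 1] directly, without the logit transform.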
|