flowllm 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. flowllm/__init__.py +19 -6
  2. flowllm/app.py +4 -14
  3. flowllm/client/__init__.py +25 -0
  4. flowllm/client/async_http_client.py +81 -0
  5. flowllm/client/http_client.py +81 -0
  6. flowllm/client/mcp_client.py +133 -0
  7. flowllm/client/sync_mcp_client.py +116 -0
  8. flowllm/config/__init__.py +1 -0
  9. flowllm/config/{default_config.yaml → default.yaml} +3 -8
  10. flowllm/config/empty.yaml +37 -0
  11. flowllm/config/pydantic_config_parser.py +17 -17
  12. flowllm/context/base_context.py +27 -7
  13. flowllm/context/flow_context.py +6 -18
  14. flowllm/context/registry.py +5 -1
  15. flowllm/context/service_context.py +83 -37
  16. flowllm/embedding_model/__init__.py +1 -1
  17. flowllm/embedding_model/base_embedding_model.py +91 -0
  18. flowllm/embedding_model/openai_compatible_embedding_model.py +63 -5
  19. flowllm/flow/__init__.py +1 -0
  20. flowllm/flow/base_flow.py +74 -0
  21. flowllm/flow/base_tool_flow.py +15 -0
  22. flowllm/flow/gallery/__init__.py +8 -0
  23. flowllm/flow/gallery/cmd_flow.py +11 -0
  24. flowllm/flow/gallery/code_tool_flow.py +30 -0
  25. flowllm/flow/gallery/dashscope_search_tool_flow.py +34 -0
  26. flowllm/flow/gallery/deepsearch_tool_flow.py +39 -0
  27. flowllm/flow/gallery/expression_tool_flow.py +18 -0
  28. flowllm/flow/gallery/mock_tool_flow.py +62 -0
  29. flowllm/flow/gallery/tavily_search_tool_flow.py +30 -0
  30. flowllm/flow/gallery/terminate_tool_flow.py +30 -0
  31. flowllm/flow/parser/__init__.py +0 -0
  32. flowllm/{flow_engine/simple_flow_engine.py → flow/parser/expression_parser.py} +25 -67
  33. flowllm/llm/__init__.py +2 -1
  34. flowllm/llm/base_llm.py +94 -4
  35. flowllm/llm/litellm_llm.py +456 -0
  36. flowllm/llm/openai_compatible_llm.py +205 -5
  37. flowllm/op/__init__.py +12 -3
  38. flowllm/op/agent/__init__.py +1 -0
  39. flowllm/op/agent/react_v1_op.py +109 -0
  40. flowllm/op/agent/react_v1_prompt.yaml +54 -0
  41. flowllm/op/agent/react_v2_op.py +86 -0
  42. flowllm/op/agent/react_v2_prompt.yaml +35 -0
  43. flowllm/op/akshare/__init__.py +3 -0
  44. flowllm/op/akshare/get_ak_a_code_op.py +14 -22
  45. flowllm/op/akshare/get_ak_a_info_op.py +17 -20
  46. flowllm/op/{llm_base_op.py → base_llm_op.py} +7 -5
  47. flowllm/op/base_op.py +40 -44
  48. flowllm/op/base_ray_op.py +313 -0
  49. flowllm/op/code/__init__.py +1 -0
  50. flowllm/op/code/execute_code_op.py +42 -0
  51. flowllm/op/gallery/__init__.py +2 -0
  52. flowllm/op/{mock_op.py → gallery/mock_op.py} +4 -4
  53. flowllm/op/gallery/terminate_op.py +29 -0
  54. flowllm/op/parallel_op.py +2 -9
  55. flowllm/op/search/__init__.py +3 -0
  56. flowllm/op/search/dashscope_deep_research_op.py +267 -0
  57. flowllm/op/search/dashscope_search_op.py +186 -0
  58. flowllm/op/search/dashscope_search_prompt.yaml +13 -0
  59. flowllm/op/search/tavily_search_op.py +109 -0
  60. flowllm/op/sequential_op.py +1 -9
  61. flowllm/schema/flow_request.py +12 -0
  62. flowllm/schema/message.py +2 -0
  63. flowllm/schema/service_config.py +12 -16
  64. flowllm/schema/tool_call.py +20 -8
  65. flowllm/schema/vector_node.py +1 -0
  66. flowllm/service/__init__.py +3 -2
  67. flowllm/service/base_service.py +50 -41
  68. flowllm/service/cmd_service.py +15 -0
  69. flowllm/service/http_service.py +34 -42
  70. flowllm/service/mcp_service.py +13 -11
  71. flowllm/storage/cache/__init__.py +1 -0
  72. flowllm/storage/cache/cache_data_handler.py +104 -0
  73. flowllm/{utils/dataframe_cache.py → storage/cache/data_cache.py} +136 -92
  74. flowllm/storage/vector_store/__init__.py +3 -3
  75. flowllm/storage/vector_store/base_vector_store.py +3 -0
  76. flowllm/storage/vector_store/es_vector_store.py +4 -5
  77. flowllm/storage/vector_store/local_vector_store.py +0 -1
  78. flowllm/utils/common_utils.py +9 -21
  79. flowllm/utils/fetch_url.py +16 -12
  80. flowllm/utils/llm_utils.py +28 -0
  81. flowllm/utils/logger_utils.py +28 -0
  82. flowllm/utils/ridge_v2.py +54 -0
  83. {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/METADATA +43 -390
  84. flowllm-0.1.3.dist-info/RECORD +102 -0
  85. flowllm-0.1.3.dist-info/entry_points.txt +2 -0
  86. flowllm/flow_engine/__init__.py +0 -1
  87. flowllm/flow_engine/base_flow_engine.py +0 -34
  88. flowllm-0.1.1.dist-info/RECORD +0 -62
  89. flowllm-0.1.1.dist-info/entry_points.txt +0 -4
  90. {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/WHEEL +0 -0
  91. {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/licenses/LICENSE +0 -0
  92. {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/top_level.txt +0 -0
flowllm/{utils/dataframe_cache.py → storage/cache/data_cache.py}
@@ -1,33 +1,65 @@
 """
-DataFrame cache utility that supports local CSV file storage and reading with data expiration functionality
+DataCache utility that supports multiple data types with local storage and data expiration functionality
 """
 
 import json
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Optional, Dict, Any
+from typing import Optional, Dict, Any, Union, Type
 
 import pandas as pd
 
+from flowllm.storage.cache.cache_data_handler import CacheDataHandler, DataFrameHandler, DictHandler
 
-class DataFrameCache:
+
+class DataCache:
     """
-    DataFrame cache utility class
-
+    Generic data cache utility class
+
     Features:
-    - Support for pandas DataFrame local CSV storage and reading
+    - Support for multiple data types (DataFrame, dict, and extensible for others)
     - Support for data expiration time settings
     - Automatic cleanup of expired data
     - Recording and managing update timestamps
+    - Type-specific storage formats (CSV for DataFrame, JSON for dict)
     """
 
-    def __init__(self, cache_dir: str = "cache_df"):
+    def __init__(self, cache_dir: str = "cache"):
        self.cache_dir = Path(cache_dir)
         self.cache_dir.mkdir(parents=True, exist_ok=True)
         self.metadata_file = self.cache_dir / "metadata.json"
         self.metadata = {}
+
+        # Register default handlers
+        self.handlers: Dict[Type, CacheDataHandler] = {
+            pd.DataFrame: DataFrameHandler(),
+            dict: DictHandler()
+        }
+
         self._load_metadata()
 
+    def register_handler(self, data_type: Type, handler: CacheDataHandler):
+        """
+        Register a custom data handler for a specific data type
+
+        Args:
+            data_type: The data type to handle
+            handler: The handler instance
+        """
+        self.handlers[data_type] = handler
+
+    def _get_handler(self, data_type: Type) -> CacheDataHandler:
+        """Get the appropriate handler for a data type"""
+        if data_type in self.handlers:
+            return self.handlers[data_type]
+
+        # Try to find a handler for parent classes
+        for registered_type, handler in self.handlers.items():
+            if issubclass(data_type, registered_type):
+                return handler
+
+        raise ValueError(f"No handler registered for data type: {data_type}")
+
     def _load_metadata(self):
         """Load metadata"""
         if self.metadata_file.exists():
@@ -39,9 +71,35 @@ class DataFrameCache:
         with open(self.metadata_file, "w") as f:
             json.dump(self.metadata, f, ensure_ascii=False, indent=2)
 
-    def _get_file_path(self, key: str) -> Path:
-        """Get data file path"""
-        return self.cache_dir / f"{key}.csv"
+    def _get_file_path(self, key: str, data_type: Type = None) -> Path:
+        """Get data file path with appropriate extension"""
+        if data_type is None:
+            # Try to get extension from metadata
+            if key in self.metadata and 'data_type' in self.metadata[key]:
+                stored_type_name = self.metadata[key]['data_type']
+                if stored_type_name == 'DataFrame':
+                    extension = '.csv'
+                elif stored_type_name == 'dict':
+                    extension = '.json'
+                elif stored_type_name == 'str':
+                    extension = '.txt'
+                else:
+                    # Try to find extension from registered handlers
+                    extension = '.dat'  # Default extension
+                    for registered_type, handler in self.handlers.items():
+                        if registered_type.__name__ == stored_type_name:
+                            extension = handler.get_file_extension()
+                            break
+            else:
+                extension = '.dat'  # Default extension
+        else:
+            try:
+                handler = self._get_handler(data_type)
+                extension = handler.get_file_extension()
+            except ValueError:
+                extension = '.dat'  # Default extension
+
+        return self.cache_dir / f"{key}{extension}"
 
     def _is_expired(self, key: str) -> bool:
         """Check if data is expired"""
@@ -55,32 +113,27 @@ class DataFrameCache:
         expire_time = datetime.fromisoformat(expire_time_str)
         return datetime.now() > expire_time
 
-    def save(self, key: str, df: pd.DataFrame, expire_hours: Optional[float] = None,
-             **csv_kwargs) -> bool:
+    def save(self, key: str, data: Union[pd.DataFrame, dict, Any], expire_hours: Optional[float] = None,
+             **handler_kwargs) -> bool:
         """
-        Save DataFrame to cache
-
+        Save data to cache
+
         Args:
             key: Cache key name
-            df: DataFrame to save
+            data: Data to save (DataFrame, dict, or other supported types)
             expire_hours: Expiration time in hours, None means never expires
-            **csv_kwargs: Additional parameters passed to pandas to_csv
-
+            **handler_kwargs: Additional parameters passed to the data handler
+
         Returns:
             bool: Whether save was successful
         """
         try:
-            file_path = self._get_file_path(key)
+            data_type = type(data)
+            handler = self._get_handler(data_type)
+            file_path = self._get_file_path(key, data_type)
 
-            # Set default CSV parameters
-            csv_params = {
-                "index": False,
-                "encoding": "utf-8"
-            }
-            csv_params.update(csv_kwargs)
-
-            # Save CSV file
-            df.to_csv(file_path, **csv_params)
+            # Save data using appropriate handler
+            handler_metadata = handler.save(data, file_path, **handler_kwargs)
 
             # Update metadata
             current_time = datetime.now()
@@ -88,29 +141,28 @@
                 'created_time': current_time.isoformat(),
                 'updated_time': current_time.isoformat(),
                 'expire_time': (current_time + timedelta(hours=expire_hours)).isoformat() if expire_hours else None,
-                'file_size': file_path.stat().st_size,
-                'row_count': len(df),
-                'column_count': len(df.columns)
+                'data_type': data_type.__name__,
+                **handler_metadata
             }
 
             self._save_metadata()
             return True
 
         except Exception as e:
-            print(f"Failed to save DataFrame: {e}")
+            print(f"Failed to save data: {e}")
             return False
 
-    def load(self, key: str, auto_clean_expired: bool = True, **csv_kwargs) -> Optional[pd.DataFrame]:
+    def load(self, key: str, auto_clean_expired: bool = True, **handler_kwargs) -> Optional[Any]:
         """
-        Load DataFrame from cache
-
+        Load data from cache
+
         Args:
             key: Cache key name
             auto_clean_expired: Whether to automatically clean expired data
-            **csv_kwargs: Additional parameters passed to pandas read_csv
-
+            **handler_kwargs: Additional parameters passed to the data handler
+
         Returns:
-            Optional[pd.DataFrame]: Loaded DataFrame, returns None if not exists or expired
+            Optional[Any]: Loaded data, returns None if not exists or expired
         """
         try:
             # Check if expired
@@ -124,34 +176,56 @@
             if not file_path.exists():
                 return None
 
-            # Set default CSV parameters
-            csv_params = {
-                'encoding': 'utf-8'
-            }
-            csv_params.update(csv_kwargs)
+            # Get data type from metadata
+            if key not in self.metadata or 'data_type' not in self.metadata[key]:
+                print(f"No data type information found for key '{key}'")
+                return None
+
+            data_type_name = self.metadata[key]['data_type']
+
+            # Map type name back to actual type
+            if data_type_name == 'DataFrame':
+                data_type = pd.DataFrame
+            elif data_type_name == 'dict':
+                data_type = dict
+            elif data_type_name == 'str':
+                data_type = str
+            else:
+                # For other custom types, try to find a handler by checking registered types
+                data_type = None
+                for registered_type in self.handlers.keys():
+                    if registered_type.__name__ == data_type_name:
+                        data_type = registered_type
+                        break
+
+                if data_type is None:
+                    print(f"Unknown data type: {data_type_name}")
+                    return None
 
-            # Read CSV file
-            df = pd.read_csv(file_path, **csv_params)
+            handler = self._get_handler(data_type)
+
+            # Load data using appropriate handler
+            data = handler.load(file_path, **handler_kwargs)
 
             # Update last access time
             if key in self.metadata:
                 self.metadata[key]['last_accessed'] = datetime.now().isoformat()
                 self._save_metadata()
 
-            return df
+            return data
 
         except Exception as e:
-            print(f"Failed to load DataFrame: {e}")
+            print(f"Failed to load data: {e}")
             return None
 
     def exists(self, key: str, check_expired: bool = True) -> bool:
         """
         Check if cache exists
-
+
         Args:
             key: Cache key name
             check_expired: Whether to check expiration status
-
+
         Returns:
             bool: Whether cache exists and is not expired
         """
@@ -164,17 +238,17 @@
     def delete(self, key: str) -> bool:
         """
         Delete cache
-
+
         Args:
             key: Cache key name
-
+
         Returns:
             bool: Whether deletion was successful
         """
         try:
             file_path = self._get_file_path(key)
 
-            # Delete CSV file
+            # Delete data file
             if file_path.exists():
                 file_path.unlink()
 
@@ -192,7 +266,7 @@
     def clean_expired(self) -> int:
         """
         Clean all expired caches
-
+
         Returns:
             int: Number of cleaned caches
         """
@@ -212,10 +286,10 @@
     def get_info(self, key: str) -> Optional[Dict[str, Any]]:
         """
         Get cache information
-
+
         Args:
             key: Cache key name
-
+
         Returns:
             Optional[Dict]: Cache information including creation time, update time, expiration time, etc.
         """
@@ -232,10 +306,10 @@
     def list_all(self, include_expired: bool = False) -> Dict[str, Dict[str, Any]]:
         """
         List all caches
-
+
         Args:
             include_expired: Whether to include expired caches
-
+
         Returns:
             Dict: Information of all caches
         """
@@ -254,7 +328,7 @@
     def get_cache_stats(self) -> Dict[str, Any]:
         """
         Get cache statistics
-
+
         Returns:
             Dict: Cache statistics information
         """
@@ -280,14 +354,15 @@
     def clear_all(self) -> bool:
         """
         Clear all caches
-
+
         Returns:
             bool: Whether clearing was successful
         """
         try:
-            # Delete all CSV files
-            for csv_file in self.cache_dir.glob("*.csv"):
-                csv_file.unlink()
+            # Delete all data files (CSV, JSON, and other supported formats)
+            for data_file in self.cache_dir.glob("*"):
+                if data_file.is_file() and data_file.name != "metadata.json":
+                    data_file.unlink()
 
             # Clear metadata
             self.metadata = {}
@@ -298,34 +373,3 @@
         except Exception as e:
             print(f"Failed to clear cache: {e}")
             return False
-
-
-# Create default instance
-default_cache = DataFrameCache()
-
-
-# Convenience functions
-def save_dataframe(key: str, df: pd.DataFrame, expire_hours: Optional[float] = None,
-                   **csv_kwargs) -> bool:
-    """Convenience function: Save DataFrame"""
-    return default_cache.save(key, df, expire_hours, **csv_kwargs)
-
-
-def load_dataframe(key: str, **csv_kwargs) -> Optional[pd.DataFrame]:
-    """Convenience function: Load DataFrame"""
-    return default_cache.load(key, **csv_kwargs)
-
-
-def dataframe_exists(key: str) -> bool:
-    """Convenience function: Check if DataFrame exists"""
-    return default_cache.exists(key)
-
-
-def delete_dataframe(key: str) -> bool:
-    """Convenience function: Delete DataFrame cache"""
-    return default_cache.delete(key)
-
-
-def clean_expired_dataframes() -> int:
-    """Convenience function: Clean expired DataFrame caches"""
-    return default_cache.clean_expired()
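The switch from DataFrameCache to DataCache turns the cache into a type-dispatched registry: save() picks a CacheDataHandler by type(data), records the type name plus whatever metadata the handler returns, and load() routes back through the same handler. A minimal usage sketch, assuming the handler interface implied by the calls in the diff above (save/load/get_file_extension); the key names and TextHandler are illustrative:

from pathlib import Path

import pandas as pd

from flowllm.storage.cache.cache_data_handler import CacheDataHandler
from flowllm.storage.cache.data_cache import DataCache

cache = DataCache(cache_dir="cache")

# Built-in handlers: DataFrame -> CSV, dict -> JSON
cache.save("prices", pd.DataFrame({"close": [10.2, 10.5]}), expire_hours=24)
df = cache.load("prices")
cache.save("config", {"retries": 3})  # expire_hours=None: never expires


class TextHandler(CacheDataHandler):
    """Hypothetical handler storing plain strings as .txt files."""

    def get_file_extension(self) -> str:
        return ".txt"

    def save(self, data: str, file_path: Path, **kwargs) -> dict:
        file_path.write_text(data, encoding="utf-8")
        return {"char_count": len(data)}  # merged into the cache metadata

    def load(self, file_path: Path, **kwargs) -> str:
        return file_path.read_text(encoding="utf-8")


cache.register_handler(str, TextHandler())
cache.save("note", "hello flowllm")  # stored as cache/note.txt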
flowllm/storage/vector_store/__init__.py
@@ -1,3 +1,3 @@
-from flowllm.storage.vector_store.chroma_vector_store import ChromaVectorStore
-from flowllm.storage.vector_store.es_vector_store import EsVectorStore
-from flowllm.storage.vector_store.local_vector_store import LocalVectorStore
+from .chroma_vector_store import ChromaVectorStore
+from .es_vector_store import EsVectorStore
+from .local_vector_store import LocalVectorStore
flowllm/storage/vector_store/base_vector_store.py
@@ -24,6 +24,9 @@ class BaseVectorStore(BaseModel, ABC):
     def _iter_workspace_nodes(self, workspace_id: str, **kwargs) -> Iterable[VectorNode]:
         raise NotImplementedError
 
+    def iter_workspace_nodes(self, workspace_id: str, **kwargs) -> Iterable[VectorNode]:
+        return self._iter_workspace_nodes(workspace_id, **kwargs)
+
     def dump_workspace(self, workspace_id: str, path: str | Path = "", callback_fn=None, **kwargs):
         raise NotImplementedError
 
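The new public iter_workspace_nodes wrapper exposes the previously private _iter_workspace_nodes hook, so callers can stream a workspace's nodes without touching an underscore method. A minimal sketch, assuming `store` is a concrete BaseVectorStore subclass instance constructed elsewhere and the workspace name is illustrative:

# `store` is any concrete BaseVectorStore subclass (e.g. LocalVectorStore)
for node in store.iter_workspace_nodes(workspace_id="demo_workspace"):
    print(node.unique_id, node.metadata)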
flowllm/storage/vector_store/es_vector_store.py
@@ -13,7 +13,7 @@ from flowllm.storage.vector_store.local_vector_store import LocalVectorStore
 
 @C.register_vector_store("elasticsearch")
 class EsVectorStore(LocalVectorStore):
-    hosts: str | List[str] = Field(default_factory=lambda: os.getenv("ES_HOSTS", "http://localhost:9200"))
+    hosts: str | List[str] = Field(default_factory=lambda: os.getenv("FLOW_ES_HOSTS", "http://localhost:9200"))
     basic_auth: str | Tuple[str, str] | None = Field(default=None)
     retrieve_filters: List[dict] = []
     _client: Elasticsearch = PrivateAttr()
@@ -62,7 +62,7 @@ class EsVectorStore(LocalVectorStore):
         node.workspace_id = workspace_id
         node.unique_id = doc["_id"]
         if "_score" in doc:
-            node.metadata["_score"] = doc["_score"] - 1
+            node.metadata["score"] = doc["_score"] - 1
         return node
 
     def add_term_filter(self, key: str, value):
@@ -111,7 +111,7 @@ class EsVectorStore(LocalVectorStore):
         self.retrieve_filters.clear()
         return nodes
 
-    def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, refresh: bool = False, **kwargs):
+    def insert(self, nodes: VectorNode | List[VectorNode], workspace_id: str, refresh: bool = True, **kwargs):
         if not self.exist_workspace(workspace_id=workspace_id):
             self.create_workspace(workspace_id=workspace_id)
 
@@ -140,7 +140,7 @@ class EsVectorStore(LocalVectorStore):
         if refresh:
             self.refresh(workspace_id=workspace_id)
 
-    def delete(self, node_ids: str | List[str], workspace_id: str, refresh: bool = False, **kwargs):
+    def delete(self, node_ids: str | List[str], workspace_id: str, refresh: bool = True, **kwargs):
         if not self.exist_workspace(workspace_id=workspace_id):
             logger.warning(f"workspace_id={workspace_id} is not exists!")
             return
@@ -160,7 +160,6 @@ class EsVectorStore(LocalVectorStore):
         if refresh:
             self.refresh(workspace_id=workspace_id)
 
-
 def main():
     from flowllm.utils.common_utils import load_env
     from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
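Three behavioral changes in EsVectorStore are worth noting: the hosts default now reads the FLOW_ES_HOSTS environment variable (previously ES_HOSTS), retrieval scores are exposed as metadata["score"] rather than metadata["_score"], and insert/delete now refresh the index by default. A sketch against a local Elasticsearch (workspace name and nodes are placeholders):

import os

# 0.1.3 reads FLOW_ES_HOSTS; deployments that exported ES_HOSTS must rename it
os.environ.setdefault("FLOW_ES_HOSTS", "http://localhost:9200")

from flowllm.storage.vector_store.es_vector_store import EsVectorStore

store = EsVectorStore()

nodes = []  # VectorNode instances prepared elsewhere
store.insert(nodes, workspace_id="demo")  # refresh=True is now the default
# pass refresh=False to keep the 0.1.1 bulk-friendly behavior:
# store.insert(nodes, workspace_id="demo", refresh=False)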
flowllm/storage/vector_store/local_vector_store.py
@@ -203,7 +203,6 @@ class LocalVectorStore(BaseVectorStore):
         self._dump_to_path(nodes=all_nodes, workspace_id=workspace_id, path=self.store_path, **kwargs)
         logger.info(f"delete workspace_id={workspace_id} before_size={before_size} after_size={after_size}")
 
-
 def main():
     from flowllm.utils.common_utils import load_env
     from flowllm.embedding_model import OpenAICompatibleEmbeddingModel
flowllm/utils/common_utils.py
@@ -40,25 +40,13 @@ def load_env(path: str | Path = None):
     path = Path(path)
     if path.exists():
         _load_env(path)
-    else:
-        path1 = Path(".env")
-        path2 = Path("../.env")
-        path3 = Path("../../.env")
-        path4 = Path("../../../.env")
-        path5 = Path("../../../.env")
-
-        if path1.exists():
-            path = path1
-        elif path2.exists():
-            path = path2
-        elif path3.exists():
-            path = path3
-        elif path4.exists():
-            path = path4
-        elif path5.exists():
-            path = path5
-        else:
-            raise FileNotFoundError(".env not found")
 
-        logger.info(f"using path={path}")
-        _load_env(path)
+    else:
+        for i in range(5):
+            path = Path("../" * i + ".env")
+            if path.exists():
+                logger.info(f"using path={path}")
+                _load_env(path)
+                return
+
+        raise FileNotFoundError(".env not found")
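The rewritten fallback collapses five hard-coded candidates (including the accidental duplicate path5, which repeated "../../../.env") into a loop that also probes one level further up. The candidates it tries, in order:

from pathlib import Path

for i in range(5):
    print(Path("../" * i + ".env"))
# .env, ../.env, ../../.env, ../../../.env, ../../../../.env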
flowllm/utils/fetch_url.py
@@ -1,12 +1,14 @@
-import requests
-from bs4 import BeautifulSoup
-import urllib3
 import random
 import time
 
+import requests
+import urllib3
+from bs4 import BeautifulSoup
+
 # Disable SSL warnings (optional, for handling insecure HTTPS)
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
+
 def get_random_headers():
     """Generate random headers to avoid detection"""
     user_agents = [
@@ -21,7 +23,7 @@ def get_random_headers():
         'Mozilla/5.0 (X11; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0',
         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/91.0.864.59'
     ]
-
+
     accept_languages = [
         'en-US,en;q=0.9',
         'zh-CN,zh;q=0.9,en;q=0.8',
@@ -29,13 +31,13 @@ def get_random_headers():
         'fr-FR,fr;q=0.9,en;q=0.8',
         'de-DE,de;q=0.9,en;q=0.8'
     ]
-
+
     accept_encodings = [
         'gzip, deflate, br',
         'gzip, deflate',
         'br, gzip, deflate'
     ]
-
+
     headers = {
         'User-Agent': random.choice(user_agents),
         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
@@ -44,7 +46,7 @@ def get_random_headers():
         'Connection': 'keep-alive',
         'Upgrade-Insecure-Requests': '1',
     }
-
+
     # Randomly add some optional headers
     if random.random() > 0.5:
         headers['DNT'] = '1'
@@ -54,9 +56,10 @@ def get_random_headers():
         headers['Sec-Fetch-Dest'] = 'document'
         headers['Sec-Fetch-Mode'] = 'navigate'
         headers['Sec-Fetch-Site'] = 'none'
-
+
     return headers
 
+
 def fetch_webpage_text(url, min_delay=1, max_delay=3):
     """
     Fetch and extract text content from a webpage with randomization
@@ -72,12 +75,12 @@ def fetch_webpage_text(url, min_delay=1, max_delay=3):
     # Add random delay to avoid being detected as bot
     delay = random.uniform(min_delay, max_delay)
     time.sleep(delay)
-
+
     headers = get_random_headers()
-
+
     # Random timeout between 8-15 seconds
     timeout = random.randint(8, 15)
-
+
     try:
         # Send request with random headers and timeout
         response = requests.get(url, headers=headers, timeout=timeout, verify=False)
@@ -106,8 +109,9 @@ def fetch_webpage_text(url, min_delay=1, max_delay=3):
     except Exception as e:
         return f"Parsing failed: {e}"
 
+
 # Example usage
 if __name__ == "__main__":
     url = "http://finance.eastmoney.com/a/202508133482756869.html"
     text = fetch_webpage_text(url)
-    print(text)
+    print(text)
flowllm/utils/llm_utils.py (new file)
@@ -0,0 +1,28 @@
+from typing import List
+
+from flowllm.enumeration.role import Role
+from flowllm.schema.message import Message
+
+
+def merge_messages_content(messages: List[Message | dict]) -> str:
+    content_collector = []
+    for i, message in enumerate(messages):
+        if isinstance(message, dict):
+            message = Message(**message)
+
+        if message.role is Role.ASSISTANT:
+            line = f"### step.{i} role={message.role.value} content=\n{message.reasoning_content}\n\n{message.content}\n"
+            if message.tool_calls:
+                for tool_call in message.tool_calls:
+                    line += f" - tool call={tool_call.name}\n   params={tool_call.arguments}\n"
+            content_collector.append(line)
+
+        elif message.role is Role.USER:
+            line = f"### step.{i} role={message.role.value} content=\n{message.content}\n"
+            content_collector.append(line)
+
+        elif message.role is Role.TOOL:
+            line = f"### step.{i} role={message.role.value} tool call result=\n{message.content}\n"
+            content_collector.append(line)
+
+    return "\n".join(content_collector)
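merge_messages_content flattens an agent transcript into a readable trace: one "### step.N" block per user, assistant, and tool message (other roles, e.g. system, are skipped; assistant blocks also list any tool calls). A small sketch, assuming the Message schema coerces plain role strings to the Role enum; the transcript content is illustrative:

from flowllm.utils.llm_utils import merge_messages_content

messages = [
    {"role": "user", "content": "What is 2 + 2?"},
    {"role": "assistant", "content": "4", "reasoning_content": "Simple addition."},
    {"role": "tool", "content": "calculator returned 4"},
]
print(merge_messages_content(messages))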
flowllm/utils/logger_utils.py (new file)
@@ -0,0 +1,28 @@
+import os
+import sys
+from datetime import datetime
+
+
+def init_logger():
+    from loguru import logger
+    logger.remove()
+
+    log_dir = "logs"
+    os.makedirs(log_dir, exist_ok=True)
+
+    current_ts = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+
+    log_filename = f"{current_ts}.log"
+    log_filepath = os.path.join(log_dir, log_filename)
+
+    logger.add(log_filepath,
+               level="DEBUG",
+               rotation="00:00",
+               retention="7 days",
+               compression="zip",
+               encoding="utf-8")
+
+    logger.add(sink=sys.stdout,
+               level="INFO",
+               format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
+               colorize=True)
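init_logger replaces loguru's default sink with two: a DEBUG-level file under logs/ named by startup timestamp (rotated at midnight, kept 7 days, compressed to zip) and a colorized INFO-level stdout sink, so debug detail lands only in the file. Usage:

from loguru import logger

from flowllm.utils.logger_utils import init_logger

init_logger()
logger.debug("goes to logs/<timestamp>.log only")
logger.info("goes to both the log file and stdout")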