praisonaiagents 0.0.23__py3-none-any.whl → 0.0.24__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,354 @@
1
+ """Newspaper tools for scraping and parsing news articles.
2
+
3
+ Usage:
4
+ from praisonaiagents.tools import newspaper_tools
5
+ article = newspaper_tools.get_article("https://example.com/article")
6
+ sources = newspaper_tools.get_news_sources("technology")
7
+ articles = newspaper_tools.get_articles_from_source("https://techcrunch.com")
8
+
9
+ or
10
+ from praisonaiagents.tools import get_article, get_news_sources
11
+ article = get_article("https://example.com/article")
12
+ """
13
+
14
+ import logging
15
+ from typing import List, Dict, Union, Optional, Any
16
+ from importlib import util
17
+ import json
18
+ from urllib.parse import urlparse
19
+
20
# Predefined list of popular news sources, grouped by topic category.
# This is a static catalog (no network lookup); NewspaperTools.get_news_sources
# reads it to answer category queries. Keys are lowercase category names.
POPULAR_NEWS_SOURCES = {
    'technology': [
        'https://techcrunch.com',
        'https://www.theverge.com',
        'https://www.wired.com',
        'https://www.engadget.com',
        'https://arstechnica.com'
    ],
    'business': [
        'https://www.bloomberg.com',
        'https://www.reuters.com',
        'https://www.wsj.com',
        'https://www.ft.com',
        'https://www.cnbc.com'
    ],
    'general': [
        'https://www.nytimes.com',
        'https://www.theguardian.com',
        'https://www.washingtonpost.com',
        'https://www.bbc.com',
        'https://www.cnn.com'
    ],
    'sports': [
        'https://www.espn.com',
        'https://sports.yahoo.com',
        'https://www.cbssports.com',
        'https://www.skysports.com',
        'https://www.bleacherreport.com'
    ],
    'entertainment': [
        'https://variety.com',
        'https://www.hollywoodreporter.com',
        'https://www.ew.com',
        'https://www.deadline.com',
        'https://www.imdb.com/news'
    ],
    'science': [
        'https://www.scientificamerican.com',
        'https://www.sciencedaily.com',
        'https://www.newscientist.com',
        'https://www.sciencemag.org',
        'https://www.nature.com/news'
    ]
}
65
+
66
class NewspaperTools:
    """Tools for scraping and parsing news articles via the newspaper3k package."""

    def __init__(self):
        """Initialize NewspaperTools and check for newspaper package."""
        self._check_newspaper()

    def _check_newspaper(self):
        """Check if newspaper package is installed and import it.

        Raises:
            ImportError: If the newspaper3k package is not installed.
        """
        if util.find_spec("newspaper") is None:
            raise ImportError("newspaper3k package is not available. Please install it using: pip install newspaper3k")
        # Bind the module at module scope so later calls can rely on it.
        global newspaper
        import newspaper

    def get_article(
        self,
        url: str,
        language: str = 'en'
    ) -> Dict[str, Any]:
        """
        Extract and parse a news article from a URL.

        Args:
            url: URL of the article
            language: Language code (e.g., 'en' for English)

        Returns:
            Dict: Article information (url, title, text, authors,
            publish_date, top_image, images, movies, source_domain),
            plus "keywords"/"summary" when NLP succeeds, or
            {"error": message} on failure.
        """
        try:
            from newspaper import Article, Config

            # Configure article download; a generic user agent avoids
            # trivial bot blocking on many sites.
            config = Config()
            config.browser_user_agent = 'Mozilla/5.0'
            config.language = language

            # Download and parse article
            article = Article(url, config=config)
            article.download()
            article.parse()

            # NLP (keywords/summary) is best-effort: it requires extra NLTK
            # data and must not fail the whole extraction.
            try:
                article.nlp()
            except Exception as e:
                logging.warning(f"NLP processing failed: {str(e)}")

            # Build response
            response = {
                "url": url,
                "title": article.title,
                "text": article.text,
                "authors": article.authors,
                "publish_date": article.publish_date.isoformat() if article.publish_date else None,
                "top_image": article.top_image,
                "images": list(article.images),
                "movies": list(article.movies),
                "source_domain": urlparse(url).netloc,
            }

            # Add NLP results only when they were actually produced.
            if hasattr(article, 'keywords') and article.keywords:
                response["keywords"] = article.keywords
            if hasattr(article, 'summary') and article.summary:
                response["summary"] = article.summary

            return response
        except Exception as e:
            error_msg = f"Error extracting article from {url}: {str(e)}"
            logging.error(error_msg)
            return {"error": error_msg}

    def get_news_sources(
        self,
        category: Optional[str] = None,
        language: str = 'en',
        country: Optional[str] = None
    ) -> Union[List[Dict[str, str]], Dict[str, str]]:
        """
        Get a list of news sources, optionally filtered by category.

        Args:
            category: Category to filter by (e.g., 'technology', 'sports').
                Unknown categories fall back to returning all sources.
            language: Language code. NOTE: accepted for API compatibility;
                the predefined catalog is not filtered by language.
            country: Country code. NOTE: accepted for API compatibility; unused.

        Returns:
            List[Dict] or Dict: List of news sources (url, domain, name,
            category) or error dict.
        """
        try:
            # Resolve which URLs to return. Fix: the original duplicated the
            # "aggregate every category" loop in two branches, and mislabeled
            # sources with an unknown requested category instead of "general".
            normalized = category.lower() if category else None
            if normalized in POPULAR_NEWS_SOURCES:
                urls = POPULAR_NEWS_SOURCES[normalized]
                label = normalized
            else:
                # No category given, or category not in the catalog:
                # return every known source, labeled "general".
                urls = [url for cat_urls in POPULAR_NEWS_SOURCES.values() for url in cat_urls]
                label = "general"

            sources = []
            for url in urls:
                domain = urlparse(url).netloc
                sources.append({
                    "url": url,
                    "domain": domain,
                    # Derive a display name from the domain,
                    # e.g. "www.theverge.com" -> "Theverge".
                    "name": domain.replace("www.", "").split(".")[0].title(),
                    "category": label,
                })

            return sources
        except Exception as e:
            error_msg = f"Error getting news sources: {str(e)}"
            logging.error(error_msg)
            return {"error": error_msg}

    def get_articles_from_source(
        self,
        source_url: str,
        limit: int = 10,
        language: str = 'en'
    ) -> Union[List[Dict[str, Any]], Dict[str, str]]:
        """
        Get recent articles from a news source.

        Args:
            source_url: URL of the news source
            limit: Maximum number of articles to return
            language: Language code

        Returns:
            List[Dict] or Dict: List of parsed articles or error dict.
            Articles that fail to parse are skipped, so fewer than
            `limit` entries may be returned.
        """
        try:
            from newspaper import Source, Config

            # Configure source scraping
            config = Config()
            config.browser_user_agent = 'Mozilla/5.0'
            config.language = language
            config.fetch_images = False  # Speed up processing

            # Build the source (downloads and parses its index pages).
            source = Source(source_url, config=config)
            source.build()

            # Parse up to `limit` candidate URLs, skipping failures.
            articles = []
            for article_url in source.article_urls()[:limit]:
                try:
                    article = self.get_article(article_url, language)
                    if "error" not in article:
                        articles.append(article)
                except Exception as e:
                    logging.warning(f"Error processing article {article_url}: {str(e)}")
                    continue
                # Defensive stop; the slice above already bounds the loop.
                if len(articles) >= limit:
                    break

            return articles
        except Exception as e:
            error_msg = f"Error getting articles from {source_url}: {str(e)}"
            logging.error(error_msg)
            return {"error": error_msg}

    def get_trending_topics(
        self,
        sources: Optional[List[str]] = None,
        limit: int = 10,
        language: str = 'en'
    ) -> Union[List[str], Dict[str, str]]:
        """
        Get trending topics across news sources.

        Args:
            sources: List of source URLs to analyze; defaults to the first
                five catalog sources when omitted.
            limit: Maximum number of trending topics to return
            language: Language code

        Returns:
            List[str] or Dict: List of trending topic keywords (most
            frequent first) or error dict
        """
        try:
            from collections import Counter

            # Use default sources if none provided
            if not sources:
                sources_data = self.get_news_sources(language=language)
                if isinstance(sources_data, dict) and "error" in sources_data:
                    return sources_data
                sources = [s["url"] for s in sources_data[:5]]  # Use top 5 sources

            # Collect NLP keywords from a few articles per source; sources
            # that fail are skipped so one bad site can't abort the scan.
            all_keywords = []
            for source_url in sources:
                try:
                    articles = self.get_articles_from_source(source_url, limit=5, language=language)
                    if isinstance(articles, list):
                        for article in articles:
                            if "keywords" in article:
                                all_keywords.extend(article["keywords"])
                except Exception as e:
                    logging.warning(f"Error processing source {source_url}: {str(e)}")
                    continue

            # Rank keywords by frequency and keep the top `limit`.
            trending = Counter(all_keywords).most_common(limit)
            return [topic for topic, count in trending]
        except Exception as e:
            error_msg = f"Error getting trending topics: {str(e)}"
            logging.error(error_msg)
            return {"error": error_msg}
287
+
288
# Create instance for direct function access.
# NOTE: this runs at import time, so importing this module raises
# ImportError immediately if newspaper3k is not installed.
_newspaper_tools = NewspaperTools()
get_article = _newspaper_tools.get_article
get_news_sources = _newspaper_tools.get_news_sources
get_articles_from_source = _newspaper_tools.get_articles_from_source
get_trending_topics = _newspaper_tools.get_trending_topics
294
+
295
if __name__ == "__main__":
    # Example usage / demonstration script.
    # NOTE(review): requires network access and newspaper3k; each step prints
    # either its result or the {"error": ...} dict returned on failure.
    print("\n==================================================")
    print("NewspaperTools Demonstration")
    print("==================================================\n")

    # 1. Get news sources
    print("1. Getting News Sources")
    print("------------------------------")
    tech_sources = get_news_sources("technology")
    print("Technology news sources:")
    if isinstance(tech_sources, list):
        print(json.dumps(tech_sources[:3], indent=2))  # Show first 3 sources
    else:
        print(tech_sources)  # Show error
    print()

    # Steps 2-4 only make sense when step 1 produced at least one source.
    if isinstance(tech_sources, list) and tech_sources:
        source_url = tech_sources[0]["url"]

        # 2. Get articles from a source
        print("2. Getting Articles from Source")
        print("------------------------------")
        articles = get_articles_from_source(source_url, limit=2)
        print(f"Articles from {source_url}:")
        if isinstance(articles, list):
            for article in articles:
                print(f"- {article['title']}")
                if "summary" in article:
                    print(f"  Summary: {article['summary'][:200]}...")
        else:
            print(articles)  # Show error
        print()

        # 3. Get a single article
        print("3. Getting Single Article")
        print("------------------------------")
        if isinstance(articles, list) and articles:
            article_url = articles[0]["url"]
            article = get_article(article_url)
            if "error" not in article:
                print(f"Article: {article['title']}")
                if "summary" in article:
                    print(f"Summary: {article['summary'][:200]}...")
                print(f"Authors: {', '.join(article['authors'])}")
                print(f"Date: {article['publish_date']}")
            else:
                print(article)  # Show error
            print()

        # 4. Get trending topics
        print("4. Getting Trending Topics")
        print("------------------------------")
        topics = get_trending_topics([source_url], limit=5)
        print("Trending topics:")
        print(json.dumps(topics, indent=2))

    print("\n==================================================")
    print("Demonstration Complete")
    print("==================================================")
@@ -0,0 +1,326 @@
1
+ """Pandas tools for data manipulation and analysis.
2
+
3
+ Usage:
4
+ from praisonaiagents.tools import pandas_tools
5
+ df = pandas_tools.read_csv("data.csv")
6
+ df = pandas_tools.filter_data(df, "column > 5")
7
+ summary = pandas_tools.get_summary(df)
8
+
9
+ or
10
+ from praisonaiagents.tools import read_csv, filter_data, get_summary
11
+ df = read_csv("data.csv")
12
+ """
13
+
14
+ import logging
15
+ from typing import List, Dict, Union, Optional, Any
16
+ from importlib import util
17
+ import json
18
+ import os
19
+
20
# Import pandas for type hints, but don't use it until we check it's installed.
if util.find_spec("pandas") is not None:
    import pandas as pd
    import numpy as np
else:
    # Placeholders so module-level annotations (e.g. pd.DataFrame) and the
    # isinstance checks in _convert_to_serializable still resolve when
    # pandas/numpy are absent. Fix: the original only stubbed `pd`, leaving
    # `np` undefined (NameError instead of a clean fallthrough).
    # PandasTools.__init__ raises a clear ImportError before real use.
    class pd:
        DataFrame = None

    class np:
        # Empty tuples make isinstance(x, (np.integer, np.floating)) etc.
        # return False rather than crash.
        integer = floating = bool_ = ()
        ndarray = ()
28
+
29
+ def _convert_to_serializable(obj: Any) -> Any:
30
+ """Convert numpy/pandas types to JSON serializable Python types."""
31
+ if isinstance(obj, (np.integer, np.floating)):
32
+ return obj.item()
33
+ elif isinstance(obj, np.ndarray):
34
+ return obj.tolist()
35
+ elif isinstance(obj, pd.Series):
36
+ return obj.to_list()
37
+ elif isinstance(obj, pd.DataFrame):
38
+ return obj.to_dict(orient='records')
39
+ return obj
40
+
41
class PandasTools:
    """Tools for data manipulation and analysis using pandas."""

    def __init__(self):
        """Initialize PandasTools and check for pandas installation."""
        self._check_pandas()

    def _check_pandas(self):
        """Check if pandas is installed and bind pd/np at module scope.

        Raises:
            ImportError: If pandas is not installed.
        """
        if util.find_spec("pandas") is None:
            raise ImportError("pandas is not available. Please install it using: pip install pandas")
        # Replace the module-level placeholders with the real modules.
        global pd, np
        import pandas as pd
        import numpy as np

    @staticmethod
    def _ensure_parent_dir(filepath: str) -> None:
        """Create the parent directory of filepath if it has one.

        Fix: os.makedirs(os.path.dirname(filepath)) raises when filepath is
        a bare filename (dirname is ""), so writing to the current directory
        always failed. Guard against the empty dirname.
        """
        directory = os.path.dirname(filepath)
        if directory:
            os.makedirs(directory, exist_ok=True)

    def read_csv(self, filepath: str, **kwargs) -> Union[pd.DataFrame, Dict[str, str]]:
        """
        Read a CSV file into a pandas DataFrame.

        Args:
            filepath: Path to the CSV file
            **kwargs: Additional arguments to pass to pd.read_csv()

        Returns:
            pd.DataFrame or Dict: DataFrame if successful, error dict if failed
        """
        try:
            return pd.read_csv(filepath, **kwargs)
        except Exception as e:
            error_msg = f"Error reading CSV file {filepath}: {str(e)}"
            logging.error(error_msg)
            return {"error": error_msg}

    def read_excel(self, filepath: str, **kwargs) -> Union[pd.DataFrame, Dict[str, str]]:
        """
        Read an Excel file into a pandas DataFrame.

        Args:
            filepath: Path to the Excel file
            **kwargs: Additional arguments to pass to pd.read_excel()

        Returns:
            pd.DataFrame or Dict: DataFrame if successful, error dict if failed
        """
        try:
            return pd.read_excel(filepath, **kwargs)
        except Exception as e:
            error_msg = f"Error reading Excel file {filepath}: {str(e)}"
            logging.error(error_msg)
            return {"error": error_msg}

    def write_csv(self, df: pd.DataFrame, filepath: str, **kwargs) -> bool:
        """
        Write DataFrame to a CSV file, creating parent directories as needed.

        Args:
            df: DataFrame to write
            filepath: Output file path
            **kwargs: Additional arguments to pass to df.to_csv()

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            self._ensure_parent_dir(filepath)
            df.to_csv(filepath, **kwargs)
            return True
        except Exception as e:
            error_msg = f"Error writing CSV file {filepath}: {str(e)}"
            logging.error(error_msg)
            return False

    def write_excel(self, df: pd.DataFrame, filepath: str, **kwargs) -> bool:
        """
        Write DataFrame to an Excel file, creating parent directories as needed.

        Args:
            df: DataFrame to write
            filepath: Output file path
            **kwargs: Additional arguments to pass to df.to_excel()

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            self._ensure_parent_dir(filepath)
            df.to_excel(filepath, **kwargs)
            return True
        except Exception as e:
            error_msg = f"Error writing Excel file {filepath}: {str(e)}"
            logging.error(error_msg)
            return False

    def filter_data(self, df: pd.DataFrame, query: str) -> Union[pd.DataFrame, Dict[str, str]]:
        """
        Filter DataFrame using a query string.

        Args:
            df: Input DataFrame
            query: Query string (e.g., "column > 5 and other_column == 'value'")

        Returns:
            pd.DataFrame or Dict: Filtered DataFrame if successful, error dict if failed
        """
        try:
            return df.query(query)
        except Exception as e:
            error_msg = f"Error filtering data with query '{query}': {str(e)}"
            logging.error(error_msg)
            return {"error": error_msg}

    def get_summary(self, df: pd.DataFrame) -> Dict[str, Any]:
        """
        Get a summary of the DataFrame including basic statistics and info.

        Args:
            df: Input DataFrame

        Returns:
            Dict: shape, columns, dtypes, null counts, numeric summary
            (from describe()) and memory usage, all JSON-serializable;
            or {"error": message} on failure.
        """
        try:
            numeric_summary = df.describe().to_dict()
            # Convert numpy scalar values to native Python types so the
            # result survives json.dumps.
            for col in numeric_summary:
                numeric_summary[col] = {k: _convert_to_serializable(v)
                                        for k, v in numeric_summary[col].items()}

            summary = {
                "shape": list(df.shape),
                "columns": list(df.columns),
                "dtypes": df.dtypes.astype(str).to_dict(),
                "null_counts": df.isnull().sum().to_dict(),
                "numeric_summary": numeric_summary,
                "memory_usage": int(df.memory_usage(deep=True).sum()),
            }
            return summary
        except Exception as e:
            error_msg = f"Error getting data summary: {str(e)}"
            logging.error(error_msg)
            return {"error": error_msg}

    def group_by(
        self,
        df: pd.DataFrame,
        columns: Union[str, List[str]],
        agg_dict: Dict[str, Union[str, List[str]]]
    ) -> Union[pd.DataFrame, Dict[str, str]]:
        """
        Group DataFrame by columns and apply aggregation functions.

        Args:
            df: Input DataFrame
            columns: Column(s) to group by
            agg_dict: Dictionary of column:function pairs for aggregation

        Returns:
            pd.DataFrame or Dict: Grouped DataFrame (group keys restored as
            columns via reset_index) if successful, error dict if failed
        """
        try:
            return df.groupby(columns).agg(agg_dict).reset_index()
        except Exception as e:
            error_msg = f"Error grouping data: {str(e)}"
            logging.error(error_msg)
            return {"error": error_msg}

    def pivot_table(
        self,
        df: pd.DataFrame,
        index: Union[str, List[str]],
        columns: Optional[Union[str, List[str]]] = None,
        values: Optional[Union[str, List[str]]] = None,
        aggfunc: str = "mean"
    ) -> Union[pd.DataFrame, Dict[str, str]]:
        """
        Create a pivot table from DataFrame.

        Args:
            df: Input DataFrame
            index: Column(s) to use as index
            columns: Column(s) to use as columns
            values: Column(s) to aggregate
            aggfunc: Aggregation function to use

        Returns:
            pd.DataFrame or Dict: Pivot table (index restored as columns)
            if successful, error dict if failed
        """
        try:
            return pd.pivot_table(
                df,
                index=index,
                columns=columns,
                values=values,
                aggfunc=aggfunc
            ).reset_index()
        except Exception as e:
            error_msg = f"Error creating pivot table: {str(e)}"
            logging.error(error_msg)
            return {"error": error_msg}
240
+
241
# Create instance for direct function access.
# NOTE: this runs at import time, so importing this module raises
# ImportError immediately if pandas is not installed.
_pandas_tools = PandasTools()
read_csv = _pandas_tools.read_csv
read_excel = _pandas_tools.read_excel
write_csv = _pandas_tools.write_csv
write_excel = _pandas_tools.write_excel
filter_data = _pandas_tools.filter_data
get_summary = _pandas_tools.get_summary
group_by = _pandas_tools.group_by
pivot_table = _pandas_tools.pivot_table
251
+
252
if __name__ == "__main__":
    # Example usage / demonstration script: round-trips a small DataFrame
    # through CSV and exercises filter/summary/group-by/pivot helpers.
    print("\n==================================================")
    print("PandasTools Demonstration")
    print("==================================================\n")

    # Create a test directory (cleaned up at the end).
    test_dir = os.path.join(os.getcwd(), "test_files")
    os.makedirs(test_dir, exist_ok=True)

    # Create a sample DataFrame
    df = pd.DataFrame({
        'name': ['John', 'Jane', 'Bob', 'Alice', 'Charlie'],
        'age': [25, 30, 35, 28, 32],
        'city': ['New York', 'London', 'Paris', 'Tokyo', 'London'],
        'salary': [50000, 60000, 75000, 65000, 55000]
    })

    # 1. Write to CSV
    print("1. Writing to CSV")
    print("------------------------------")
    csv_file = os.path.join(test_dir, "sample.csv")
    success = write_csv(df, csv_file, index=False)
    print(f"Write successful: {success}\n")

    # 2. Read from CSV
    print("2. Reading from CSV")
    print("------------------------------")
    df_read = read_csv(csv_file)
    print("First few rows:")
    print(df_read.head())
    print()

    # 3. Filter Data
    print("3. Filtering Data")
    print("------------------------------")
    filtered_df = filter_data(df, "age > 30 and salary > 60000")
    print("People over 30 with salary > 60000:")
    print(filtered_df)
    print()

    # 4. Get Summary
    print("4. Data Summary")
    print("------------------------------")
    summary = get_summary(df)
    print(json.dumps(summary, indent=2))
    print()

    # 5. Group By
    print("5. Group By")
    print("------------------------------")
    grouped = group_by(df, "city", {"salary": ["mean", "count"], "age": "mean"})
    print("Statistics by city:")
    print(grouped)
    print()

    # 6. Pivot Table
    print("6. Pivot Table")
    print("------------------------------")
    pivoted = pivot_table(df, index="city", values=["salary", "age"])
    print("Pivot table by city:")
    print(pivoted)
    print()

    # Clean up test directory (best-effort; report but don't raise).
    try:
        import shutil
        shutil.rmtree(test_dir)
        print("Test directory cleaned up successfully")
    except Exception as e:
        print(f"Error cleaning up test directory: {str(e)}")

    print("\n==================================================")
    print("Demonstration Complete")
    print("==================================================")