praisonaiagents 0.0.23__py3-none-any.whl → 0.0.24__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, exactly as they appear in their public registry. It is provided for informational purposes only.
@@ -0,0 +1,354 @@
+"""Newspaper tools for scraping and parsing news articles.
+
+Usage:
+    from praisonaiagents.tools import newspaper_tools
+    article = newspaper_tools.get_article("https://example.com/article")
+    sources = newspaper_tools.get_news_sources("technology")
+    articles = newspaper_tools.get_articles_from_source("https://techcrunch.com")
+
+    or
+    from praisonaiagents.tools import get_article, get_news_sources
+    article = get_article("https://example.com/article")
+"""
+
+import logging
+from typing import List, Dict, Union, Optional, Any
+from importlib import util
+import json
+from urllib.parse import urlparse
+
+# Predefined list of popular news sources
+POPULAR_NEWS_SOURCES = {
+    'technology': [
+        'https://techcrunch.com',
+        'https://www.theverge.com',
+        'https://www.wired.com',
+        'https://www.engadget.com',
+        'https://arstechnica.com'
+    ],
+    'business': [
+        'https://www.bloomberg.com',
+        'https://www.reuters.com',
+        'https://www.wsj.com',
+        'https://www.ft.com',
+        'https://www.cnbc.com'
+    ],
+    'general': [
+        'https://www.nytimes.com',
+        'https://www.theguardian.com',
+        'https://www.washingtonpost.com',
+        'https://www.bbc.com',
+        'https://www.cnn.com'
+    ],
+    'sports': [
+        'https://www.espn.com',
+        'https://sports.yahoo.com',
+        'https://www.cbssports.com',
+        'https://www.skysports.com',
+        'https://www.bleacherreport.com'
+    ],
+    'entertainment': [
+        'https://variety.com',
+        'https://www.hollywoodreporter.com',
+        'https://www.ew.com',
+        'https://www.deadline.com',
+        'https://www.imdb.com/news'
+    ],
+    'science': [
+        'https://www.scientificamerican.com',
+        'https://www.sciencedaily.com',
+        'https://www.newscientist.com',
+        'https://www.sciencemag.org',
+        'https://www.nature.com/news'
+    ]
+}
+
+class NewspaperTools:
+    """Tools for scraping and parsing news articles."""
+
+    def __init__(self):
+        """Initialize NewspaperTools and check for newspaper package."""
+        self._check_newspaper()
+
+    def _check_newspaper(self):
+        """Check if newspaper package is installed."""
+        if util.find_spec("newspaper") is None:
+            raise ImportError("newspaper3k package is not available. Please install it using: pip install newspaper3k")
+        # Defer the import until we know it is installed, then expose it module-wide
+        global newspaper
+        import newspaper
+
+    def get_article(
+        self,
+        url: str,
+        language: str = 'en'
+    ) -> Dict[str, Any]:
+        """
+        Extract and parse a news article from a URL.
+
+        Args:
+            url: URL of the article
+            language: Language code (e.g., 'en' for English)
+
+        Returns:
+            Dict: Article information including title, text, authors, etc.
+        """
+        try:
+            from newspaper import Article, Config
+
+            # Configure article download
+            config = Config()
+            config.browser_user_agent = 'Mozilla/5.0'
+            config.language = language
+
+            # Download and parse article
+            article = Article(url, config=config)
+            article.download()
+            article.parse()
+
+            # Try to extract additional information (keywords, summary);
+            # this needs NLTK data and may fail independently of the parse above
+            try:
+                article.nlp()
+            except Exception as e:
+                logging.warning(f"NLP processing failed: {str(e)}")
+
+            # Build response
+            response = {
+                "url": url,
+                "title": article.title,
+                "text": article.text,
+                "authors": article.authors,
+                "publish_date": article.publish_date.isoformat() if article.publish_date else None,
+                "top_image": article.top_image,
+                "images": list(article.images),
+                "movies": list(article.movies),
+                "source_domain": urlparse(url).netloc,
+            }
+
+            # Add NLP results if available
+            if hasattr(article, 'keywords') and article.keywords:
+                response["keywords"] = article.keywords
+            if hasattr(article, 'summary') and article.summary:
+                response["summary"] = article.summary
+
+            return response
+        except Exception as e:
+            error_msg = f"Error extracting article from {url}: {str(e)}"
+            logging.error(error_msg)
+            return {"error": error_msg}
+
+    def get_news_sources(
+        self,
+        category: Optional[str] = None,
+        language: str = 'en',
+        country: Optional[str] = None
+    ) -> Union[List[Dict[str, str]], Dict[str, str]]:
+        """
+        Get a list of news sources, optionally filtered by category.
+
+        Args:
+            category: Category to filter by (e.g., 'technology', 'sports')
+            language: Language code (currently unused)
+            country: Country code (currently unused)
+
+        Returns:
+            List[Dict] or Dict: List of news sources or error dict
+        """
+        try:
+            sources = []
+
+            # Get sources for the specified category; fall back to all
+            # categories when none is given or the category is unknown
+            if category:
+                category = category.lower()
+                if category in POPULAR_NEWS_SOURCES:
+                    urls = POPULAR_NEWS_SOURCES[category]
+                else:
+                    urls = []
+                    for cat_urls in POPULAR_NEWS_SOURCES.values():
+                        urls.extend(cat_urls)
+            else:
+                urls = []
+                for cat_urls in POPULAR_NEWS_SOURCES.values():
+                    urls.extend(cat_urls)
+
+            # Create source objects
+            for url in urls:
+                domain = urlparse(url).netloc
+                source = {
+                    "url": url,
+                    "domain": domain,
+                    "name": domain.replace("www.", "").split(".")[0].title(),
+                    "category": category if category else "general"
+                }
+                sources.append(source)
+
+            return sources
+        except Exception as e:
+            error_msg = f"Error getting news sources: {str(e)}"
+            logging.error(error_msg)
+            return {"error": error_msg}
+
+    def get_articles_from_source(
+        self,
+        source_url: str,
+        limit: int = 10,
+        language: str = 'en'
+    ) -> Union[List[Dict[str, Any]], Dict[str, str]]:
+        """
+        Get recent articles from a news source.
+
+        Args:
+            source_url: URL of the news source
+            limit: Maximum number of articles to return
+            language: Language code
+
+        Returns:
+            List[Dict] or Dict: List of articles or error dict
+        """
+        try:
+            from newspaper import Source, Config
+
+            # Configure source scraping
+            config = Config()
+            config.browser_user_agent = 'Mozilla/5.0'
+            config.language = language
+            config.fetch_images = False  # Speed up processing
+
+            # Build news source (downloads and parses the site's category
+            # and feed pages to discover article URLs)
+            source = Source(source_url, config=config)
+            source.build()
+
+            # Get articles, skipping any that fail to download or parse
+            articles = []
+            for article_url in source.article_urls()[:limit]:
+                try:
+                    article = self.get_article(article_url, language)
+                    if "error" not in article:
+                        articles.append(article)
+                except Exception as e:
+                    logging.warning(f"Error processing article {article_url}: {str(e)}")
+                    continue
+
+                if len(articles) >= limit:
+                    break
+
+            return articles
+        except Exception as e:
+            error_msg = f"Error getting articles from {source_url}: {str(e)}"
+            logging.error(error_msg)
+            return {"error": error_msg}
+
+    def get_trending_topics(
+        self,
+        sources: Optional[List[str]] = None,
+        limit: int = 10,
+        language: str = 'en'
+    ) -> Union[List[str], Dict[str, str]]:
+        """
+        Get trending topics across news sources.
+
+        Args:
+            sources: List of source URLs to analyze
+            limit: Maximum number of trending topics to return
+            language: Language code
+
+        Returns:
+            List[str] or Dict: List of trending topics or error dict
+        """
+        try:
+            from collections import Counter
+
+            # Use default sources if none provided
+            if not sources:
+                sources_data = self.get_news_sources(language=language)
+                if isinstance(sources_data, dict) and "error" in sources_data:
+                    return sources_data
+                sources = [s["url"] for s in sources_data[:5]]  # Use top 5 sources
+
+            # Collect keywords from articles
+            all_keywords = []
+            for source_url in sources:
+                try:
+                    articles = self.get_articles_from_source(source_url, limit=5, language=language)
+                    if isinstance(articles, list):
+                        for article in articles:
+                            if "keywords" in article:
+                                all_keywords.extend(article["keywords"])
+                except Exception as e:
+                    logging.warning(f"Error processing source {source_url}: {str(e)}")
+                    continue
+
+            # Get most common keywords
+            trending = Counter(all_keywords).most_common(limit)
+            return [topic for topic, count in trending]
+        except Exception as e:
+            error_msg = f"Error getting trending topics: {str(e)}"
+            logging.error(error_msg)
+            return {"error": error_msg}
+
+# Create instance for direct function access
+_newspaper_tools = NewspaperTools()
+get_article = _newspaper_tools.get_article
+get_news_sources = _newspaper_tools.get_news_sources
+get_articles_from_source = _newspaper_tools.get_articles_from_source
+get_trending_topics = _newspaper_tools.get_trending_topics
+
+if __name__ == "__main__":
+    # Example usage
+    print("\n==================================================")
+    print("NewspaperTools Demonstration")
+    print("==================================================\n")
+
+    # 1. Get news sources
+    print("1. Getting News Sources")
+    print("------------------------------")
+    tech_sources = get_news_sources("technology")
+    print("Technology news sources:")
+    if isinstance(tech_sources, list):
+        print(json.dumps(tech_sources[:3], indent=2))  # Show first 3 sources
+    else:
+        print(tech_sources)  # Show error
+    print()
+
+    if isinstance(tech_sources, list) and tech_sources:
+        source_url = tech_sources[0]["url"]
+
+        # 2. Get articles from a source
+        print("2. Getting Articles from Source")
+        print("------------------------------")
+        articles = get_articles_from_source(source_url, limit=2)
+        print(f"Articles from {source_url}:")
+        if isinstance(articles, list):
+            for article in articles:
+                print(f"- {article['title']}")
+                if "summary" in article:
+                    print(f"  Summary: {article['summary'][:200]}...")
+        else:
+            print(articles)  # Show error
+        print()
+
+        # 3. Get a single article
+        print("3. Getting Single Article")
+        print("------------------------------")
+        if isinstance(articles, list) and articles:
+            article_url = articles[0]["url"]
+            article = get_article(article_url)
+            if "error" not in article:
+                print(f"Article: {article['title']}")
+                if "summary" in article:
+                    print(f"Summary: {article['summary'][:200]}...")
+                print(f"Authors: {', '.join(article['authors'])}")
+                print(f"Date: {article['publish_date']}")
+            else:
+                print(article)  # Show error
+            print()
+
+        # 4. Get trending topics
+        print("4. Getting Trending Topics")
+        print("------------------------------")
+        topics = get_trending_topics([source_url], limit=5)
+        print("Trending topics:")
+        print(json.dumps(topics, indent=2))
+
+    print("\n==================================================")
+    print("Demonstration Complete")
+    print("==================================================")
@@ -0,0 +1,326 @@
+"""Pandas tools for data manipulation and analysis.
+
+Usage:
+    from praisonaiagents.tools import pandas_tools
+    df = pandas_tools.read_csv("data.csv")
+    df = pandas_tools.filter_data(df, "column > 5")
+    summary = pandas_tools.get_summary(df)
+
+    or
+    from praisonaiagents.tools import read_csv, filter_data, get_summary
+    df = read_csv("data.csv")
+"""
+
+import logging
+from typing import List, Dict, Union, Optional, Any
+from importlib import util
+import json
+import os
+
+# Import pandas for type hints, but don't use it until we check it's installed
+if util.find_spec("pandas") is not None:
+    import pandas as pd
+    import numpy as np
+else:
+    # Create a placeholder for type hints
+    class pd:
+        DataFrame = None
+
+def _convert_to_serializable(obj: Any) -> Any:
+    """Convert numpy/pandas types to JSON serializable Python types."""
+    if isinstance(obj, (np.integer, np.floating)):
+        return obj.item()
+    elif isinstance(obj, np.ndarray):
+        return obj.tolist()
+    elif isinstance(obj, pd.Series):
+        return obj.to_list()
+    elif isinstance(obj, pd.DataFrame):
+        return obj.to_dict(orient='records')
+    return obj
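+# Illustrative: numpy-backed values become plain Python types,
+# e.g. _convert_to_serializable(np.int64(3)) -> 3, ready for json.dumps.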
+
+class PandasTools:
+    """Tools for data manipulation and analysis using pandas."""
+
+    def __init__(self):
+        """Initialize PandasTools and check for pandas installation."""
+        self._check_pandas()
+
+    def _check_pandas(self):
+        """Check if pandas is installed."""
+        if util.find_spec("pandas") is None:
+            raise ImportError("pandas is not available. Please install it using: pip install pandas")
+        global pd, np
+        import pandas as pd
+        import numpy as np
+
+    def read_csv(self, filepath: str, **kwargs) -> Union[pd.DataFrame, Dict[str, str]]:
+        """
+        Read a CSV file into a pandas DataFrame.
+
+        Args:
+            filepath: Path to the CSV file
+            **kwargs: Additional arguments to pass to pd.read_csv()
+
+        Returns:
+            pd.DataFrame or Dict: DataFrame if successful, error dict if failed
+        """
+        try:
+            return pd.read_csv(filepath, **kwargs)
+        except Exception as e:
+            error_msg = f"Error reading CSV file {filepath}: {str(e)}"
+            logging.error(error_msg)
+            return {"error": error_msg}
+
+    def read_excel(self, filepath: str, **kwargs) -> Union[pd.DataFrame, Dict[str, str]]:
+        """
+        Read an Excel file into a pandas DataFrame.
+
+        Args:
+            filepath: Path to the Excel file
+            **kwargs: Additional arguments to pass to pd.read_excel()
+
+        Returns:
+            pd.DataFrame or Dict: DataFrame if successful, error dict if failed
+        """
+        try:
+            return pd.read_excel(filepath, **kwargs)
+        except Exception as e:
+            error_msg = f"Error reading Excel file {filepath}: {str(e)}"
+            logging.error(error_msg)
+            return {"error": error_msg}
+
+    def write_csv(self, df: pd.DataFrame, filepath: str, **kwargs) -> bool:
+        """
+        Write DataFrame to a CSV file.
+
+        Args:
+            df: DataFrame to write
+            filepath: Output file path
+            **kwargs: Additional arguments to pass to df.to_csv()
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            # Create parent directories only when the path has one;
+            # os.makedirs("") raises for bare filenames like "data.csv"
+            dirname = os.path.dirname(filepath)
+            if dirname:
+                os.makedirs(dirname, exist_ok=True)
+            df.to_csv(filepath, **kwargs)
+            return True
+        except Exception as e:
+            error_msg = f"Error writing CSV file {filepath}: {str(e)}"
+            logging.error(error_msg)
+            return False
+
+    def write_excel(self, df: pd.DataFrame, filepath: str, **kwargs) -> bool:
+        """
+        Write DataFrame to an Excel file.
+
+        Args:
+            df: DataFrame to write
+            filepath: Output file path
+            **kwargs: Additional arguments to pass to df.to_excel()
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            # Guard against bare filenames, as in write_csv
+            dirname = os.path.dirname(filepath)
+            if dirname:
+                os.makedirs(dirname, exist_ok=True)
+            df.to_excel(filepath, **kwargs)
+            return True
+        except Exception as e:
+            error_msg = f"Error writing Excel file {filepath}: {str(e)}"
+            logging.error(error_msg)
+            return False
+
+    def filter_data(self, df: pd.DataFrame, query: str) -> Union[pd.DataFrame, Dict[str, str]]:
+        """
+        Filter DataFrame using a query string.
+
+        Args:
+            df: Input DataFrame
+            query: Query string (e.g., "column > 5 and other_column == 'value'")
+
+        Returns:
+            pd.DataFrame or Dict: Filtered DataFrame if successful, error dict if failed
+        """
+        try:
+            return df.query(query)
+        except Exception as e:
+            error_msg = f"Error filtering data with query '{query}': {str(e)}"
+            logging.error(error_msg)
+            return {"error": error_msg}
+
+    def get_summary(self, df: pd.DataFrame) -> Dict[str, Any]:
+        """
+        Get a summary of the DataFrame including basic statistics and info.
+
+        Args:
+            df: Input DataFrame
+
+        Returns:
+            Dict: Summary statistics and information
+        """
+        try:
+            numeric_summary = df.describe().to_dict()
+            # Convert numpy types to native Python types
+            for col in numeric_summary:
+                numeric_summary[col] = {k: _convert_to_serializable(v)
+                                        for k, v in numeric_summary[col].items()}
+
+            summary = {
+                "shape": list(df.shape),
+                "columns": list(df.columns),
+                "dtypes": df.dtypes.astype(str).to_dict(),
+                # int() keeps the counts JSON serializable (np.int64 is not)
+                "null_counts": {col: int(n) for col, n in df.isnull().sum().items()},
+                "numeric_summary": numeric_summary,
+                "memory_usage": int(df.memory_usage(deep=True).sum()),
+            }
+            return summary
+        except Exception as e:
+            error_msg = f"Error getting data summary: {str(e)}"
+            logging.error(error_msg)
+            return {"error": error_msg}
+
+    def group_by(
+        self,
+        df: pd.DataFrame,
+        columns: Union[str, List[str]],
+        agg_dict: Dict[str, Union[str, List[str]]]
+    ) -> Union[pd.DataFrame, Dict[str, str]]:
+        """
+        Group DataFrame by columns and apply aggregation functions.
+
+        Args:
+            df: Input DataFrame
+            columns: Column(s) to group by
+            agg_dict: Dictionary of column:function pairs for aggregation
+
+        Returns:
+            pd.DataFrame or Dict: Grouped DataFrame if successful, error dict if failed
+        """
+        try:
+            return df.groupby(columns).agg(agg_dict).reset_index()
+        except Exception as e:
+            error_msg = f"Error grouping data: {str(e)}"
+            logging.error(error_msg)
+            return {"error": error_msg}
+
+    def pivot_table(
+        self,
+        df: pd.DataFrame,
+        index: Union[str, List[str]],
+        columns: Optional[Union[str, List[str]]] = None,
+        values: Optional[Union[str, List[str]]] = None,
+        aggfunc: str = "mean"
+    ) -> Union[pd.DataFrame, Dict[str, str]]:
+        """
+        Create a pivot table from DataFrame.
+
+        Args:
+            df: Input DataFrame
+            index: Column(s) to use as index
+            columns: Column(s) to use as columns
+            values: Column(s) to aggregate
+            aggfunc: Aggregation function to use (e.g., "mean", "sum", "count")
+
+        Returns:
+            pd.DataFrame or Dict: Pivot table if successful, error dict if failed
+        """
+        try:
+            return pd.pivot_table(
+                df,
+                index=index,
+                columns=columns,
+                values=values,
+                aggfunc=aggfunc
+            ).reset_index()
+        except Exception as e:
+            error_msg = f"Error creating pivot table: {str(e)}"
+            logging.error(error_msg)
+            return {"error": error_msg}
+
+# Create instance for direct function access
+_pandas_tools = PandasTools()
+read_csv = _pandas_tools.read_csv
+read_excel = _pandas_tools.read_excel
+write_csv = _pandas_tools.write_csv
+write_excel = _pandas_tools.write_excel
+filter_data = _pandas_tools.filter_data
+get_summary = _pandas_tools.get_summary
+group_by = _pandas_tools.group_by
+pivot_table = _pandas_tools.pivot_table
+
+if __name__ == "__main__":
+    # Example usage
+    print("\n==================================================")
+    print("PandasTools Demonstration")
+    print("==================================================\n")
+
+    # Create a test directory
+    test_dir = os.path.join(os.getcwd(), "test_files")
+    os.makedirs(test_dir, exist_ok=True)
+
+    # Create a sample DataFrame
+    df = pd.DataFrame({
+        'name': ['John', 'Jane', 'Bob', 'Alice', 'Charlie'],
+        'age': [25, 30, 35, 28, 32],
+        'city': ['New York', 'London', 'Paris', 'Tokyo', 'London'],
+        'salary': [50000, 60000, 75000, 65000, 55000]
+    })
+
+    # 1. Write to CSV
+    print("1. Writing to CSV")
+    print("------------------------------")
+    csv_file = os.path.join(test_dir, "sample.csv")
+    success = write_csv(df, csv_file, index=False)
+    print(f"Write successful: {success}\n")
+
+    # 2. Read from CSV
+    print("2. Reading from CSV")
+    print("------------------------------")
+    df_read = read_csv(csv_file)
+    print("First few rows:")
+    print(df_read.head())
+    print()
+
+    # 3. Filter Data
+    print("3. Filtering Data")
+    print("------------------------------")
+    filtered_df = filter_data(df, "age > 30 and salary > 60000")
+    print("People over 30 with salary > 60000:")
+    print(filtered_df)
+    print()
+
+    # 4. Get Summary
+    print("4. Data Summary")
+    print("------------------------------")
+    summary = get_summary(df)
+    print(json.dumps(summary, indent=2))
+    print()
+
+    # 5. Group By
+    print("5. Group By")
+    print("------------------------------")
+    grouped = group_by(df, "city", {"salary": ["mean", "count"], "age": "mean"})
+    print("Statistics by city:")
+    print(grouped)
+    print()
+
+    # 6. Pivot Table
+    print("6. Pivot Table")
+    print("------------------------------")
+    pivoted = pivot_table(df, index="city", values=["salary", "age"])
+    print("Pivot table by city:")
+    print(pivoted)
+    print()
+
+    # Clean up test directory
+    try:
+        import shutil
+        shutil.rmtree(test_dir)
+        print("Test directory cleaned up successfully")
+    except Exception as e:
+        print(f"Error cleaning up test directory: {str(e)}")
+
+    print("\n==================================================")
+    print("Demonstration Complete")
+    print("==================================================")