google-news-trends-mcp 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- google_news_trends_mcp/__init__.py +0 -0
- google_news_trends_mcp/__main__.py +5 -0
- google_news_trends_mcp/cli.py +149 -0
- google_news_trends_mcp/news.py +326 -0
- google_news_trends_mcp/server.py +260 -0
- google_news_trends_mcp-0.1.0.dist-info/METADATA +182 -0
- google_news_trends_mcp-0.1.0.dist-info/RECORD +11 -0
- google_news_trends_mcp-0.1.0.dist-info/WHEEL +5 -0
- google_news_trends_mcp-0.1.0.dist-info/entry_points.txt +3 -0
- google_news_trends_mcp-0.1.0.dist-info/licenses/LICENSE +21 -0
- google_news_trends_mcp-0.1.0.dist-info/top_level.txt +1 -0
google_news_trends_mcp/__init__.py: file without changes

google_news_trends_mcp/cli.py
@@ -0,0 +1,149 @@
import click
import asyncio
from google_news_trends_mcp.news import (
    get_news_by_keyword,
    get_news_by_location,
    get_news_by_topic,
    get_trending_terms,
    get_top_news,
    save_article_to_json,
)


@click.group()
def cli():
    pass


@cli.command(help=get_news_by_keyword.__doc__)
@click.argument("keyword")
@click.option(
    "--period", type=int, default=7, help="Period in days to search for articles."
)
@click.option(
    "--max-results",
    "max_results",
    type=int,
    default=10,
    help="Maximum number of results to return.",
)
@click.option(
    "--no-nlp", is_flag=True, default=False, help="Disable NLP processing for articles."
)
def keyword(keyword, period, max_results, no_nlp):
    articles = asyncio.run(
        get_news_by_keyword(
            keyword, period=period, max_results=max_results, nlp=not no_nlp
        )
    )
    print_articles(articles)


@cli.command(help=get_news_by_location.__doc__)
@click.argument("location")
@click.option(
    "--period", type=int, default=7, help="Period in days to search for articles."
)
@click.option(
    "--max-results",
    "max_results",
    type=int,
    default=10,
    help="Maximum number of results to return.",
)
@click.option(
    "--no-nlp", is_flag=True, default=False, help="Disable NLP processing for articles."
)
def location(location, period, max_results, no_nlp):
    articles = asyncio.run(
        get_news_by_location(
            location, period=period, max_results=max_results, nlp=not no_nlp
        )
    )
    print_articles(articles)


@cli.command(help=get_news_by_topic.__doc__)
@click.argument("topic")
@click.option(
    "--period", type=int, default=7, help="Period in days to search for articles."
)
@click.option(
    "--max-results",
    "max_results",
    type=int,
    default=10,
    help="Maximum number of results to return.",
)
@click.option(
    "--no-nlp", is_flag=True, default=False, help="Disable NLP processing for articles."
)
def topic(topic, period, max_results, no_nlp):
    articles = asyncio.run(
        get_news_by_topic(topic, period=period, max_results=max_results, nlp=not no_nlp)
    )
    print_articles(articles)


@cli.command(help=get_trending_terms.__doc__)
@click.option(
    "--geo", type=str, default="US", help="Country code, e.g. 'US', 'GB', 'IN', etc."
)
@click.option(
    "--full-data", is_flag=True, default=False, help="Return full data for each trend."
)
@click.option(
    "--max-results",
    "max_results",
    type=int,
    default=100,
    help="Maximum number of results to return.",
)
def trending(geo, full_data, max_results):
    trending_terms = asyncio.run(
        get_trending_terms(geo=geo, full_data=full_data, max_results=max_results)
    )
    if trending_terms:
        print("Trending terms:")
        for term in trending_terms:
            print(f"- {term}")
    else:
        print("No trending terms found.")


@cli.command(help=get_top_news.__doc__)
@click.option(
    "--period", type=int, default=3, help="Period in days to search for top articles."
)
@click.option(
    "--max-results",
    "max_results",
    type=int,
    default=10,
    help="Maximum number of results to return.",
)
@click.option(
    "--no-nlp", is_flag=True, default=False, help="Disable NLP processing for articles."
)
def top(period, max_results, no_nlp):
    articles = asyncio.run(
        get_top_news(max_results=max_results, period=period, nlp=not no_nlp)
    )
    print_articles(articles)
    print(f"Found {len(articles)} top articles.")


def print_articles(articles):
    for article in articles:
        print(f"Title: {article.title}")
        print(f"URL: {article.original_url}")
        print(f"Authors: {article.authors}")
        print(f"Publish Date: {article.publish_date}")
        print(f"Top Image: {article.top_image}")
        print(f"Summary: {article.summary}\n")
        save_article_to_json(article)


if __name__ == "__main__":
    cli()
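A quick way to exercise the CLI above without installing a console script is click's built-in test runner. This is a minimal sketch, assuming the package is importable; the search term and option values are arbitrary examples, and the call hits the live Google News feed:

```python
# Minimal sketch: drive the click group in-process with click's test runner.
from click.testing import CliRunner

from google_news_trends_mcp.cli import cli

runner = CliRunner()
# Equivalent to: google-news-trends keyword "openai" --period 3 --max-results 2 --no-nlp
result = runner.invoke(
    cli, ["keyword", "openai", "--period", "3", "--max-results", "2", "--no-nlp"]
)
print(result.exit_code)
print(result.output)
```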
google_news_trends_mcp/news.py
@@ -0,0 +1,326 @@
"""
# news.py
This module provides functions to find and download news articles using Google News.
It allows searching for articles by keyword, location, or topic, and can also retrieve Google trending terms.
It uses the `gnews` library to search for news articles and `trendspy` to get Google Trends data.
It falls back to Playwright for websites that are difficult to scrape with newspaper4k or cloudscraper.
"""

import re
import json
import time
import asyncio
from gnews import GNews
import newspaper  # newspaper4k
from googlenewsdecoder import gnewsdecoder
import cloudscraper
from playwright.async_api import async_playwright, Browser
from trendspy import Trends, TrendKeyword
import click
from typing import Optional
import atexit
from contextlib import asynccontextmanager

tr = Trends()

scraper = cloudscraper.create_scraper(
    # Challenge handling
    interpreter="js2py",  # Best compatibility for v3 challenges
    delay=5,  # Extra time for complex challenges
    # Stealth mode
    # enable_stealth=True,
    # stealth_options={
    #     'min_delay': 2.0,
    #     'max_delay': 6.0,
    #     'human_like_delays': True,
    #     'randomize_headers': True,
    #     'browser_quirks': True
    # },
    # Browser emulation
    browser="chrome",
    # Debug mode
    debug=False,
)

google_news = GNews(
    language="en",
    exclude_websites=["mensjournal.com"],
)

playwright = None
browser: Optional[Browser] = None


async def startup_browser():
    global playwright, browser
    playwright = await async_playwright().start()
    browser = await playwright.chromium.launch(headless=True)


@atexit.register
def shutdown_browser():
    if browser:
        asyncio.run(browser.close())
    if playwright:
        asyncio.run(playwright.stop())


async def get_browser() -> Browser:
    global browser
    if browser is None:
        await startup_browser()
    return browser


@asynccontextmanager
async def browser_context():
    context = await (await get_browser()).new_context()
    try:
        yield context
    finally:
        print("Closing browser context...")
        await context.close()


async def download_article_with_playwright(url) -> newspaper.Article | None:
    """
    Download an article using Playwright to handle complex websites (async).
    """
    try:
        async with browser_context() as context:
            page = await context.new_page()
            await page.goto(url, wait_until="domcontentloaded")
            await asyncio.sleep(2)  # Wait for the page to load completely
            content = await page.content()
            article = newspaper.article(url, input_html=content, language="en")
            return article
    except Exception as e:
        print(f"Error downloading article with Playwright from {url}\n {e.args}")
        return None


async def download_article(url: str, nlp: bool = True) -> newspaper.Article | None:
    """
    Download an article from a given URL using newspaper4k and cloudscraper (async).
    """
    article = None
    if url.startswith("https://news.google.com/rss/"):
        try:
            decoded_url = gnewsdecoder(url)
            if decoded_url.get("status"):
                url = decoded_url["decoded_url"]
            else:
                print(f"Failed to decode Google News RSS link: {url}")
                return None
        except Exception as err:
            print(f"Error while decoding url {url}\n {err.args}")
    try:
        article = newspaper.article(url)
    except Exception as e:
        print(f"Error downloading article with newspaper from {url}\n {e.args}")
        try:
            # Retry with cloudscraper
            response = scraper.get(url)
            if response.status_code < 400:
                article = newspaper.article(url, input_html=response.text)
            else:
                print(
                    f"Failed to download article with cloudscraper from {url}, status code: {response.status_code}"
                )
        except Exception as e:
            print(f"Error downloading article with cloudscraper from {url}\n {e.args}")

    try:
        if article is None:
            # If newspaper and cloudscraper both failed, retry with Playwright
            print(f"Retrying with Playwright for {url}")
            article = await download_article_with_playwright(url)
        if article is None:
            return None
        article.parse()
        if nlp:
            article.nlp()
        if article.publish_date:
            article.publish_date = article.publish_date.isoformat()
    except Exception as e:
        print(f"Error parsing article from {url}\n {e.args}")
        return None
    return article


async def process_gnews_articles(
    gnews_articles: list[dict], nlp: bool = True
) -> list["newspaper.Article"]:
    """
    Process a list of Google News articles and download them (async).
    """
    articles = []
    for gnews_article in gnews_articles:
        article = await download_article(gnews_article["url"], nlp=nlp)
        if article is None or not article.text:
            print(f"Failed to download article from {gnews_article['url']}:\n{article}")
            continue
        articles.append(article)
    return articles


async def get_news_by_keyword(
    keyword: str, period=7, max_results: int = 10, nlp: bool = True
) -> list[newspaper.Article]:
    """
    Find articles by keyword using Google News.
    keyword: is the search term to find articles.
    period: is the number of days to look back for articles.
    max_results: is the maximum number of results to return.
    nlp: If True, will perform NLP on the articles to extract keywords and summary.
    Returns:
        list[newspaper.Article]: A list of newspaper.Article objects containing the articles.
    """
    google_news.period = f"{period}d"
    google_news.max_results = max_results
    gnews_articles = google_news.get_news(keyword)
    if not gnews_articles:
        print(f"No articles found for keyword '{keyword}' in the last {period} days.")
        return []
    return await process_gnews_articles(gnews_articles, nlp=nlp)


async def get_top_news(
    period: int = 3, max_results: int = 10, nlp: bool = True
) -> list["newspaper.Article"]:
    """
    Get top news stories from Google News.
    period: is the number of days to look back for top articles.
    max_results: is the maximum number of results to return.
    nlp: If True, will perform NLP on the articles to extract keywords and summary.
    Returns:
        list[newspaper.Article]: A list of newspaper.Article objects containing the top news articles.
    """
    google_news.period = f"{period}d"
    google_news.max_results = max_results
    gnews_articles = google_news.get_top_news()
    if not gnews_articles:
        print("No top news articles found.")
        return []
    return await process_gnews_articles(gnews_articles, nlp=nlp)


async def get_news_by_location(
    location: str, period=7, max_results: int = 10, nlp: bool = True
) -> list[newspaper.Article]:
    """Find articles by location using Google News.
    location: is the name of city/state/country
    period: is the number of days to look back for articles.
    max_results: is the maximum number of results to return.
    nlp: If True, will perform NLP on the articles to extract keywords and summary.
    Returns:
        list[newspaper.Article]: A list of newspaper.Article objects containing the articles for the specified location
    """
    google_news.period = f"{period}d"
    google_news.max_results = max_results
    gnews_articles = google_news.get_news_by_location(location)
    if not gnews_articles:
        print(f"No articles found for location '{location}' in the last {period} days.")
        return []
    return await process_gnews_articles(gnews_articles, nlp=nlp)


async def get_news_by_topic(
    topic: str, period=7, max_results: int = 10, nlp: bool = True
) -> list[newspaper.Article]:
    """Find articles by topic using Google News.
    topic is one of
    WORLD, NATION, BUSINESS, TECHNOLOGY, ENTERTAINMENT, SPORTS, SCIENCE, HEALTH,
    POLITICS, CELEBRITIES, TV, MUSIC, MOVIES, THEATER, SOCCER, CYCLING, MOTOR SPORTS,
    TENNIS, COMBAT SPORTS, BASKETBALL, BASEBALL, FOOTBALL, SPORTS BETTING, WATER SPORTS,
    HOCKEY, GOLF, CRICKET, RUGBY, ECONOMY, PERSONAL FINANCE, FINANCE, DIGITAL CURRENCIES,
    MOBILE, ENERGY, GAMING, INTERNET SECURITY, GADGETS, VIRTUAL REALITY, ROBOTICS, NUTRITION,
    PUBLIC HEALTH, MENTAL HEALTH, MEDICINE, SPACE, WILDLIFE, ENVIRONMENT, NEUROSCIENCE, PHYSICS,
    GEOLOGY, PALEONTOLOGY, SOCIAL SCIENCES, EDUCATION, JOBS, ONLINE EDUCATION, HIGHER EDUCATION,
    VEHICLES, ARTS-DESIGN, BEAUTY, FOOD, TRAVEL, SHOPPING, HOME, OUTDOORS, FASHION.
    period: is the number of days to look back for articles.
    max_results: is the maximum number of results to return.
    nlp: If True, will perform NLP on the articles to extract keywords and summary.
    Returns:
        list[newspaper.Article]: A list of newspaper.Article objects containing the articles for the specified topic
    """
    google_news.period = f"{period}d"
    google_news.max_results = max_results
    gnews_articles = google_news.get_news_by_topic(topic)
    if not gnews_articles:
        print(f"No articles found for topic '{topic}' in the last {period} days.")
        return []
    return await process_gnews_articles(gnews_articles, nlp=nlp)


async def get_trending_terms(
    geo: str = "US", full_data: bool = False, max_results: int = 100
) -> list[tuple[str, int]] | list[TrendKeyword]:
    """
    Returns Google Trends for a specific geo location.
    Default is US.
    geo: is the country code, e.g. 'US', 'GB', 'IN', etc.
    full_data: if True, returns full data for each trend, otherwise returns only the trend and volume.
    max_results: is the maximum number of results to return, default is 100.
    Returns:
        list[tuple[str, int]]: A list of tuples containing the trend keyword and its volume.
        If full_data is True, a list of TrendKeyword objects with additional data, such as related news, is returned instead.
    """
    try:
        trends = list(tr.trending_now(geo=geo))
        trends = list(sorted(trends, key=lambda tt: tt.volume, reverse=True))[
            :max_results
        ]
        if not full_data:
            return [(trend.keyword, trend.volume) for trend in trends]
        return trends
    except Exception as e:
        print(f"Error fetching trending terms: {e}")
        return []


def save_article_to_json(article: newspaper.Article, filename: str = "") -> None:
    """
    Save an article to a JSON file.
    If no filename is given, one is derived from the article title: special
    characters and whitespace are replaced with underscores, and the result is
    truncated to 50 characters.
    """

    def sanitize_filename(title: str) -> str:
        # Replace special characters and spaces with underscores, then truncate to 50 characters
        sanitized_title = re.sub(r'[\\/*?:"<>|\s]', "_", title)[:50]
        return sanitized_title + ".json"

    article_data = {
        "title": article.title,
        "authors": article.authors,
        "publish_date": str(article.publish_date) if article.publish_date else None,
        "top_image": article.top_image,
        "images": article.images,
        "text": article.text,
        "url": article.original_url,
        "summary": article.summary,
        "keywords": article.keywords,
        "keyword_scores": article.keyword_scores,
        "tags": article.tags,
        "meta_keywords": article.meta_keywords,
        "meta_description": article.meta_description,
        "canonical_link": article.canonical_link,
        "meta_data": article.meta_data,
        "meta_lang": article.meta_lang,
        "source_url": article.source_url,
    }

    if not filename:
        # Use the article title to create a filename
        filename = sanitize_filename(article.title)
    else:
        # Ensure the filename ends with .json
        if not filename.endswith(".json"):
            filename += ".json"
    with open(filename, "w") as f:
        json.dump(article_data, f, indent=4)
    print(f"Article saved to {filename}")
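The module above also works as a plain library. A minimal usage sketch, assuming the package is installed and Playwright's Chromium has been set up with `playwright install chromium` (the keyword and limits are arbitrary examples):

```python
# Minimal sketch: search by keyword and let the newspaper4k ->
# cloudscraper -> Playwright fallback chain handle each article.
import asyncio

from google_news_trends_mcp.news import get_news_by_keyword

async def main():
    articles = await get_news_by_keyword("semiconductors", period=3, max_results=2, nlp=True)
    for article in articles:
        # With nlp=True, newspaper4k populates summary and keywords.
        print(article.title)
        print(article.keywords)

asyncio.run(main())
```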
google_news_trends_mcp/server.py
@@ -0,0 +1,260 @@
from fastmcp import FastMCP
from fastmcp.exceptions import ToolError
from fastmcp.server.dependencies import get_context
from pydantic import BaseModel, Field
from typing import Optional
from google_news_trends_mcp import news
from typing import Annotated
from fastmcp.server.middleware.timing import TimingMiddleware
from fastmcp.server.middleware.logging import LoggingMiddleware
from fastmcp.server.middleware.rate_limiting import RateLimitingMiddleware
from fastmcp.server.middleware.error_handling import ErrorHandlingMiddleware


class ArticleOut(BaseModel):
    read_more_link: Annotated[
        Optional[str], Field(description="Link to read more about the article.")
    ] = None
    language: Annotated[
        Optional[str], Field(description="Language code of the article.")
    ] = None
    meta_img: Annotated[Optional[str], Field(description="Meta image URL.")] = None
    movies: Annotated[
        Optional[list[str]], Field(description="List of movie URLs or IDs.")
    ] = None
    meta_favicon: Annotated[
        Optional[str], Field(description="Favicon URL from meta data.")
    ] = None
    meta_site_name: Annotated[
        Optional[str], Field(description="Site name from meta data.")
    ] = None
    title: Annotated[str, Field(description="Title of the article.")]
    authors: Annotated[Optional[list[str]], Field(description="List of authors.")] = None
    publish_date: Annotated[
        Optional[str], Field(description="Publish date in ISO format.")
    ] = None
    top_image: Annotated[Optional[str], Field(description="URL of the top image.")] = None
    images: Annotated[Optional[list[str]], Field(description="List of image URLs.")] = None
    text: Annotated[str, Field(description="Full text of the article.")]
    url: Annotated[str, Field(description="Original article URL.")]
    summary: Annotated[Optional[str], Field(description="Summary of the article.")] = None
    keywords: Annotated[
        Optional[list[str]], Field(description="Extracted keywords.")
    ] = None
    tags: Annotated[Optional[list[str]], Field(description="Tags for the article.")] = None
    meta_keywords: Annotated[
        Optional[list[str]], Field(description="Meta keywords from the article.")
    ] = None
    meta_description: Annotated[
        Optional[str], Field(description="Meta description from the article.")
    ] = None
    canonical_link: Annotated[
        Optional[str], Field(description="Canonical link for the article.")
    ] = None
    meta_data: Annotated[
        Optional[dict[str, str | int]], Field(description="Meta data dictionary.")
    ] = None
    meta_lang: Annotated[
        Optional[str], Field(description="Language of the article.")
    ] = None
    source_url: Annotated[
        Optional[str], Field(description="Source URL if different from original.")
    ] = None


class TrendingTermArticleOut(BaseModel):
    title: Annotated[str, Field(description="Article title.")] = ""
    url: Annotated[str, Field(description="Article URL.")] = ""
    source: Annotated[Optional[str], Field(description="News source name.")] = None
    picture: Annotated[Optional[str], Field(description="URL to article image.")] = None
    time: Annotated[
        Optional[str | int], Field(description="Publication time or timestamp.")
    ] = None
    snippet: Annotated[Optional[str], Field(description="Article preview text.")] = None


class TrendingTermOut(BaseModel):
    keyword: Annotated[str, Field(description="Trending keyword.")]
    volume: Annotated[Optional[int], Field(description="Search volume.")] = None
    geo: Annotated[Optional[str], Field(description="Geographic location code.")] = None
    started_timestamp: Annotated[
        Optional[list],
        Field(
            description="When the trend started (year, month, day, hour, minute, second)."
        ),
    ] = None
    ended_timestamp: Annotated[
        Optional[tuple[int, int]],
        Field(
            description="When the trend ended (year, month, day, hour, minute, second)."
        ),
    ] = None
    volume_growth_pct: Annotated[
        Optional[float], Field(description="Percentage growth in search volume.")
    ] = None
    trend_keywords: Annotated[
        Optional[list[str]], Field(description="Related keywords.")
    ] = None
    topics: Annotated[
        Optional[list[str | int]], Field(description="Related topics.")
    ] = None
    news: Annotated[
        Optional[list[TrendingTermArticleOut]],
        Field(description="Related news articles."),
    ] = None
    news_tokens: Annotated[
        Optional[list], Field(description="Associated news tokens.")
    ] = None
    normalized_keyword: Annotated[
        Optional[str], Field(description="Normalized form of the keyword.")
    ] = None


mcp = FastMCP(
    name="google-news-trends",
    instructions="This server provides tools to search, analyze, and summarize Google News articles and Google Trends",
    on_duplicate_tools="replace",
)

mcp.add_middleware(ErrorHandlingMiddleware())  # Handle errors first
mcp.add_middleware(RateLimitingMiddleware(max_requests_per_second=50))
mcp.add_middleware(TimingMiddleware())  # Time actual execution
mcp.add_middleware(LoggingMiddleware())  # Log everything


@mcp.tool(
    description=news.get_news_by_keyword.__doc__,
    tags={"news", "articles", "keyword"},
)
async def get_news_by_keyword(
    keyword: Annotated[str, Field(description="Search term to find articles.")],
    period: Annotated[
        int, Field(description="Number of days to look back for articles.", ge=1)
    ] = 7,
    max_results: Annotated[
        int, Field(description="Maximum number of results to return.", ge=1)
    ] = 10,
    nlp: Annotated[
        bool, Field(description="Whether to perform NLP on the articles.")
    ] = True,
) -> list[ArticleOut]:
    articles = await news.get_news_by_keyword(
        keyword=keyword,
        period=period,
        max_results=max_results,
        nlp=nlp,
    )
    return [ArticleOut(**a.to_json(False)) for a in articles]


@mcp.tool(
    description=news.get_news_by_location.__doc__,
    tags={"news", "articles", "location"},
)
async def get_news_by_location(
    location: Annotated[str, Field(description="Name of city/state/country.")],
    period: Annotated[
        int, Field(description="Number of days to look back for articles.", ge=1)
    ] = 7,
    max_results: Annotated[
        int, Field(description="Maximum number of results to return.", ge=1)
    ] = 10,
    nlp: Annotated[
        bool, Field(description="Whether to perform NLP on the articles.")
    ] = True,
) -> list[ArticleOut]:
    articles = await news.get_news_by_location(
        location=location,
        period=period,
        max_results=max_results,
        nlp=nlp,
    )
    return [ArticleOut(**a.to_json(False)) for a in articles]


@mcp.tool(
    description=news.get_news_by_topic.__doc__, tags={"news", "articles", "topic"}
)
async def get_news_by_topic(
    topic: Annotated[str, Field(description="Topic to search for articles.")],
    period: Annotated[
        int, Field(description="Number of days to look back for articles.", ge=1)
    ] = 7,
    max_results: Annotated[
        int, Field(description="Maximum number of results to return.", ge=1)
    ] = 10,
    nlp: Annotated[
        bool, Field(description="Whether to perform NLP on the articles.")
    ] = True,
) -> list[ArticleOut]:
    articles = await news.get_news_by_topic(
        topic=topic,
        period=period,
        max_results=max_results,
        nlp=nlp,
    )
    return [ArticleOut(**a.to_json(False)) for a in articles]


@mcp.tool(description=news.get_top_news.__doc__, tags={"news", "articles", "top"})
async def get_top_news(
    period: Annotated[
        int, Field(description="Number of days to look back for top articles.", ge=1)
    ] = 3,
    max_results: Annotated[
        int, Field(description="Maximum number of results to return.", ge=1)
    ] = 10,
    nlp: Annotated[
        bool, Field(description="Whether to perform NLP on the articles.")
    ] = True,
) -> list[ArticleOut]:
    articles = await news.get_top_news(
        period=period,
        max_results=max_results,
        nlp=nlp,
    )
    ctx = get_context()
    if articles:
        await ctx.debug(f"Top Articles:\n{articles[0].to_json(False)}")
    return [ArticleOut(**a.to_json(False)) for a in articles]


@mcp.tool(
    description=news.get_trending_terms.__doc__, tags={"trends", "google", "trending"}
)
async def get_trending_terms(
    geo: Annotated[
        str, Field(description="Country code, e.g. 'US', 'GB', 'IN', etc.")
    ] = "US",
    full_data: Annotated[
        bool,
        Field(
            description="Return full data for each trend. Should be False for most use cases."
        ),
    ] = False,
    max_results: Annotated[
        int, Field(description="Maximum number of results to return.", ge=1)
    ] = 100,
) -> list[TrendingTermOut]:
    trends = await news.get_trending_terms(
        geo=geo, full_data=full_data, max_results=max_results
    )
    ctx = get_context()
    await ctx.debug(f"Found {len(trends)} trending terms")
    if not full_data:
        return [TrendingTermOut(keyword=tt[0], volume=tt[1]) for tt in trends]
    return [TrendingTermOut(**tt.__dict__) for tt in trends]


def main():
    mcp.run()
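For an end-to-end check of the tools above, fastmcp can connect a client to the `FastMCP` instance in-process. A hedged sketch, assuming fastmcp 2.x, where `Client(mcp)` selects the in-memory transport:

```python
# Sketch: call a server tool in-process, no stdio/HTTP transport needed.
import asyncio

from fastmcp import Client

from google_news_trends_mcp.server import mcp

async def main():
    async with Client(mcp) as client:
        result = await client.call_tool(
            "get_trending_terms", {"geo": "US", "max_results": 5}
        )
        print(result)

asyncio.run(main())
```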
google_news_trends_mcp-0.1.0.dist-info/METADATA
@@ -0,0 +1,182 @@
Metadata-Version: 2.4
Name: google-news-trends-mcp
Version: 0.1.0
Summary: Add your description here
Author-email: Jesse Manek <jesse.manek@gmail.com>
License: MIT
Requires-Python: >=3.10.18
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: asyncio>=3.4.3
Requires-Dist: click>=8.2.1
Requires-Dist: cloudscraper>=1.2.71
Requires-Dist: fastmcp>=2.9.2
Requires-Dist: gnews>=0.4.1
Requires-Dist: googlenewsdecoder>=0.1.7
Requires-Dist: lxml[html-clean]>=6.0.0
Requires-Dist: newspaper4k>=0.9.3.1
Requires-Dist: nltk>=3.9.1
Requires-Dist: playwright>=1.53.0
Requires-Dist: pydantic>=2.11.7
Requires-Dist: pytest>=8.4.1
Requires-Dist: pytest-asyncio>=1.0.0
Requires-Dist: trendspy>=0.1.6
Dynamic: license-file

# Google News Trends MCP

An MCP server to access Google News and Google Trends. Does not rely on any paid APIs.
The returned data currently uses a lot of tokens, so it is recommended to always use limits when making requests.

## Features

- Search Google News articles based on keyword, location, or topic
- Get top news stories from Google News
- Google Trends keywords based on location
- Optional NLP processing to summarize articles and extract keywords

## Installation

### Using uv/uvx (recommended)

When using [`uv`](https://docs.astral.sh/uv/), no specific installation is needed. We will
use [`uvx`](https://docs.astral.sh/uv/guides/tools/) to directly run *google-news-trends-mcp*.

### Using PIP

```bash
pip install google-news-trends-mcp
```

After installation, you can run it as a script using:

```bash
python -m google-news-trends-mcp
```

## Configuration

### Configure for Claude.app

Add to your Claude settings:

<details>
<summary>Using uvx</summary>

```json
{
  "mcpServers": {
    "google-news-trends": {
      "command": "uvx",
      "args": ["google-news-trends-mcp"]
    }
  }
}
```
</details>

<details>
<summary>Using pip installation</summary>

```json
{
  "mcpServers": {
    "google-news-trends": {
      "command": "python",
      "args": ["-m", "google-news-trends-mcp"]
    }
  }
}
```
</details>

### Configure for VS Code

<details>
<summary>Using uvx</summary>

```json
{
  "mcp": {
    "servers": {
      "google-news-trends": {
        "command": "uvx",
        "args": ["google-news-trends-mcp"]
      }
    }
  }
}
```
</details>

<details>
<summary>Using pip installation</summary>

```json
{
  "mcp": {
    "servers": {
      "google-news-trends": {
        "command": "python",
        "args": ["-m", "google-news-trends-mcp"]
      }
    }
  }
}
```
</details>


## Tools

The following MCP tools are available:

| Tool Name                | Description                                                            |
|--------------------------|------------------------------------------------------------------------|
| **get_news_by_keyword**  | Search for news using specific keywords.                               |
| **get_news_by_location** | Retrieve news relevant to a particular location.                       |
| **get_news_by_topic**    | Get news based on a chosen topic.                                      |
| **get_top_news**         | Fetch the top news stories from Google News.                           |
| **get_trending_terms**   | Return trending keywords from Google Trends for a specified location.  |

All of the news-related tools have an option to summarize the text of each article using NLP (nltk).


## CLI

All tools can be accessed from the command line using `uv`:

```bash
uv run google-news-trends
Usage: google-news-trends [OPTIONS] COMMAND [ARGS]...

  Find and download news articles using Google News.

Options:
  --help  Show this message and exit.

Commands:
  keyword   Find articles by keyword using Google News.
  location  Find articles by location using Google News.
  top       Get top news stories from Google News.
  topic     Find articles by topic using Google News.
  trending  Returns Google Trends for a specific geo location.
```

## Debugging

```bash
npx @modelcontextprotocol/inspector uvx google-news-trends-mcp
```

To run from within a locally installed project:

```bash
cd path/to/google/news/trends/mcp
npx @modelcontextprotocol/inspector uv run google-news-trends-mcp
```

## Testing

```bash
cd path/to/google/news/trends/mcp
python -m pytest
```
google_news_trends_mcp-0.1.0.dist-info/RECORD
@@ -0,0 +1,11 @@
google_news_trends_mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
google_news_trends_mcp/__main__.py,sha256=ysiAk_xpnnW3lrLlzdIQQa71tuGBRT8WocbecBsY2Fs,87
google_news_trends_mcp/cli.py,sha256=fi0qocr-nc3UbGKOR5GLrmfsEjhU_M6ZJ7UAyLoC8ds,4012
google_news_trends_mcp/news.py,sha256=o3lNRx_lt1h2ojHW0pXhfQhs8n35pnvgeV6-iTi_1RA,12294
google_news_trends_mcp/server.py,sha256=qwQ_9UKnOLybUGCmUH4sJWxKsmJHZCg7PKimFXgr58c,9468
google_news_trends_mcp-0.1.0.dist-info/licenses/LICENSE,sha256=5dsv2ZI5EZIer0a9MktVmILVrlp5vqH_0tPIe3bRLgE,1067
google_news_trends_mcp-0.1.0.dist-info/METADATA,sha256=B0v0vmbUG9iMON4rnp_vlUOjwCcL046xphofUtbPkrQ,4263
google_news_trends_mcp-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
google_news_trends_mcp-0.1.0.dist-info/entry_points.txt,sha256=eVT3xd6YJQgsWAUBwhnffuwhXNF7yyt_uco6fjBy-1o,130
google_news_trends_mcp-0.1.0.dist-info/top_level.txt,sha256=RFheDbzhNnEV_Y3iFNm7jhRhY1P1wQgfiYqVpXCTD_U,23
google_news_trends_mcp-0.1.0.dist-info/RECORD,,
google_news_trends_mcp-0.1.0.dist-info/licenses/LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Jesse Manek

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
google_news_trends_mcp-0.1.0.dist-info/top_level.txt
@@ -0,0 +1 @@
google_news_trends_mcp