superbrain-server 1.0.46 → 1.0.47
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -58,7 +58,7 @@ any products/places/tools mentioned, and the overall purpose of the page]
|
|
|
58
58
|
[N/A — web page]
|
|
59
59
|
|
|
60
60
|
📂 CATEGORY:
|
|
61
|
-
[Choose exactly ONE from: product, places, recipe, software, book, tv shows,
|
|
61
|
+
[Choose exactly ONE from: product, places, recipe, food, software, book, tv shows, fitness, film, event, other]
|
|
62
62
|
|
|
63
63
|
Be specific and factual. Extract real names, numbers, and details from the content."""
|
|
64
64
|
|
|
@@ -287,8 +287,9 @@ def _fetch_reddit(url: str, timeout: int) -> tuple[str, str, str] | None:
|
|
|
287
287
|
|
|
288
288
|
# Proxies tried left-to-right; {url} is replaced with the full article URL.
|
|
289
289
|
_MEDIUM_PROXIES = [
|
|
290
|
-
"https://
|
|
291
|
-
"https://
|
|
290
|
+
"https://readmedium.com/en/{url}", # readmedium works
|
|
291
|
+
"https://scribe.rip/{url}", # scribe mirrors the article cleanly
|
|
292
|
+
"https://freedium.cfd/{url}", # alternative (sometimes down)
|
|
292
293
|
]
|
|
293
294
|
|
|
294
295
|
|
|
@@ -359,26 +360,25 @@ def _parse_proxy_page(html: str, orig_url: str) -> tuple[str, str, str]:
|
|
|
359
360
|
return title, "\n".join(lines), thumbnail, proxy_author, proxy_date
|
|
360
361
|
|
|
361
362
|
|
|
362
|
-
def _fetch_medium(url: str, timeout: int) -> tuple[str, str, str] | None:
|
|
363
|
-
|
|
364
|
-
Try each Medium proxy in order; return first successful result.
|
|
365
|
-
"""
|
|
363
|
+
def _fetch_medium(url: str, timeout: int) -> tuple[str, str, str, str, str] | None:
|
|
364
|
+
# Use r.jina.ai to cleanly extract Medium articles (bypasses Cloudflare).
|
|
366
365
|
import requests
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
366
|
+
try:
|
|
367
|
+
print(f" [medium] Fetching via r.jina.ai...")
|
|
368
|
+
jina_url = f"https://r.jina.ai/{url}"
|
|
369
|
+
r = requests.get(jina_url, headers={"Accept": "application/json"}, timeout=timeout)
|
|
370
|
+
r.raise_for_status()
|
|
371
|
+
data = r.json().get("data", {})
|
|
372
|
+
title = data.get("title", "")
|
|
373
|
+
text = data.get("content", "")
|
|
374
|
+
author = data.get("author", "")
|
|
375
|
+
image = data.get("image", "")
|
|
376
|
+
if text and len(text) > 200:
|
|
377
|
+
return title, text, image, author, ""
|
|
378
|
+
print(" [medium] r.jina.ai returned too little text")
|
|
379
|
+
except Exception as e:
|
|
380
|
+
print(f" [medium] r.jina.ai failed: {e}")
|
|
381
|
+
|
|
382
382
|
return None
|
|
383
383
|
|
|
384
384
|
|
|
@@ -45,7 +45,7 @@ identifiable background music, write "No background music". If it's voiceover
|
|
|
45
45
|
only, write "Voiceover only".]
|
|
46
46
|
|
|
47
47
|
📂 CATEGORY:
|
|
48
|
-
[Choose exactly ONE from: product, places, recipe, software, book, tv shows,
|
|
48
|
+
[Choose exactly ONE from: product, places, recipe, food, software, book, tv shows, fitness, film, event, other]
|
|
49
49
|
|
|
50
50
|
Be specific, accurate, and extractive — pull out real names, numbers, and facts from the video."""
|
|
51
51
|
|
package/payload/main.py
CHANGED
|
@@ -138,7 +138,7 @@ Generate a report in this EXACT format:
|
|
|
138
138
|
[Music/song name if found, or "No background music" or "Voiceover only"]
|
|
139
139
|
|
|
140
140
|
📂 CATEGORY:
|
|
141
|
-
[Choose ONE from: product, places, recipe, software, book, tv shows,
|
|
141
|
+
[Choose ONE from: product, places, recipe, food, software, book, tv shows, fitness, film, event]
|
|
142
142
|
|
|
143
143
|
Be specific, concise, and actionable. Focus on useful information."""
|
|
144
144
|
|
|
@@ -252,7 +252,7 @@ def parse_summary(summary_text):
|
|
|
252
252
|
|
|
253
253
|
# Fallback: Auto-detect category if empty or unrecognised
|
|
254
254
|
valid_categories = {'product', 'places', 'recipe', 'software', 'book',
|
|
255
|
-
'tv shows', '
|
|
255
|
+
'tv shows', 'fitness', 'film', 'event', 'food', 'other'}
|
|
256
256
|
if not category or category not in valid_categories:
|
|
257
257
|
category = auto_detect_category(summary_text, title, summary, tags)
|
|
258
258
|
|
|
@@ -271,10 +271,11 @@ def auto_detect_category(summary_text, title, summary, tags):
|
|
|
271
271
|
category_keywords = {
|
|
272
272
|
'product': ['camera', 'device', 'gadget', 'tech', 'phone', 'laptop', 'review', 'unbox', 'product', 'dji', 'osmo', 'action cam'],
|
|
273
273
|
'places': ['travel', 'trip', 'visit', 'destination', 'village', 'city', 'mountain', 'beach', 'hotel', 'itinerary', 'sikkim', 'location'],
|
|
274
|
-
'recipe': ['recipe', 'cooking', '
|
|
274
|
+
'recipe': ['recipe', 'cooking', 'dish', 'ingredients', 'cook', 'bake'],
|
|
275
|
+
'food': ['food', 'meal', 'cuisine', 'restaurant', 'cafe', 'dining', 'eat'],
|
|
275
276
|
'software': ['app', 'software', 'code', 'programming', 'developer', 'api', 'python', 'javascript'],
|
|
276
277
|
'book': ['book', 'novel', 'author', 'read', 'literature', 'story', 'chapter'],
|
|
277
|
-
'
|
|
278
|
+
'fitness': ['workout', 'fitness', 'exercise', 'gym', 'training', 'muscle', 'cardio', 'yoga'],
|
|
278
279
|
'film': ['movie', 'film', 'cinema', 'actor', 'actress', 'director', 'trailer', 'premiere'],
|
|
279
280
|
'tv shows': ['series', 'episode', 'season', 'show', 'tv show', 'streaming', 'netflix'],
|
|
280
281
|
'event': ['event', 'concert', 'festival', 'conference', 'meetup', 'workshop', 'seminar']
|
package/payload/requirements.txt
CHANGED
|
@@ -15,7 +15,7 @@ google-genai>=0.8.0
|
|
|
15
15
|
# ── Web Page Fetching & Parsing ───────────────────────────────────────────────
|
|
16
16
|
beautifulsoup4>=4.12.0
|
|
17
17
|
trafilatura>=1.12.0
|
|
18
|
-
newspaper4k>=0.9.0
|
|
18
|
+
newspaper4k[nlp]>=0.9.0
|
|
19
19
|
lxml>=5.0.0
|
|
20
20
|
lxml_html_clean>=0.1.0
|
|
21
21
|
htmldate>=1.9.0
|
package/payload/start.py
CHANGED