npm - superbrain-server - Versions diffs - 1.0.46 → 1.0.47 - Mend

superbrain-server 1.0.46 → 1.0.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/package.json +1 -1
package/payload/analyzers/webpage_analyzer.py +22 -22
package/payload/analyzers/youtube_analyzer.py +1 -1
package/payload/main.py +5 -4
package/payload/requirements.txt +1 -1
package/payload/start.py +1 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superbrain-server",
-  "version": "1.0.46",
+  "version": "1.0.47",
   "description": "1-Line Auto-Installer and Server Execution wrapper for SuperBrain",
   "main": "index.js",
   "bin": {

package/payload/analyzers/webpage_analyzer.py CHANGED Viewed

@@ -58,7 +58,7 @@ any products/places/tools mentioned, and the overall purpose of the page]
 [N/A — web page]
 📂 CATEGORY:
-[Choose exactly ONE from: product, places, recipe, software, book, tv shows, workout, film, event, other]
+[Choose exactly ONE from: product, places, recipe, food, software, book, tv shows, fitness, film, event, other]
 Be specific and factual. Extract real names, numbers, and details from the content."""
@@ -287,8 +287,9 @@ def _fetch_reddit(url: str, timeout: int) -> tuple[str, str, str] | None:
 # Proxies tried left-to-right; {url} is replaced with the full article URL.
 _MEDIUM_PROXIES = [
-    "https://scribe.rip/{url}",       # scribe mirrors the article cleanly
-    "https://freedium.cfd/{url}",     # alternative (sometimes down)
+    "https://readmedium.com/en/{url}",  # readmedium works
+    "https://scribe.rip/{url}",         # scribe mirrors the article cleanly
+    "https://freedium.cfd/{url}",       # alternative (sometimes down)
 ]
@@ -359,26 +360,25 @@ def _parse_proxy_page(html: str, orig_url: str) -> tuple[str, str, str]:
     return title, "\n".join(lines), thumbnail, proxy_author, proxy_date
-def _fetch_medium(url: str, timeout: int) -> tuple[str, str, str] | None:
-    """
-    Try each Medium proxy in order; return first successful result.
-    """
+def _fetch_medium(url: str, timeout: int) -> tuple[str, str, str, str, str] | None:
+    # Use r.jina.ai to cleanly extract Medium articles (bypasses Cloudflare).
     import requests
-    for proxy_tpl in _MEDIUM_PROXIES:
-        proxy_url = proxy_tpl.format(url=url)
-        try:
-            print(f"    [medium] Trying {proxy_url[:55]}...")
-            r = requests.get(proxy_url, headers=_HEADERS,
-                             timeout=timeout, allow_redirects=True)
-            r.raise_for_status()
-            title, text, thumbnail, auth, pd = _parse_proxy_page(r.text, url)
-            if len(text) > 200:
-                return title, text, thumbnail, auth, pd
-            print(f"    [medium] {proxy_url[:40]} returned too little text")
-        except Exception as e:
-            print(f"    [medium] {proxy_url[:40]} failed: {e}")
+    try:
+        print(f"    [medium] Fetching via r.jina.ai...")
+        jina_url = f"https://r.jina.ai/{url}"
+        r = requests.get(jina_url, headers={"Accept": "application/json"}, timeout=timeout)
+        r.raise_for_status()
+        data = r.json().get("data", {})
+        title = data.get("title", "")
+        text = data.get("content", "")
+        author = data.get("author", "")
+        image = data.get("image", "")
+        if text and len(text) > 200:
+            return title, text, image, author, ""
+        print("    [medium] r.jina.ai returned too little text")
+    except Exception as e:
+        print(f"    [medium] r.jina.ai failed: {e}")
     return None

package/payload/analyzers/youtube_analyzer.py CHANGED Viewed

@@ -45,7 +45,7 @@ identifiable background music, write "No background music". If it's voiceover
 only, write "Voiceover only".]
 📂 CATEGORY:
-[Choose exactly ONE from: product, places, recipe, software, book, tv shows, workout, film, event, other]
+[Choose exactly ONE from: product, places, recipe, food, software, book, tv shows, fitness, film, event, other]
 Be specific, accurate, and extractive — pull out real names, numbers, and facts from the video."""

package/payload/main.py CHANGED Viewed

@@ -138,7 +138,7 @@ Generate a report in this EXACT format:
 [Music/song name if found, or "No background music" or "Voiceover only"]
 📂 CATEGORY:
-[Choose ONE from: product, places, recipe, software, book, tv shows, workout, film, event]
+[Choose ONE from: product, places, recipe, food, software, book, tv shows, fitness, film, event]
 Be specific, concise, and actionable. Focus on useful information."""
@@ -252,7 +252,7 @@ def parse_summary(summary_text):
     # Fallback: Auto-detect category if empty or unrecognised
     valid_categories = {'product', 'places', 'recipe', 'software', 'book',
-                        'tv shows', 'workout', 'film', 'event', 'other'}
+                        'tv shows', 'fitness', 'film', 'event', 'food', 'other'}
     if not category or category not in valid_categories:
         category = auto_detect_category(summary_text, title, summary, tags)
@@ -271,10 +271,11 @@ def auto_detect_category(summary_text, title, summary, tags):
     category_keywords = {
         'product': ['camera', 'device', 'gadget', 'tech', 'phone', 'laptop', 'review', 'unbox', 'product', 'dji', 'osmo', 'action cam'],
         'places': ['travel', 'trip', 'visit', 'destination', 'village', 'city', 'mountain', 'beach', 'hotel', 'itinerary', 'sikkim', 'location'],
-        'recipe': ['recipe', 'cooking', 'food', 'dish', 'ingredients', 'cook', 'bake', 'meal', 'cuisine'],
+        'recipe': ['recipe', 'cooking', 'dish', 'ingredients', 'cook', 'bake'],
+        'food': ['food', 'meal', 'cuisine', 'restaurant', 'cafe', 'dining', 'eat'],
         'software': ['app', 'software', 'code', 'programming', 'developer', 'api', 'python', 'javascript'],
         'book': ['book', 'novel', 'author', 'read', 'literature', 'story', 'chapter'],
-        'workout': ['workout', 'fitness', 'exercise', 'gym', 'training', 'muscle', 'cardio', 'yoga'],
+        'fitness': ['workout', 'fitness', 'exercise', 'gym', 'training', 'muscle', 'cardio', 'yoga'],
         'film': ['movie', 'film', 'cinema', 'actor', 'actress', 'director', 'trailer', 'premiere'],
         'tv shows': ['series', 'episode', 'season', 'show', 'tv show', 'streaming', 'netflix'],
         'event': ['event', 'concert', 'festival', 'conference', 'meetup', 'workshop', 'seminar']

package/payload/requirements.txt CHANGED Viewed

@@ -15,7 +15,7 @@ google-genai>=0.8.0
 # ── Web Page Fetching & Parsing ───────────────────────────────────────────────
 beautifulsoup4>=4.12.0
 trafilatura>=1.12.0
-newspaper4k>=0.9.0
+newspaper4k[nlp]>=0.9.0
 lxml>=5.0.0
 lxml_html_clean>=0.1.0
 htmldate>=1.9.0

package/payload/start.py CHANGED Viewed

@@ -142,7 +142,7 @@ CORE_PACKAGES = [
     "google-genai>=0.8.0",
     "beautifulsoup4>=4.12.0",
     "trafilatura>=1.12.0",
-    "newspaper4k>=0.9.0",
+    "newspaper4k[nlp]>=0.9.0",
     "lxml>=5.0.0",
     "lxml_html_clean>=0.1.0",
     "htmldate>=1.9.0",