superbrain-server 1.0.46 → 1.0.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superbrain-server",
3
- "version": "1.0.46",
3
+ "version": "1.0.48",
4
4
  "description": "1-Line Auto-Installer and Server Execution wrapper for SuperBrain",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -58,7 +58,7 @@ any products/places/tools mentioned, and the overall purpose of the page]
58
58
  [N/A — web page]
59
59
 
60
60
  📂 CATEGORY:
61
- [Choose exactly ONE from: product, places, recipe, software, book, tv shows, workout, film, event, other]
61
+ [Choose exactly ONE from: product, places, food, software, book, tv shows, fitness, film, event, other]
62
62
 
63
63
  Be specific and factual. Extract real names, numbers, and details from the content."""
64
64
 
@@ -287,8 +287,9 @@ def _fetch_reddit(url: str, timeout: int) -> tuple[str, str, str] | None:
287
287
 
288
288
  # Proxies tried left-to-right; {url} is replaced with the full article URL.
289
289
  _MEDIUM_PROXIES = [
290
- "https://scribe.rip/{url}", # scribe mirrors the article cleanly
291
- "https://freedium.cfd/{url}", # alternative (sometimes down)
290
+ "https://readmedium.com/en/{url}", # readmedium works
291
+ "https://scribe.rip/{url}", # scribe mirrors the article cleanly
292
+ "https://freedium.cfd/{url}", # alternative (sometimes down)
292
293
  ]
293
294
 
294
295
 
@@ -359,26 +360,31 @@ def _parse_proxy_page(html: str, orig_url: str) -> tuple[str, str, str]:
359
360
  return title, "\n".join(lines), thumbnail, proxy_author, proxy_date
360
361
 
361
362
 
362
- def _fetch_medium(url: str, timeout: int) -> tuple[str, str, str] | None:
363
- """
364
- Try each Medium proxy in order; return first successful result.
365
- """
363
+ def _fetch_medium(url: str, timeout: int) -> tuple[str, str, str, str, str] | None:
364
+ # Use r.jina.ai to cleanly extract Medium articles (bypasses Cloudflare).
366
365
  import requests
367
-
368
- for proxy_tpl in _MEDIUM_PROXIES:
369
- proxy_url = proxy_tpl.format(url=url)
370
- try:
371
- print(f" [medium] Trying {proxy_url[:55]}...")
372
- r = requests.get(proxy_url, headers=_HEADERS,
373
- timeout=timeout, allow_redirects=True)
374
- r.raise_for_status()
375
- title, text, thumbnail, auth, pd = _parse_proxy_page(r.text, url)
376
- if len(text) > 200:
377
- return title, text, thumbnail, auth, pd
378
- print(f" [medium] {proxy_url[:40]} returned too little text")
379
- except Exception as e:
380
- print(f" [medium] {proxy_url[:40]} failed: {e}")
381
-
366
+ try:
367
+ print(f" [medium] Fetching via r.jina.ai...")
368
+ jina_url = f"https://r.jina.ai/{url}"
369
+ r = requests.get(jina_url, headers={"Accept": "application/json"}, timeout=timeout)
370
+ r.raise_for_status()
371
+ data = r.json().get("data", {})
372
+ metadata = data.get("metadata", {})
373
+ title = data.get("title", "") or metadata.get("title", "")
374
+ text = data.get("content", "")
375
+ author = metadata.get("author", "") or metadata.get("article:author", "") or data.get("author", "")
376
+ image = metadata.get("og:image", "") or metadata.get("twitter:image:src", "")
377
+ post_date = metadata.get("article:published_time", "")
378
+ if post_date:
379
+ post_date = post_date[:10] # Just YYYY-MM-DD
380
+ if not image:
381
+ image = _get_favicon_url(url)
382
+ if text and len(text) > 200:
383
+ return title, text, image, author, post_date
384
+ print(" [medium] r.jina.ai returned too little text")
385
+ except Exception as e:
386
+ print(f" [medium] r.jina.ai failed: {e}")
387
+
382
388
  return None
383
389
 
384
390
 
@@ -45,7 +45,7 @@ identifiable background music, write "No background music". If it's voiceover
45
45
  only, write "Voiceover only".]
46
46
 
47
47
  📂 CATEGORY:
48
- [Choose exactly ONE from: product, places, recipe, software, book, tv shows, workout, film, event, other]
48
+ [Choose exactly ONE from: product, places, food, software, book, tv shows, fitness, film, event, other]
49
49
 
50
50
  Be specific, accurate, and extractive — pull out real names, numbers, and facts from the video."""
51
51
 
@@ -1,11 +1,11 @@
1
1
  {
2
2
  "groq_gpt_oss_20b": {
3
3
  "key": "groq_gpt_oss_20b",
4
- "avg_response_s": 1.476613248725698,
5
- "success_count": 81,
4
+ "avg_response_s": 1.4180567239439565,
5
+ "success_count": 84,
6
6
  "fail_count": 6,
7
7
  "down_until": null,
8
- "last_used": "2026-04-09T18:25:59.312220",
8
+ "last_used": "2026-04-09T21:32:47.146221",
9
9
  "last_error": null,
10
10
  "base_priority": 0.5
11
11
  },
package/payload/main.py CHANGED
@@ -138,7 +138,7 @@ Generate a report in this EXACT format:
138
138
  [Music/song name if found, or "No background music" or "Voiceover only"]
139
139
 
140
140
  📂 CATEGORY:
141
- [Choose ONE from: product, places, recipe, software, book, tv shows, workout, film, event]
141
+ [Choose ONE from: product, places, food, software, book, tv shows, fitness, film, event]
142
142
 
143
143
  Be specific, concise, and actionable. Focus on useful information."""
144
144
 
@@ -251,8 +251,8 @@ def parse_summary(summary_text):
251
251
  print(f"⚠️ Error parsing summary: {e}")
252
252
 
253
253
  # Fallback: Auto-detect category if empty or unrecognised
254
- valid_categories = {'product', 'places', 'recipe', 'software', 'book',
255
- 'tv shows', 'workout', 'film', 'event', 'other'}
254
+ valid_categories = {'product', 'places', 'software', 'book',
255
+ 'tv shows', 'fitness', 'film', 'event', 'food', 'other'}
256
256
  if not category or category not in valid_categories:
257
257
  category = auto_detect_category(summary_text, title, summary, tags)
258
258
 
@@ -271,10 +271,10 @@ def auto_detect_category(summary_text, title, summary, tags):
271
271
  category_keywords = {
272
272
  'product': ['camera', 'device', 'gadget', 'tech', 'phone', 'laptop', 'review', 'unbox', 'product', 'dji', 'osmo', 'action cam'],
273
273
  'places': ['travel', 'trip', 'visit', 'destination', 'village', 'city', 'mountain', 'beach', 'hotel', 'itinerary', 'sikkim', 'location'],
274
- 'recipe': ['recipe', 'cooking', 'food', 'dish', 'ingredients', 'cook', 'bake', 'meal', 'cuisine'],
274
+ 'food': ['food', 'meal', 'cuisine', 'restaurant', 'cafe', 'dining', 'eat', 'recipe', 'cooking', 'dish', 'ingredients', 'cook', 'bake'],
275
275
  'software': ['app', 'software', 'code', 'programming', 'developer', 'api', 'python', 'javascript'],
276
276
  'book': ['book', 'novel', 'author', 'read', 'literature', 'story', 'chapter'],
277
- 'workout': ['workout', 'fitness', 'exercise', 'gym', 'training', 'muscle', 'cardio', 'yoga'],
277
+ 'fitness': ['workout', 'fitness', 'exercise', 'gym', 'training', 'muscle', 'cardio', 'yoga'],
278
278
  'film': ['movie', 'film', 'cinema', 'actor', 'actress', 'director', 'trailer', 'premiere'],
279
279
  'tv shows': ['series', 'episode', 'season', 'show', 'tv show', 'streaming', 'netflix'],
280
280
  'event': ['event', 'concert', 'festival', 'conference', 'meetup', 'workshop', 'seminar']
@@ -15,7 +15,7 @@ google-genai>=0.8.0
15
15
  # ── Web Page Fetching & Parsing ───────────────────────────────────────────────
16
16
  beautifulsoup4>=4.12.0
17
17
  trafilatura>=1.12.0
18
- newspaper4k>=0.9.0
18
+ newspaper4k[nlp]>=0.9.0
19
19
  lxml>=5.0.0
20
20
  lxml_html_clean>=0.1.0
21
21
  htmldate>=1.9.0
package/payload/start.py CHANGED
@@ -142,7 +142,7 @@ CORE_PACKAGES = [
142
142
  "google-genai>=0.8.0",
143
143
  "beautifulsoup4>=4.12.0",
144
144
  "trafilatura>=1.12.0",
145
- "newspaper4k>=0.9.0",
145
+ "newspaper4k[nlp]>=0.9.0",
146
146
  "lxml>=5.0.0",
147
147
  "lxml_html_clean>=0.1.0",
148
148
  "htmldate>=1.9.0",