videonut 1.2.8 → 1.3.0

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (58)
  1. package/README.md +272 -272
  2. package/USER_GUIDE.md +90 -90
  3. package/agents/core/eic.md +771 -771
  4. package/agents/creative/director.md +246 -246
  5. package/agents/creative/scriptwriter.md +207 -207
  6. package/agents/research/investigator.md +394 -394
  7. package/agents/technical/archivist.md +288 -288
  8. package/agents/technical/scavenger.md +247 -247
  9. package/config.yaml +61 -61
  10. package/docs/scriptwriter.md +42 -42
  11. package/file_validator.py +186 -186
  12. package/memory/short_term/asset_manifest.md +64 -64
  13. package/memory/short_term/investigation_dossier.md +31 -31
  14. package/memory/short_term/master_script.md +51 -51
  15. package/package.json +61 -64
  16. package/requirements.txt +8 -8
  17. package/tools/check_env.py +76 -76
  18. package/tools/downloaders/caption_reader.py +237 -237
  19. package/tools/downloaders/clip_grabber.py +82 -82
  20. package/tools/downloaders/image_grabber.py +105 -105
  21. package/tools/downloaders/pdf_reader.py +163 -163
  22. package/tools/downloaders/screenshotter.py +58 -58
  23. package/tools/downloaders/web_reader.py +69 -69
  24. package/tools/validators/link_checker.py +45 -45
  25. package/workflow_orchestrator.py +336 -336
  26. package/.claude/commands/archivist.toml +0 -12
  27. package/.claude/commands/director.toml +0 -12
  28. package/.claude/commands/eic.toml +0 -12
  29. package/.claude/commands/investigator.toml +0 -12
  30. package/.claude/commands/prompt.toml +0 -12
  31. package/.claude/commands/scavenger.toml +0 -12
  32. package/.claude/commands/scout.toml +0 -12
  33. package/.claude/commands/scriptwriter.toml +0 -12
  34. package/.claude/commands/seo.toml +0 -12
  35. package/.claude/commands/thumbnail.toml +0 -12
  36. package/.claude/commands/topic_scout.toml +0 -12
  37. package/.gemini/commands/archivist.toml +0 -12
  38. package/.gemini/commands/director.toml +0 -12
  39. package/.gemini/commands/eic.toml +0 -12
  40. package/.gemini/commands/investigator.toml +0 -12
  41. package/.gemini/commands/prompt.toml +0 -12
  42. package/.gemini/commands/scavenger.toml +0 -12
  43. package/.gemini/commands/scout.toml +0 -12
  44. package/.gemini/commands/scriptwriter.toml +0 -12
  45. package/.gemini/commands/seo.toml +0 -12
  46. package/.gemini/commands/thumbnail.toml +0 -12
  47. package/.gemini/commands/topic_scout.toml +0 -12
  48. package/.qwen/commands/archivist.toml +0 -12
  49. package/.qwen/commands/director.toml +0 -12
  50. package/.qwen/commands/eic.toml +0 -12
  51. package/.qwen/commands/investigator.toml +0 -12
  52. package/.qwen/commands/prompt.toml +0 -12
  53. package/.qwen/commands/scavenger.toml +0 -12
  54. package/.qwen/commands/scout.toml +0 -12
  55. package/.qwen/commands/scriptwriter.toml +0 -12
  56. package/.qwen/commands/seo.toml +0 -12
  57. package/.qwen/commands/thumbnail.toml +0 -12
  58. package/.qwen/commands/topic_scout.toml +0 -12
package/tools/downloaders/web_reader.py
@@ -1,69 +1,69 @@
- import sys
- import argparse
- import time
- from random import uniform
- from playwright.sync_api import sync_playwright
-
- def read_webpage(url):
-     try:
-         # Add random delay to implement rate limiting
-         delay = uniform(1, 3)  # Random delay between 1-3 seconds
-         print(f"Rate limiting: Waiting {delay:.2f} seconds before accessing {url}")
-         time.sleep(delay)
-
-         with sync_playwright() as p:
-             # Launch browser (headless by default)
-             browser = p.chromium.launch()
-             page = browser.new_page()
-
-             # Set additional headers to appear more like a real user
-             page.set_extra_http_headers({
-                 "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
-                 "Accept-Language": "en-US,en;q=0.5",
-                 "Accept-Encoding": "gzip, deflate",
-                 "Connection": "keep-alive",
-                 "Upgrade-Insecure-Requests": "1",
-             })
-
-             # Navigate with a reasonable timeout
-             page.goto(url, timeout=30000)
-
-             # Wait for content to load (basic heuristic)
-             page.wait_for_load_state("domcontentloaded")
-
-             # Get the text content
-             # We use evaluate to get innerText which mimics what a user sees (hidden text is ignored)
-             text = page.evaluate("document.body.innerText")
-
-             # Basic cleanup: Remove excessive newlines
-             clean_text = '\n'.join([line.strip() for line in text.splitlines() if line.strip()])
-
-             # Smart truncation: Preserve intro AND conclusion (critical for research)
-             MAX_TOTAL = 40000  # Increased from 25000
-             INTRO_SIZE = 8000  # First portion (hook/summary)
-             OUTRO_SIZE = 8000  # Last portion (conclusion/recommendations)
-
-             if len(clean_text) > MAX_TOTAL:
-                 intro = clean_text[:INTRO_SIZE]
-                 outro = clean_text[-OUTRO_SIZE:]
-                 truncated_chars = len(clean_text) - MAX_TOTAL
-
-                 print(f"--- CONTENT START (First {INTRO_SIZE} chars) ---")
-                 print(intro)
-                 print(f"\n\n[... {truncated_chars:,} CHARACTERS TRUNCATED - Middle section omitted to preserve intro and conclusion ...]\n\n")
-                 print(f"--- CONTENT END (Last {OUTRO_SIZE} chars) ---")
-                 print(outro)
-             else:
-                 print(clean_text)
-
-             browser.close()
-
-     except Exception as e:
-         print(f"Error reading webpage: {e}")
-         sys.exit(1)
-
- if __name__ == "__main__":
-     parser = argparse.ArgumentParser()
-     parser.add_argument("--url", required=True)
-     args = parser.parse_args()
-     read_webpage(args.url)
+ import sys
+ import argparse
+ import time
+ from random import uniform
+ from playwright.sync_api import sync_playwright
+
+ def read_webpage(url):
+     try:
+         # Add random delay to implement rate limiting
+         delay = uniform(1, 3)  # Random delay between 1-3 seconds
+         print(f"Rate limiting: Waiting {delay:.2f} seconds before accessing {url}")
+         time.sleep(delay)
+
+         with sync_playwright() as p:
+             # Launch browser (headless by default)
+             browser = p.chromium.launch()
+             page = browser.new_page()
+
+             # Set additional headers to appear more like a real user
+             page.set_extra_http_headers({
+                 "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+                 "Accept-Language": "en-US,en;q=0.5",
+                 "Accept-Encoding": "gzip, deflate",
+                 "Connection": "keep-alive",
+                 "Upgrade-Insecure-Requests": "1",
+             })
+
+             # Navigate with a reasonable timeout
+             page.goto(url, timeout=30000)
+
+             # Wait for content to load (basic heuristic)
+             page.wait_for_load_state("domcontentloaded")
+
+             # Get the text content
+             # We use evaluate to get innerText which mimics what a user sees (hidden text is ignored)
+             text = page.evaluate("document.body.innerText")
+
+             # Basic cleanup: Remove excessive newlines
+             clean_text = '\n'.join([line.strip() for line in text.splitlines() if line.strip()])
+
+             # Smart truncation: Preserve intro AND conclusion (critical for research)
+             MAX_TOTAL = 40000  # Increased from 25000
+             INTRO_SIZE = 8000  # First portion (hook/summary)
+             OUTRO_SIZE = 8000  # Last portion (conclusion/recommendations)
+
+             if len(clean_text) > MAX_TOTAL:
+                 intro = clean_text[:INTRO_SIZE]
+                 outro = clean_text[-OUTRO_SIZE:]
+                 truncated_chars = len(clean_text) - MAX_TOTAL
+
+                 print(f"--- CONTENT START (First {INTRO_SIZE} chars) ---")
+                 print(intro)
+                 print(f"\n\n[... {truncated_chars:,} CHARACTERS TRUNCATED - Middle section omitted to preserve intro and conclusion ...]\n\n")
+                 print(f"--- CONTENT END (Last {OUTRO_SIZE} chars) ---")
+                 print(outro)
+             else:
+                 print(clean_text)
+
+             browser.close()
+
+     except Exception as e:
+         print(f"Error reading webpage: {e}")
+         sys.exit(1)
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--url", required=True)
+     args = parser.parse_args()
+     read_webpage(args.url)
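
For reference, web_reader.py is a command-line tool: it requires a --url flag and prints the cleaned page text to stdout, keeping only the first and last 8,000 characters once the page exceeds 40,000. A minimal sketch of driving it from another script, assuming it runs from the same directory so that the web_reader module (named after the file; not declared anywhere in the package) is importable:

    # Hypothetical driver, run from package/tools/downloaders/.
    # Requires Playwright and a browser: pip install playwright && playwright install chromium
    from web_reader import read_webpage

    # Sleeps 1-3 seconds for rate limiting, renders the page in headless
    # Chromium, prints the (possibly truncated) text, and exits with
    # status 1 on any error.
    read_webpage("https://example.com")
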
package/tools/validators/link_checker.py
@@ -1,45 +1,45 @@
- import requests
- import sys
- import time
- from random import uniform
-
- def check_link(url):
-     # Add random delay to implement rate limiting
-     delay = uniform(1, 3)  # Random delay between 1-3 seconds
-     print(f"Rate limiting: Waiting {delay:.2f} seconds before checking {url}", file=sys.stderr)
-     time.sleep(delay)
-
-     try:
-         # More realistic User-Agent to appear like a regular browser
-         headers = {
-             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
-             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
-             'Accept-Language': 'en-US,en;q=0.5',
-             'Accept-Encoding': 'gzip, deflate',
-             'Connection': 'keep-alive',
-             'Upgrade-Insecure-Requests': '1',
-         }
-
-         response = requests.head(url, headers=headers, timeout=5, allow_redirects=True)
-
-         if response.status_code == 200:
-             return True, "OK"
-         else:
-             # Retry with GET if HEAD fails (some servers block HEAD)
-             response = requests.get(url, headers=headers, timeout=5, stream=True)
-             if response.status_code == 200:
-                 return True, "OK"
-             return False, f"Status Code: {response.status_code}"
-
-     except requests.exceptions.RequestException as e:
-         return False, f"Request error: {str(e)}"
-     except Exception as e:
-         return False, f"General error: {str(e)}"
-
- if __name__ == "__main__":
-     if len(sys.argv) > 1:
-         url = sys.argv[1]
-         success, msg = check_link(url)
-         print(f"{'VALID' if success else 'INVALID'}: {msg}")
-     else:
-         print("Usage: python link_checker.py [URL]")
+ import requests
+ import sys
+ import time
+ from random import uniform
+
+ def check_link(url):
+     # Add random delay to implement rate limiting
+     delay = uniform(1, 3)  # Random delay between 1-3 seconds
+     print(f"Rate limiting: Waiting {delay:.2f} seconds before checking {url}", file=sys.stderr)
+     time.sleep(delay)
+
+     try:
+         # More realistic User-Agent to appear like a regular browser
+         headers = {
+             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+             'Accept-Language': 'en-US,en;q=0.5',
+             'Accept-Encoding': 'gzip, deflate',
+             'Connection': 'keep-alive',
+             'Upgrade-Insecure-Requests': '1',
+         }
+
+         response = requests.head(url, headers=headers, timeout=5, allow_redirects=True)
+
+         if response.status_code == 200:
+             return True, "OK"
+         else:
+             # Retry with GET if HEAD fails (some servers block HEAD)
+             response = requests.get(url, headers=headers, timeout=5, stream=True)
+             if response.status_code == 200:
+                 return True, "OK"
+             return False, f"Status Code: {response.status_code}"
+
+     except requests.exceptions.RequestException as e:
+         return False, f"Request error: {str(e)}"
+     except Exception as e:
+         return False, f"General error: {str(e)}"
+
+ if __name__ == "__main__":
+     if len(sys.argv) > 1:
+         url = sys.argv[1]
+         success, msg = check_link(url)
+         print(f"{'VALID' if success else 'INVALID'}: {msg}")
+     else:
+         print("Usage: python link_checker.py [URL]")