agent_recipes-0.0.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270)
  1. agent_recipes/__init__.py +27 -0
  2. agent_recipes/recipe_runtime/__init__.py +28 -0
  3. agent_recipes/recipe_runtime/core.py +385 -0
  4. agent_recipes/templates/ai-ab-hook-tester/recipe.yaml +45 -0
  5. agent_recipes/templates/ai-ab-hook-tester/tools.py +169 -0
  6. agent_recipes/templates/ai-angle-generator/recipe.yaml +49 -0
  7. agent_recipes/templates/ai-angle-generator/tools.py +182 -0
  8. agent_recipes/templates/ai-api-doc-generator/README.md +59 -0
  9. agent_recipes/templates/ai-api-doc-generator/TEMPLATE.yaml +29 -0
  10. agent_recipes/templates/ai-api-tester/README.md +60 -0
  11. agent_recipes/templates/ai-api-tester/TEMPLATE.yaml +29 -0
  12. agent_recipes/templates/ai-audio-enhancer/README.md +59 -0
  13. agent_recipes/templates/ai-audio-enhancer/TEMPLATE.yaml +28 -0
  14. agent_recipes/templates/ai-audio-normalizer/README.md +13 -0
  15. agent_recipes/templates/ai-audio-normalizer/TEMPLATE.yaml +44 -0
  16. agent_recipes/templates/ai-audio-splitter/README.md +14 -0
  17. agent_recipes/templates/ai-audio-splitter/TEMPLATE.yaml +47 -0
  18. agent_recipes/templates/ai-background-music-generator/README.md +59 -0
  19. agent_recipes/templates/ai-background-music-generator/TEMPLATE.yaml +28 -0
  20. agent_recipes/templates/ai-background-remover/README.md +60 -0
  21. agent_recipes/templates/ai-background-remover/TEMPLATE.yaml +27 -0
  22. agent_recipes/templates/ai-barcode-scanner/README.md +60 -0
  23. agent_recipes/templates/ai-barcode-scanner/TEMPLATE.yaml +26 -0
  24. agent_recipes/templates/ai-blog-generator/README.md +59 -0
  25. agent_recipes/templates/ai-blog-generator/TEMPLATE.yaml +28 -0
  26. agent_recipes/templates/ai-brief-generator/recipe.yaml +52 -0
  27. agent_recipes/templates/ai-brief-generator/tools.py +231 -0
  28. agent_recipes/templates/ai-broll-builder/recipe.yaml +47 -0
  29. agent_recipes/templates/ai-broll-builder/tools.py +204 -0
  30. agent_recipes/templates/ai-calendar-scheduler/README.md +60 -0
  31. agent_recipes/templates/ai-calendar-scheduler/TEMPLATE.yaml +29 -0
  32. agent_recipes/templates/ai-changelog-generator/README.md +14 -0
  33. agent_recipes/templates/ai-changelog-generator/TEMPLATE.yaml +46 -0
  34. agent_recipes/templates/ai-chart-generator/README.md +61 -0
  35. agent_recipes/templates/ai-chart-generator/TEMPLATE.yaml +32 -0
  36. agent_recipes/templates/ai-code-documenter/README.md +12 -0
  37. agent_recipes/templates/ai-code-documenter/TEMPLATE.yaml +37 -0
  38. agent_recipes/templates/ai-code-refactorer/README.md +59 -0
  39. agent_recipes/templates/ai-code-refactorer/TEMPLATE.yaml +28 -0
  40. agent_recipes/templates/ai-code-reviewer/README.md +59 -0
  41. agent_recipes/templates/ai-code-reviewer/TEMPLATE.yaml +31 -0
  42. agent_recipes/templates/ai-color-palette-extractor/README.md +60 -0
  43. agent_recipes/templates/ai-color-palette-extractor/TEMPLATE.yaml +27 -0
  44. agent_recipes/templates/ai-comment-miner/recipe.yaml +40 -0
  45. agent_recipes/templates/ai-comment-miner/tools.py +141 -0
  46. agent_recipes/templates/ai-commit-message-generator/README.md +59 -0
  47. agent_recipes/templates/ai-commit-message-generator/TEMPLATE.yaml +31 -0
  48. agent_recipes/templates/ai-content-calendar/recipe.yaml +43 -0
  49. agent_recipes/templates/ai-content-calendar/tools.py +170 -0
  50. agent_recipes/templates/ai-context-enricher/recipe.yaml +48 -0
  51. agent_recipes/templates/ai-context-enricher/tools.py +258 -0
  52. agent_recipes/templates/ai-contract-analyzer/README.md +60 -0
  53. agent_recipes/templates/ai-contract-analyzer/TEMPLATE.yaml +34 -0
  54. agent_recipes/templates/ai-csv-cleaner/README.md +13 -0
  55. agent_recipes/templates/ai-csv-cleaner/TEMPLATE.yaml +45 -0
  56. agent_recipes/templates/ai-cta-generator/recipe.yaml +54 -0
  57. agent_recipes/templates/ai-cta-generator/tools.py +174 -0
  58. agent_recipes/templates/ai-daily-news-show/recipe.yaml +103 -0
  59. agent_recipes/templates/ai-daily-news-show/tools.py +308 -0
  60. agent_recipes/templates/ai-data-anonymizer/README.md +60 -0
  61. agent_recipes/templates/ai-data-anonymizer/TEMPLATE.yaml +31 -0
  62. agent_recipes/templates/ai-data-profiler/README.md +14 -0
  63. agent_recipes/templates/ai-data-profiler/TEMPLATE.yaml +42 -0
  64. agent_recipes/templates/ai-dependency-auditor/README.md +12 -0
  65. agent_recipes/templates/ai-dependency-auditor/TEMPLATE.yaml +37 -0
  66. agent_recipes/templates/ai-doc-translator/README.md +12 -0
  67. agent_recipes/templates/ai-doc-translator/TEMPLATE.yaml +41 -0
  68. agent_recipes/templates/ai-duplicate-finder/README.md +59 -0
  69. agent_recipes/templates/ai-duplicate-finder/TEMPLATE.yaml +28 -0
  70. agent_recipes/templates/ai-ebook-converter/README.md +60 -0
  71. agent_recipes/templates/ai-ebook-converter/TEMPLATE.yaml +27 -0
  72. agent_recipes/templates/ai-email-parser/README.md +59 -0
  73. agent_recipes/templates/ai-email-parser/TEMPLATE.yaml +29 -0
  74. agent_recipes/templates/ai-etl-pipeline/README.md +60 -0
  75. agent_recipes/templates/ai-etl-pipeline/TEMPLATE.yaml +30 -0
  76. agent_recipes/templates/ai-excel-formula-generator/README.md +59 -0
  77. agent_recipes/templates/ai-excel-formula-generator/TEMPLATE.yaml +28 -0
  78. agent_recipes/templates/ai-face-blur/README.md +60 -0
  79. agent_recipes/templates/ai-face-blur/TEMPLATE.yaml +28 -0
  80. agent_recipes/templates/ai-fact-checker/recipe.yaml +52 -0
  81. agent_recipes/templates/ai-fact-checker/tools.py +279 -0
  82. agent_recipes/templates/ai-faq-generator/README.md +59 -0
  83. agent_recipes/templates/ai-faq-generator/TEMPLATE.yaml +28 -0
  84. agent_recipes/templates/ai-file-organizer/README.md +59 -0
  85. agent_recipes/templates/ai-file-organizer/TEMPLATE.yaml +29 -0
  86. agent_recipes/templates/ai-folder-packager/README.md +15 -0
  87. agent_recipes/templates/ai-folder-packager/TEMPLATE.yaml +48 -0
  88. agent_recipes/templates/ai-form-filler/README.md +60 -0
  89. agent_recipes/templates/ai-form-filler/TEMPLATE.yaml +30 -0
  90. agent_recipes/templates/ai-hashtag-optimizer/recipe.yaml +45 -0
  91. agent_recipes/templates/ai-hashtag-optimizer/tools.py +134 -0
  92. agent_recipes/templates/ai-hook-generator/recipe.yaml +50 -0
  93. agent_recipes/templates/ai-hook-generator/tools.py +177 -0
  94. agent_recipes/templates/ai-image-captioner/README.md +59 -0
  95. agent_recipes/templates/ai-image-captioner/TEMPLATE.yaml +28 -0
  96. agent_recipes/templates/ai-image-cataloger/README.md +13 -0
  97. agent_recipes/templates/ai-image-cataloger/TEMPLATE.yaml +39 -0
  98. agent_recipes/templates/ai-image-optimizer/README.md +13 -0
  99. agent_recipes/templates/ai-image-optimizer/TEMPLATE.yaml +43 -0
  100. agent_recipes/templates/ai-image-resizer/README.md +12 -0
  101. agent_recipes/templates/ai-image-resizer/TEMPLATE.yaml +39 -0
  102. agent_recipes/templates/ai-image-tagger/README.md +59 -0
  103. agent_recipes/templates/ai-image-tagger/TEMPLATE.yaml +28 -0
  104. agent_recipes/templates/ai-image-upscaler/README.md +60 -0
  105. agent_recipes/templates/ai-image-upscaler/TEMPLATE.yaml +27 -0
  106. agent_recipes/templates/ai-invoice-processor/README.md +60 -0
  107. agent_recipes/templates/ai-invoice-processor/TEMPLATE.yaml +34 -0
  108. agent_recipes/templates/ai-json-to-csv/README.md +12 -0
  109. agent_recipes/templates/ai-json-to-csv/TEMPLATE.yaml +36 -0
  110. agent_recipes/templates/ai-log-analyzer/README.md +59 -0
  111. agent_recipes/templates/ai-log-analyzer/TEMPLATE.yaml +28 -0
  112. agent_recipes/templates/ai-markdown-to-pdf/README.md +12 -0
  113. agent_recipes/templates/ai-markdown-to-pdf/TEMPLATE.yaml +40 -0
  114. agent_recipes/templates/ai-meeting-summarizer/README.md +59 -0
  115. agent_recipes/templates/ai-meeting-summarizer/TEMPLATE.yaml +32 -0
  116. agent_recipes/templates/ai-meta-tag-generator/README.md +59 -0
  117. agent_recipes/templates/ai-meta-tag-generator/TEMPLATE.yaml +28 -0
  118. agent_recipes/templates/ai-news-capture-pack/recipe.yaml +42 -0
  119. agent_recipes/templates/ai-news-capture-pack/tools.py +150 -0
  120. agent_recipes/templates/ai-news-crawler/recipe.yaml +99 -0
  121. agent_recipes/templates/ai-news-crawler/tools.py +417 -0
  122. agent_recipes/templates/ai-news-deduper/recipe.yaml +47 -0
  123. agent_recipes/templates/ai-news-deduper/tools.py +235 -0
  124. agent_recipes/templates/ai-newsletter-generator/README.md +59 -0
  125. agent_recipes/templates/ai-newsletter-generator/TEMPLATE.yaml +28 -0
  126. agent_recipes/templates/ai-note-summarizer/README.md +59 -0
  127. agent_recipes/templates/ai-note-summarizer/TEMPLATE.yaml +28 -0
  128. agent_recipes/templates/ai-pdf-summarizer/README.md +12 -0
  129. agent_recipes/templates/ai-pdf-summarizer/TEMPLATE.yaml +40 -0
  130. agent_recipes/templates/ai-pdf-to-markdown/README.md +19 -0
  131. agent_recipes/templates/ai-pdf-to-markdown/TEMPLATE.yaml +63 -0
  132. agent_recipes/templates/ai-performance-analyzer/recipe.yaml +45 -0
  133. agent_recipes/templates/ai-performance-analyzer/tools.py +159 -0
  134. agent_recipes/templates/ai-podcast-cleaner/README.md +117 -0
  135. agent_recipes/templates/ai-podcast-cleaner/TEMPLATE.yaml +117 -0
  136. agent_recipes/templates/ai-podcast-cleaner/agents.yaml +59 -0
  137. agent_recipes/templates/ai-podcast-cleaner/workflow.yaml +77 -0
  138. agent_recipes/templates/ai-podcast-transcriber/README.md +59 -0
  139. agent_recipes/templates/ai-podcast-transcriber/TEMPLATE.yaml +32 -0
  140. agent_recipes/templates/ai-post-copy-generator/recipe.yaml +41 -0
  141. agent_recipes/templates/ai-post-copy-generator/tools.py +105 -0
  142. agent_recipes/templates/ai-product-description-generator/README.md +59 -0
  143. agent_recipes/templates/ai-product-description-generator/TEMPLATE.yaml +28 -0
  144. agent_recipes/templates/ai-publisher-pack/recipe.yaml +44 -0
  145. agent_recipes/templates/ai-publisher-pack/tools.py +252 -0
  146. agent_recipes/templates/ai-qr-code-generator/README.md +60 -0
  147. agent_recipes/templates/ai-qr-code-generator/TEMPLATE.yaml +26 -0
  148. agent_recipes/templates/ai-regex-generator/README.md +59 -0
  149. agent_recipes/templates/ai-regex-generator/TEMPLATE.yaml +28 -0
  150. agent_recipes/templates/ai-repo-readme/README.md +13 -0
  151. agent_recipes/templates/ai-repo-readme/TEMPLATE.yaml +42 -0
  152. agent_recipes/templates/ai-report-generator/README.md +61 -0
  153. agent_recipes/templates/ai-report-generator/TEMPLATE.yaml +32 -0
  154. agent_recipes/templates/ai-resume-parser/README.md +60 -0
  155. agent_recipes/templates/ai-resume-parser/TEMPLATE.yaml +33 -0
  156. agent_recipes/templates/ai-rss-aggregator/README.md +60 -0
  157. agent_recipes/templates/ai-rss-aggregator/TEMPLATE.yaml +30 -0
  158. agent_recipes/templates/ai-schema-generator/README.md +12 -0
  159. agent_recipes/templates/ai-schema-generator/TEMPLATE.yaml +34 -0
  160. agent_recipes/templates/ai-screen-recorder/recipe.yaml +43 -0
  161. agent_recipes/templates/ai-screen-recorder/tools.py +184 -0
  162. agent_recipes/templates/ai-screenshot-capture/recipe.yaml +45 -0
  163. agent_recipes/templates/ai-screenshot-capture/tools.py +231 -0
  164. agent_recipes/templates/ai-screenshot-ocr/README.md +12 -0
  165. agent_recipes/templates/ai-screenshot-ocr/TEMPLATE.yaml +37 -0
  166. agent_recipes/templates/ai-script-writer/recipe.yaml +58 -0
  167. agent_recipes/templates/ai-script-writer/tools.py +297 -0
  168. agent_recipes/templates/ai-sentiment-analyzer/README.md +59 -0
  169. agent_recipes/templates/ai-sentiment-analyzer/TEMPLATE.yaml +28 -0
  170. agent_recipes/templates/ai-seo-optimizer/README.md +59 -0
  171. agent_recipes/templates/ai-seo-optimizer/TEMPLATE.yaml +28 -0
  172. agent_recipes/templates/ai-signal-ranker/recipe.yaml +54 -0
  173. agent_recipes/templates/ai-signal-ranker/tools.py +256 -0
  174. agent_recipes/templates/ai-sitemap-generator/README.md +59 -0
  175. agent_recipes/templates/ai-sitemap-generator/TEMPLATE.yaml +26 -0
  176. agent_recipes/templates/ai-sitemap-scraper/README.md +13 -0
  177. agent_recipes/templates/ai-sitemap-scraper/TEMPLATE.yaml +41 -0
  178. agent_recipes/templates/ai-slide-generator/README.md +60 -0
  179. agent_recipes/templates/ai-slide-generator/TEMPLATE.yaml +29 -0
  180. agent_recipes/templates/ai-slide-to-notes/README.md +12 -0
  181. agent_recipes/templates/ai-slide-to-notes/TEMPLATE.yaml +37 -0
  182. agent_recipes/templates/ai-social-media-generator/README.md +59 -0
  183. agent_recipes/templates/ai-social-media-generator/TEMPLATE.yaml +28 -0
  184. agent_recipes/templates/ai-sql-generator/README.md +59 -0
  185. agent_recipes/templates/ai-sql-generator/TEMPLATE.yaml +28 -0
  186. agent_recipes/templates/ai-subtitle-generator/README.md +59 -0
  187. agent_recipes/templates/ai-subtitle-generator/TEMPLATE.yaml +31 -0
  188. agent_recipes/templates/ai-test-generator/README.md +59 -0
  189. agent_recipes/templates/ai-test-generator/TEMPLATE.yaml +28 -0
  190. agent_recipes/templates/ai-translation-batch/README.md +59 -0
  191. agent_recipes/templates/ai-translation-batch/TEMPLATE.yaml +28 -0
  192. agent_recipes/templates/ai-url-to-markdown/README.md +14 -0
  193. agent_recipes/templates/ai-url-to-markdown/TEMPLATE.yaml +44 -0
  194. agent_recipes/templates/ai-video-chapter-generator/README.md +59 -0
  195. agent_recipes/templates/ai-video-chapter-generator/TEMPLATE.yaml +32 -0
  196. agent_recipes/templates/ai-video-compressor/README.md +59 -0
  197. agent_recipes/templates/ai-video-compressor/TEMPLATE.yaml +28 -0
  198. agent_recipes/templates/ai-video-editor/README.md +254 -0
  199. agent_recipes/templates/ai-video-editor/TEMPLATE.yaml +139 -0
  200. agent_recipes/templates/ai-video-editor/agents.yaml +36 -0
  201. agent_recipes/templates/ai-video-editor/requirements.txt +8 -0
  202. agent_recipes/templates/ai-video-editor/scripts/run.sh +10 -0
  203. agent_recipes/templates/ai-video-editor/src/ai_video_editor/__init__.py +45 -0
  204. agent_recipes/templates/ai-video-editor/src/ai_video_editor/__main__.py +8 -0
  205. agent_recipes/templates/ai-video-editor/src/ai_video_editor/__pycache__/__init__.cpython-312.pyc +0 -0
  206. agent_recipes/templates/ai-video-editor/src/ai_video_editor/__pycache__/cli.cpython-312.pyc +0 -0
  207. agent_recipes/templates/ai-video-editor/src/ai_video_editor/__pycache__/config.cpython-312.pyc +0 -0
  208. agent_recipes/templates/ai-video-editor/src/ai_video_editor/__pycache__/ffmpeg_probe.cpython-312.pyc +0 -0
  209. agent_recipes/templates/ai-video-editor/src/ai_video_editor/__pycache__/heuristics.cpython-312.pyc +0 -0
  210. agent_recipes/templates/ai-video-editor/src/ai_video_editor/__pycache__/llm_plan.cpython-312.pyc +0 -0
  211. agent_recipes/templates/ai-video-editor/src/ai_video_editor/__pycache__/models.cpython-312.pyc +0 -0
  212. agent_recipes/templates/ai-video-editor/src/ai_video_editor/__pycache__/pipeline.cpython-312.pyc +0 -0
  213. agent_recipes/templates/ai-video-editor/src/ai_video_editor/__pycache__/render.cpython-312.pyc +0 -0
  214. agent_recipes/templates/ai-video-editor/src/ai_video_editor/__pycache__/timeline.cpython-312.pyc +0 -0
  215. agent_recipes/templates/ai-video-editor/src/ai_video_editor/__pycache__/transcribe.cpython-312.pyc +0 -0
  216. agent_recipes/templates/ai-video-editor/src/ai_video_editor/__pycache__/utils.cpython-312.pyc +0 -0
  217. agent_recipes/templates/ai-video-editor/src/ai_video_editor/cli.py +343 -0
  218. agent_recipes/templates/ai-video-editor/src/ai_video_editor/config.py +102 -0
  219. agent_recipes/templates/ai-video-editor/src/ai_video_editor/ffmpeg_probe.py +92 -0
  220. agent_recipes/templates/ai-video-editor/src/ai_video_editor/heuristics.py +119 -0
  221. agent_recipes/templates/ai-video-editor/src/ai_video_editor/llm_plan.py +277 -0
  222. agent_recipes/templates/ai-video-editor/src/ai_video_editor/models.py +343 -0
  223. agent_recipes/templates/ai-video-editor/src/ai_video_editor/pipeline.py +287 -0
  224. agent_recipes/templates/ai-video-editor/src/ai_video_editor/render.py +274 -0
  225. agent_recipes/templates/ai-video-editor/src/ai_video_editor/timeline.py +278 -0
  226. agent_recipes/templates/ai-video-editor/src/ai_video_editor/transcribe.py +233 -0
  227. agent_recipes/templates/ai-video-editor/src/ai_video_editor/utils.py +222 -0
  228. agent_recipes/templates/ai-video-editor/src/input.mov +0 -0
  229. agent_recipes/templates/ai-video-editor/src/out.mp4 +0 -0
  230. agent_recipes/templates/ai-video-editor/tests/test_heuristics.py +130 -0
  231. agent_recipes/templates/ai-video-editor/tests/test_models.py +152 -0
  232. agent_recipes/templates/ai-video-editor/tests/test_timeline.py +105 -0
  233. agent_recipes/templates/ai-video-editor/workflow.yaml +51 -0
  234. agent_recipes/templates/ai-video-highlight-extractor/README.md +60 -0
  235. agent_recipes/templates/ai-video-highlight-extractor/TEMPLATE.yaml +33 -0
  236. agent_recipes/templates/ai-video-merger/recipe.yaml +40 -0
  237. agent_recipes/templates/ai-video-merger/tools.py +172 -0
  238. agent_recipes/templates/ai-video-thumbnails/README.md +16 -0
  239. agent_recipes/templates/ai-video-thumbnails/TEMPLATE.yaml +53 -0
  240. agent_recipes/templates/ai-video-to-gif/README.md +14 -0
  241. agent_recipes/templates/ai-video-to-gif/TEMPLATE.yaml +64 -0
  242. agent_recipes/templates/ai-voice-cloner/README.md +59 -0
  243. agent_recipes/templates/ai-voice-cloner/TEMPLATE.yaml +31 -0
  244. agent_recipes/templates/ai-voiceover-generator/recipe.yaml +41 -0
  245. agent_recipes/templates/ai-voiceover-generator/tools.py +194 -0
  246. agent_recipes/templates/ai-watermark-adder/README.md +59 -0
  247. agent_recipes/templates/ai-watermark-adder/TEMPLATE.yaml +26 -0
  248. agent_recipes/templates/ai-watermark-remover/README.md +60 -0
  249. agent_recipes/templates/ai-watermark-remover/TEMPLATE.yaml +32 -0
  250. agent_recipes/templates/data-transformer/README.md +75 -0
  251. agent_recipes/templates/data-transformer/TEMPLATE.yaml +63 -0
  252. agent_recipes/templates/data-transformer/agents.yaml +70 -0
  253. agent_recipes/templates/data-transformer/workflow.yaml +92 -0
  254. agent_recipes/templates/shorts-generator/README.md +61 -0
  255. agent_recipes/templates/shorts-generator/TEMPLATE.yaml +65 -0
  256. agent_recipes/templates/shorts-generator/agents.yaml +66 -0
  257. agent_recipes/templates/shorts-generator/workflow.yaml +86 -0
  258. agent_recipes/templates/transcript-generator/README.md +103 -0
  259. agent_recipes/templates/transcript-generator/TEMPLATE.yaml +57 -0
  260. agent_recipes/templates/transcript-generator/agents.yaml +62 -0
  261. agent_recipes/templates/transcript-generator/workflow.yaml +82 -0
  262. agent_recipes/templates/video-editor/README.md +70 -0
  263. agent_recipes/templates/video-editor/TEMPLATE.yaml +55 -0
  264. agent_recipes/templates/video-editor/agents.yaml +68 -0
  265. agent_recipes/templates/video-editor/workflow.yaml +92 -0
  266. agent_recipes-0.0.5.dist-info/METADATA +145 -0
  267. agent_recipes-0.0.5.dist-info/RECORD +269 -0
  268. agent_recipes-0.0.5.dist-info/WHEEL +5 -0
  269. agent_recipes-0.0.5.dist-info/top_level.txt +1 -0
  270. (binary file with an undecodable, non-UTF-8 name) +0 -0
agent_recipes/templates/ai-news-capture-pack/tools.py
@@ -0,0 +1,150 @@
+"""
+AI News Capture Pack Tools
+
+Bundle assets per news story:
+- Screenshots
+- Metadata
+- Source links
+"""
+
+import json
+import logging
+import os
+import shutil
+import zipfile
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+logger = logging.getLogger(__name__)
+
+
+def capture_story_assets(
+    urls: List[str],
+    story_id: str,
+    output_dir: Optional[str] = None,
+) -> Dict[str, Any]:
+    """
+    Capture all assets for a news story.
+
+    Args:
+        urls: List of URLs to capture
+        story_id: Unique story identifier
+        output_dir: Output directory
+
+    Returns:
+        Dictionary with captured assets
+    """
+    output_dir = output_dir or f"./captures/{story_id}"
+    Path(output_dir).mkdir(parents=True, exist_ok=True)
+
+    assets = []
+
+    try:
+        from playwright.sync_api import sync_playwright
+
+        with sync_playwright() as p:
+            browser = p.chromium.launch(headless=True)
+            page = browser.new_page(viewport={"width": 1920, "height": 1080})
+
+            for i, url in enumerate(urls):
+                try:
+                    page.goto(url, wait_until="networkidle", timeout=30000)
+
+                    # Capture screenshot
+                    screenshot_path = os.path.join(output_dir, f"capture_{i}.png")
+                    page.screenshot(path=screenshot_path, full_page=True)
+
+                    # Extract metadata
+                    title = page.title()
+
+                    assets.append({
+                        "url": url,
+                        "screenshot": screenshot_path,
+                        "title": title,
+                        "captured_at": datetime.now(timezone.utc).isoformat(),
+                    })
+
+                except Exception as e:
+                    logger.warning(f"Error capturing {url}: {e}")
+                    assets.append({
+                        "url": url,
+                        "error": str(e),
+                    })
+
+            browser.close()
+
+    except ImportError:
+        logger.error("Playwright not installed")
+        return {"error": "Playwright not installed"}
+
+    return {
+        "story_id": story_id,
+        "assets": assets,
+        "output_dir": output_dir,
+        "total_captured": len([a for a in assets if "screenshot" in a]),
+    }
+
+
+def create_bundle(
+    story_id: str,
+    assets: List[Dict[str, Any]],
+    output_dir: Optional[str] = None,
+    include_metadata: bool = True,
+) -> Dict[str, Any]:
+    """
+    Create a bundled pack of story assets.
+
+    Args:
+        story_id: Story identifier
+        assets: List of asset dictionaries
+        output_dir: Output directory
+        include_metadata: Include metadata JSON
+
+    Returns:
+        Bundle info
+    """
+    output_dir = output_dir or "./bundles"
+    Path(output_dir).mkdir(parents=True, exist_ok=True)
+
+    bundle_name = f"{story_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+    bundle_dir = os.path.join(output_dir, bundle_name)
+    Path(bundle_dir).mkdir(parents=True, exist_ok=True)
+
+    bundled_files = []
+
+    # Copy assets to bundle
+    for asset in assets:
+        if "screenshot" in asset and os.path.exists(asset["screenshot"]):
+            filename = os.path.basename(asset["screenshot"])
+            dest = os.path.join(bundle_dir, filename)
+            shutil.copy2(asset["screenshot"], dest)
+            bundled_files.append(filename)
+
+    # Create metadata
+    if include_metadata:
+        metadata = {
+            "story_id": story_id,
+            "created_at": datetime.now(timezone.utc).isoformat(),
+            "assets": assets,
+            "files": bundled_files,
+        }
+        metadata_path = os.path.join(bundle_dir, "metadata.json")
+        with open(metadata_path, "w") as f:
+            json.dump(metadata, f, indent=2)
+
+    # Create zip
+    zip_path = f"{bundle_dir}.zip"
+    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
+        for root, dirs, files in os.walk(bundle_dir):
+            for file in files:
+                file_path = os.path.join(root, file)
+                arcname = os.path.relpath(file_path, bundle_dir)
+                zf.write(file_path, arcname)
+
+    return {
+        "bundle_path": zip_path,
+        "bundle_dir": bundle_dir,
+        "files": bundled_files,
+        "size_bytes": os.path.getsize(zip_path),
+    }
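Taken together, the two tools form a capture-then-package pipeline: capture_story_assets writes screenshots plus per-URL metadata, and create_bundle copies the screenshots into a timestamped directory, optionally writes metadata.json, and zips the result. A minimal usage sketch, not a documented entry point of the package: it assumes playwright is installed and Chromium has been fetched (playwright install chromium), and that this tools.py has been copied somewhere importable, since the hyphenated template directory is not a valid Python package name; the URL and story id are illustrative.

# Sketch only; `tools` is a hypothetical local copy of the template's tools.py.
import logging

import tools as capture_tools  # hypothetical import path

logging.basicConfig(level=logging.INFO)

captured = capture_tools.capture_story_assets(
    urls=["https://example.com/article"],  # illustrative URL
    story_id="story-001",                  # illustrative id
)

if "error" not in captured:
    bundle = capture_tools.create_bundle(
        story_id=captured["story_id"],
        assets=captured["assets"],
    )
    print(bundle["bundle_path"], bundle["size_bytes"])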
agent_recipes/templates/ai-news-crawler/recipe.yaml
@@ -0,0 +1,99 @@
+name: ai-news-crawler
+version: 1.0.0
+description: Crawl AI news from multiple sources including AI labs blogs, arXiv, GitHub trending, HackerNews, Reddit, and X lists
+author: PraisonAI
+tags:
+  - news
+  - crawler
+  - ai
+  - research
+  - aggregation
+
+config:
+  sources:
+    - hackernews
+    - reddit
+    - arxiv
+    - github_trending
+    - ai_blogs
+  max_articles: 50
+  time_window_hours: 24
+  include_content: true
+  output_format: json
+
+input:
+  type: object
+  properties:
+    sources:
+      type: array
+      description: List of sources to crawl
+      items:
+        type: string
+        enum: [hackernews, reddit, arxiv, github_trending, ai_blogs, x_lists]
+    max_articles:
+      type: integer
+      description: Maximum number of articles to fetch
+      default: 50
+    time_window_hours:
+      type: integer
+      description: Only fetch articles from the last N hours
+      default: 24
+
+output:
+  type: object
+  properties:
+    articles:
+      type: array
+      items:
+        type: object
+        properties:
+          title:
+            type: string
+          url:
+            type: string
+          source:
+            type: string
+          published:
+            type: string
+          content:
+            type: string
+          score:
+            type: number
+    crawl_metadata:
+      type: object
+      properties:
+        total_fetched:
+          type: integer
+        sources_crawled:
+          type: array
+        crawl_time:
+          type: string
+
+requires:
+  env:
+    - OPENAI_API_KEY
+  optional_env:
+    - TAVILY_API_KEY
+    - REDDIT_CLIENT_ID
+    - REDDIT_CLIENT_SECRET
+  packages:
+    - requests
+    - feedparser
+    - beautifulsoup4
+
+workflow:
+  agents:
+    - name: news_crawler
+      role: AI News Aggregator
+      goal: Crawl and collect AI-related news from multiple sources
+      backstory: Expert at finding and aggregating AI news from various platforms
+      tools:
+        - crawl_hackernews
+        - crawl_reddit
+        - crawl_arxiv
+        - crawl_github_trending
+        - search_web
+  tasks:
+    - name: crawl_sources
+      description: Crawl all configured sources for AI news
+      expected_output: List of articles with metadata
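The recipe's input block is written as plain JSON Schema, so a caller can validate a crawl request before handing it to the runtime. A sketch under assumptions: the jsonschema package is not among the recipe's declared packages, so it is an assumed extra dependency here, and the inlined schema dict simply mirrors the YAML above (note that jsonschema does not apply the declared defaults, it only validates).

# Sketch: validate a crawl request against the recipe's input schema.
# `jsonschema` is an assumed extra dependency, not declared by the recipe.
from jsonschema import ValidationError, validate

INPUT_SCHEMA = {
    "type": "object",
    "properties": {
        "sources": {
            "type": "array",
            "items": {
                "type": "string",
                "enum": ["hackernews", "reddit", "arxiv",
                         "github_trending", "ai_blogs", "x_lists"],
            },
        },
        "max_articles": {"type": "integer", "default": 50},
        "time_window_hours": {"type": "integer", "default": 24},
    },
}

request = {"sources": ["hackernews", "arxiv"], "max_articles": 30}

try:
    validate(instance=request, schema=INPUT_SCHEMA)
    print("valid crawl request")
except ValidationError as e:
    print(f"invalid request: {e.message}")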
agent_recipes/templates/ai-news-crawler/tools.py
@@ -0,0 +1,417 @@
+"""
+AI News Crawler Tools
+
+Tools for crawling AI news from multiple sources:
+- HackerNews
+- Reddit (r/MachineLearning, r/artificial, etc.)
+- arXiv (cs.AI, cs.LG, cs.CL)
+- GitHub Trending
+- AI Labs Blogs
+- Web Search (via Tavily)
+"""
+
+import json
+import os
+import logging
+from datetime import datetime, timedelta, timezone
+from typing import Any, Dict, List, Optional
+from urllib.parse import urljoin
+
+logger = logging.getLogger(__name__)
+
+
+def crawl_hackernews(
+    max_articles: int = 20,
+    time_window_hours: int = 24,
+    keywords: Optional[List[str]] = None,
+) -> List[Dict[str, Any]]:
+    """
+    Crawl HackerNews for AI-related stories.
+
+    Args:
+        max_articles: Maximum number of articles to fetch
+        time_window_hours: Only fetch articles from the last N hours
+        keywords: Filter by keywords (default: AI-related terms)
+
+    Returns:
+        List of article dictionaries
+    """
+    import requests
+
+    keywords = keywords or ["ai", "gpt", "llm", "machine learning", "openai", "anthropic", "google ai", "neural", "transformer"]
+
+    articles = []
+    base_url = "https://hacker-news.firebaseio.com/v0"
+
+    try:
+        # Get top stories
+        response = requests.get(f"{base_url}/topstories.json", timeout=10)
+        response.raise_for_status()
+        story_ids = response.json()[:100]  # Get top 100 to filter
+
+        cutoff_time = datetime.now(timezone.utc) - timedelta(hours=time_window_hours)
+
+        for story_id in story_ids:
+            if len(articles) >= max_articles:
+                break
+
+            try:
+                story_response = requests.get(f"{base_url}/item/{story_id}.json", timeout=5)
+                story = story_response.json()
+
+                if not story or story.get("type") != "story":
+                    continue
+
+                title = story.get("title", "").lower()
+
+                # Check if AI-related
+                if not any(kw in title for kw in keywords):
+                    continue
+
+                # Check time window
+                story_time = datetime.fromtimestamp(story.get("time", 0), tz=timezone.utc)
+                if story_time < cutoff_time:
+                    continue
+
+                articles.append({
+                    "title": story.get("title", ""),
+                    "url": story.get("url", f"https://news.ycombinator.com/item?id={story_id}"),
+                    "source": "hackernews",
+                    "published": story_time.isoformat(),
+                    "score": story.get("score", 0),
+                    "comments": story.get("descendants", 0),
+                    "author": story.get("by", ""),
+                    "content": "",  # HN doesn't provide content
+                })
+            except Exception as e:
+                logger.warning(f"Error fetching story {story_id}: {e}")
+                continue
+
+    except Exception as e:
+        logger.error(f"Error crawling HackerNews: {e}")
+
+    return articles
+
+
+def crawl_reddit(
+    subreddits: Optional[List[str]] = None,
+    max_articles: int = 20,
+    time_window_hours: int = 24,
+) -> List[Dict[str, Any]]:
+    """
+    Crawl Reddit for AI-related posts.
+
+    Args:
+        subreddits: List of subreddits to crawl
+        max_articles: Maximum number of articles to fetch
+        time_window_hours: Only fetch articles from the last N hours
+
+    Returns:
+        List of article dictionaries
+    """
+    import requests
+
+    subreddits = subreddits or ["MachineLearning", "artificial", "LocalLLaMA", "OpenAI", "ClaudeAI"]
+    articles = []
+
+    headers = {"User-Agent": "PraisonAI News Crawler 1.0"}
+
+    for subreddit in subreddits:
+        if len(articles) >= max_articles:
+            break
+
+        try:
+            url = f"https://www.reddit.com/r/{subreddit}/hot.json?limit=25"
+            response = requests.get(url, headers=headers, timeout=10)
+            response.raise_for_status()
+            data = response.json()
+
+            cutoff_time = datetime.now(timezone.utc) - timedelta(hours=time_window_hours)
+
+            for post in data.get("data", {}).get("children", []):
+                if len(articles) >= max_articles:
+                    break
+
+                post_data = post.get("data", {})
+
+                # Check time window
+                created = datetime.fromtimestamp(post_data.get("created_utc", 0), tz=timezone.utc)
+                if created < cutoff_time:
+                    continue
+
+                articles.append({
+                    "title": post_data.get("title", ""),
+                    "url": post_data.get("url", ""),
+                    "source": f"reddit/r/{subreddit}",
+                    "published": created.isoformat(),
+                    "score": post_data.get("score", 0),
+                    "comments": post_data.get("num_comments", 0),
+                    "author": post_data.get("author", ""),
+                    "content": post_data.get("selftext", "")[:500],
+                })
+
+        except Exception as e:
+            logger.warning(f"Error crawling r/{subreddit}: {e}")
+            continue
+
+    return articles
+
+
+def crawl_arxiv(
+    categories: Optional[List[str]] = None,
+    max_articles: int = 20,
+    time_window_hours: int = 48,
+) -> List[Dict[str, Any]]:
+    """
+    Crawl arXiv for AI research papers.
+
+    Args:
+        categories: arXiv categories to search
+        max_articles: Maximum number of papers to fetch
+        time_window_hours: Only fetch papers from the last N hours
+
+    Returns:
+        List of paper dictionaries
+    """
+    import requests
+    import xml.etree.ElementTree as ET
+
+    categories = categories or ["cs.AI", "cs.LG", "cs.CL", "cs.CV", "cs.NE"]
+    articles = []
+
+    try:
+        # Build query
+        cat_query = " OR ".join([f"cat:{cat}" for cat in categories])
+        url = f"http://export.arxiv.org/api/query?search_query={cat_query}&sortBy=submittedDate&sortOrder=descending&max_results={max_articles}"
+
+        response = requests.get(url, timeout=30)
+        response.raise_for_status()
+
+        # Parse XML
+        root = ET.fromstring(response.content)
+        ns = {"atom": "http://www.w3.org/2005/Atom"}
+
+        for entry in root.findall("atom:entry", ns):
+            title = entry.find("atom:title", ns)
+            summary = entry.find("atom:summary", ns)
+            published = entry.find("atom:published", ns)
+            link = entry.find("atom:id", ns)
+            authors = entry.findall("atom:author/atom:name", ns)
+
+            articles.append({
+                "title": title.text.strip() if title is not None else "",
+                "url": link.text if link is not None else "",
+                "source": "arxiv",
+                "published": published.text if published is not None else "",
+                "content": summary.text.strip()[:500] if summary is not None else "",
+                "authors": [a.text for a in authors],
+                "score": 0,
+            })
+
+    except Exception as e:
+        logger.error(f"Error crawling arXiv: {e}")
+
+    return articles
+
+
+def crawl_github_trending(
+    language: Optional[str] = None,
+    max_repos: int = 20,
+) -> List[Dict[str, Any]]:
+    """
+    Crawl GitHub trending repositories for AI projects.
+
+    Args:
+        language: Filter by programming language
+        max_repos: Maximum number of repos to fetch
+
+    Returns:
+        List of repository dictionaries
+    """
+    import requests
+    from bs4 import BeautifulSoup
+
+    articles = []
+
+    try:
+        url = "https://github.com/trending"
+        if language:
+            url += f"/{language}"
+        url += "?since=daily"
+
+        headers = {"User-Agent": "PraisonAI News Crawler 1.0"}
+        response = requests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()
+
+        soup = BeautifulSoup(response.text, "html.parser")
+
+        # AI-related keywords
+        ai_keywords = ["ai", "llm", "gpt", "transformer", "neural", "ml", "machine-learning",
+                       "deep-learning", "nlp", "vision", "agent", "rag", "embedding"]
+
+        for article in soup.select("article.Box-row")[:max_repos * 2]:
+            try:
+                repo_link = article.select_one("h2 a")
+                if not repo_link:
+                    continue
+
+                repo_name = repo_link.get_text(strip=True).replace("\n", "").replace(" ", "")
+                repo_url = "https://github.com" + repo_link.get("href", "")
+
+                description_elem = article.select_one("p")
+                description = description_elem.get_text(strip=True) if description_elem else ""
+
+                # Check if AI-related
+                text_to_check = (repo_name + " " + description).lower()
+                if not any(kw in text_to_check for kw in ai_keywords):
+                    continue
+
+                stars_elem = article.select_one("a[href*='/stargazers']")
+                stars = stars_elem.get_text(strip=True).replace(",", "") if stars_elem else "0"
+
+                articles.append({
+                    "title": repo_name,
+                    "url": repo_url,
+                    "source": "github_trending",
+                    "published": datetime.now(timezone.utc).isoformat(),
+                    "content": description,
+                    "score": int(stars) if stars.isdigit() else 0,
+                })
+
+                if len(articles) >= max_repos:
+                    break
+
+            except Exception as e:
+                logger.warning(f"Error parsing repo: {e}")
+                continue
+
+    except Exception as e:
+        logger.error(f"Error crawling GitHub trending: {e}")
+
+    return articles
+
+
+def search_web(
+    query: str,
+    max_results: int = 10,
+) -> List[Dict[str, Any]]:
+    """
+    Search the web for AI news using Tavily API.
+
+    Args:
+        query: Search query
+        max_results: Maximum number of results
+
+    Returns:
+        List of search result dictionaries
+    """
+    api_key = os.environ.get("TAVILY_API_KEY")
+    if not api_key:
+        logger.warning("TAVILY_API_KEY not set, skipping web search")
+        return []
+
+    import requests
+
+    articles = []
+
+    try:
+        response = requests.post(
+            "https://api.tavily.com/search",
+            json={
+                "api_key": api_key,
+                "query": query,
+                "search_depth": "advanced",
+                "max_results": max_results,
+                "include_answer": False,
+            },
+            timeout=30,
+        )
+        response.raise_for_status()
+        data = response.json()
+
+        for result in data.get("results", []):
+            articles.append({
+                "title": result.get("title", ""),
+                "url": result.get("url", ""),
+                "source": "web_search",
+                "published": datetime.now(timezone.utc).isoformat(),
+                "content": result.get("content", "")[:500],
+                "score": result.get("score", 0),
+            })
+
+    except Exception as e:
+        logger.error(f"Error in web search: {e}")
+
+    return articles
+
+
+def crawl_ai_news(
+    sources: Optional[List[str]] = None,
+    max_articles: int = 50,
+    time_window_hours: int = 24,
+    output_dir: Optional[str] = None,
+) -> Dict[str, Any]:
+    """
+    Main function to crawl AI news from all configured sources.
+
+    Args:
+        sources: List of sources to crawl
+        max_articles: Maximum total articles
+        time_window_hours: Time window for articles
+        output_dir: Optional directory to save results
+
+    Returns:
+        Dictionary with articles and metadata
+    """
+    sources = sources or ["hackernews", "reddit", "arxiv", "github_trending"]
+    all_articles = []
+    sources_crawled = []
+
+    per_source_limit = max(5, max_articles // len(sources))
+
+    for source in sources:
+        try:
+            if source == "hackernews":
+                articles = crawl_hackernews(max_articles=per_source_limit, time_window_hours=time_window_hours)
+            elif source == "reddit":
+                articles = crawl_reddit(max_articles=per_source_limit, time_window_hours=time_window_hours)
+            elif source == "arxiv":
+                articles = crawl_arxiv(max_articles=per_source_limit, time_window_hours=time_window_hours)
+            elif source == "github_trending":
+                articles = crawl_github_trending(max_repos=per_source_limit)
+            elif source == "web_search":
+                articles = search_web("AI news today", max_results=per_source_limit)
+            else:
+                logger.warning(f"Unknown source: {source}")
+                continue
+
+            all_articles.extend(articles)
+            sources_crawled.append(source)
+            logger.info(f"Crawled {len(articles)} articles from {source}")
+
+        except Exception as e:
+            logger.error(f"Error crawling {source}: {e}")
+
+    # Sort by score and limit
+    all_articles.sort(key=lambda x: x.get("score", 0), reverse=True)
+    all_articles = all_articles[:max_articles]
+
+    result = {
+        "articles": all_articles,
+        "crawl_metadata": {
+            "total_fetched": len(all_articles),
+            "sources_crawled": sources_crawled,
+            "crawl_time": datetime.now(timezone.utc).isoformat(),
+        }
+    }
+
+    # Save to file if output_dir specified
+    if output_dir:
+        import os
+        os.makedirs(output_dir, exist_ok=True)
+        output_path = os.path.join(output_dir, "crawled_news.json")
+        with open(output_path, "w") as f:
+            json.dump(result, f, indent=2)
+        logger.info(f"Saved results to {output_path}")
+
+    return result
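crawl_ai_news is the aggregate entry point: it splits the article budget across sources, merges the per-source results, sorts by score, and optionally writes crawled_news.json. A minimal sketch of calling it directly, under the same assumption as above that this tools.py has been copied somewhere importable; only requests and beautifulsoup4 are needed for the default sources, TAVILY_API_KEY only matters if web_search is requested, and the output directory is illustrative.

# Sketch: run the aggregate crawler over two sources and print the top hits.
# `tools` is a hypothetical local copy of tools.py; ./news_out is illustrative.
import logging

import tools as crawler_tools  # hypothetical import path

logging.basicConfig(level=logging.INFO)

result = crawler_tools.crawl_ai_news(
    sources=["hackernews", "arxiv"],
    max_articles=20,
    time_window_hours=24,
    output_dir="./news_out",  # writes crawled_news.json here
)

for article in result["articles"][:5]:
    print(f'{article["score"]:>6}  [{article["source"]}] {article["title"]}')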