abx-plugins 0.9.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (355) hide show
  1. abx_plugins-0.9.2/.gitignore +6 -0
  2. abx_plugins-0.9.2/LICENSE +21 -0
  3. abx_plugins-0.9.2/PKG-INFO +208 -0
  4. abx_plugins-0.9.2/README.md +168 -0
  5. abx_plugins-0.9.2/abx_plugins/__init__.py +13 -0
  6. abx_plugins-0.9.2/abx_plugins/plugins/.coverage +0 -0
  7. abx_plugins-0.9.2/abx_plugins/plugins/__init__.py +1 -0
  8. abx_plugins-0.9.2/abx_plugins/plugins/accessibility/__init__.py +0 -0
  9. abx_plugins-0.9.2/abx_plugins/plugins/accessibility/config.json +21 -0
  10. abx_plugins-0.9.2/abx_plugins/plugins/accessibility/on_Snapshot__39_accessibility.js +223 -0
  11. abx_plugins-0.9.2/abx_plugins/plugins/accessibility/templates/icon.html +1 -0
  12. abx_plugins-0.9.2/abx_plugins/plugins/accessibility/tests/__init__.py +0 -0
  13. abx_plugins-0.9.2/abx_plugins/plugins/accessibility/tests/test_accessibility.py +217 -0
  14. abx_plugins-0.9.2/abx_plugins/plugins/apt/__init__.py +0 -0
  15. abx_plugins-0.9.2/abx_plugins/plugins/apt/on_Binary__13_apt_install.py +101 -0
  16. abx_plugins-0.9.2/abx_plugins/plugins/apt/templates/icon.html +0 -0
  17. abx_plugins-0.9.2/abx_plugins/plugins/apt/tests/__init__.py +0 -0
  18. abx_plugins-0.9.2/abx_plugins/plugins/apt/tests/test_apt_provider.py +155 -0
  19. abx_plugins-0.9.2/abx_plugins/plugins/archivedotorg/__init__.py +0 -0
  20. abx_plugins-0.9.2/abx_plugins/plugins/archivedotorg/config.json +26 -0
  21. abx_plugins-0.9.2/abx_plugins/plugins/archivedotorg/on_Snapshot__08_archivedotorg.bg.py +172 -0
  22. abx_plugins-0.9.2/abx_plugins/plugins/archivedotorg/templates/card.html +12 -0
  23. abx_plugins-0.9.2/abx_plugins/plugins/archivedotorg/templates/icon.html +1 -0
  24. abx_plugins-0.9.2/abx_plugins/plugins/archivedotorg/tests/__init__.py +0 -0
  25. abx_plugins-0.9.2/abx_plugins/plugins/archivedotorg/tests/test_archivedotorg.py +160 -0
  26. abx_plugins-0.9.2/abx_plugins/plugins/brew/__init__.py +0 -0
  27. abx_plugins-0.9.2/abx_plugins/plugins/brew/on_Binary__12_brew_install.py +107 -0
  28. abx_plugins-0.9.2/abx_plugins/plugins/brew/templates/icon.html +0 -0
  29. abx_plugins-0.9.2/abx_plugins/plugins/chrome/__init__.py +0 -0
  30. abx_plugins-0.9.2/abx_plugins/plugins/chrome/chrome_utils.js +2875 -0
  31. abx_plugins-0.9.2/abx_plugins/plugins/chrome/config.json +157 -0
  32. abx_plugins-0.9.2/abx_plugins/plugins/chrome/extract_cookies.js +191 -0
  33. abx_plugins-0.9.2/abx_plugins/plugins/chrome/on_Crawl__70_chrome_install.bg.py +56 -0
  34. abx_plugins-0.9.2/abx_plugins/plugins/chrome/on_Crawl__90_chrome_launch.bg.js +470 -0
  35. abx_plugins-0.9.2/abx_plugins/plugins/chrome/on_Crawl__91_chrome_wait.js +120 -0
  36. abx_plugins-0.9.2/abx_plugins/plugins/chrome/on_Snapshot__10_chrome_tab.bg.js +330 -0
  37. abx_plugins-0.9.2/abx_plugins/plugins/chrome/on_Snapshot__11_chrome_wait.js +143 -0
  38. abx_plugins-0.9.2/abx_plugins/plugins/chrome/on_Snapshot__30_chrome_navigate.js +181 -0
  39. abx_plugins-0.9.2/abx_plugins/plugins/chrome/templates/icon.html +1 -0
  40. abx_plugins-0.9.2/abx_plugins/plugins/chrome/tests/__init__.py +0 -0
  41. abx_plugins-0.9.2/abx_plugins/plugins/chrome/tests/chrome_test_helpers.py +1616 -0
  42. abx_plugins-0.9.2/abx_plugins/plugins/chrome/tests/test_chrome.py +1695 -0
  43. abx_plugins-0.9.2/abx_plugins/plugins/chrome/tests/test_chrome_test_helpers.py +293 -0
  44. abx_plugins-0.9.2/abx_plugins/plugins/consolelog/__init__.py +0 -0
  45. abx_plugins-0.9.2/abx_plugins/plugins/consolelog/config.json +21 -0
  46. abx_plugins-0.9.2/abx_plugins/plugins/consolelog/on_Snapshot__21_consolelog.bg.js +214 -0
  47. abx_plugins-0.9.2/abx_plugins/plugins/consolelog/templates/icon.html +1 -0
  48. abx_plugins-0.9.2/abx_plugins/plugins/consolelog/tests/__init__.py +0 -0
  49. abx_plugins-0.9.2/abx_plugins/plugins/consolelog/tests/test_consolelog.py +139 -0
  50. abx_plugins-0.9.2/abx_plugins/plugins/custom/__init__.py +0 -0
  51. abx_plugins-0.9.2/abx_plugins/plugins/custom/on_Binary__14_custom_install.py +98 -0
  52. abx_plugins-0.9.2/abx_plugins/plugins/custom/templates/icon.html +0 -0
  53. abx_plugins-0.9.2/abx_plugins/plugins/custom/tests/__init__.py +0 -0
  54. abx_plugins-0.9.2/abx_plugins/plugins/custom/tests/test_custom_provider.py +153 -0
  55. abx_plugins-0.9.2/abx_plugins/plugins/defuddle/__init__.py +0 -0
  56. abx_plugins-0.9.2/abx_plugins/plugins/defuddle/config.json +39 -0
  57. abx_plugins-0.9.2/abx_plugins/plugins/defuddle/on_Crawl__41_defuddle_install.bg.py +60 -0
  58. abx_plugins-0.9.2/abx_plugins/plugins/defuddle/on_Snapshot__57_defuddle.py +214 -0
  59. abx_plugins-0.9.2/abx_plugins/plugins/defuddle/tests/test_defuddle.py +280 -0
  60. abx_plugins-0.9.2/abx_plugins/plugins/dns/__init__.py +0 -0
  61. abx_plugins-0.9.2/abx_plugins/plugins/dns/config.json +21 -0
  62. abx_plugins-0.9.2/abx_plugins/plugins/dns/on_Snapshot__22_dns.bg.js +308 -0
  63. abx_plugins-0.9.2/abx_plugins/plugins/dns/templates/icon.html +1 -0
  64. abx_plugins-0.9.2/abx_plugins/plugins/dns/tests/__init__.py +0 -0
  65. abx_plugins-0.9.2/abx_plugins/plugins/dns/tests/conftest.py +12 -0
  66. abx_plugins-0.9.2/abx_plugins/plugins/dns/tests/test_dns.py +162 -0
  67. abx_plugins-0.9.2/abx_plugins/plugins/dom/__init__.py +0 -0
  68. abx_plugins-0.9.2/abx_plugins/plugins/dom/config.json +21 -0
  69. abx_plugins-0.9.2/abx_plugins/plugins/dom/on_Snapshot__53_dom.js +174 -0
  70. abx_plugins-0.9.2/abx_plugins/plugins/dom/templates/card.html +8 -0
  71. abx_plugins-0.9.2/abx_plugins/plugins/dom/templates/icon.html +1 -0
  72. abx_plugins-0.9.2/abx_plugins/plugins/dom/tests/__init__.py +0 -0
  73. abx_plugins-0.9.2/abx_plugins/plugins/dom/tests/conftest.py +12 -0
  74. abx_plugins-0.9.2/abx_plugins/plugins/dom/tests/test_dom.py +219 -0
  75. abx_plugins-0.9.2/abx_plugins/plugins/env/__init__.py +0 -0
  76. abx_plugins-0.9.2/abx_plugins/plugins/env/on_Binary__15_env_discover.py +75 -0
  77. abx_plugins-0.9.2/abx_plugins/plugins/env/templates/icon.html +0 -0
  78. abx_plugins-0.9.2/abx_plugins/plugins/env/tests/__init__.py +0 -0
  79. abx_plugins-0.9.2/abx_plugins/plugins/env/tests/test_env_provider.py +166 -0
  80. abx_plugins-0.9.2/abx_plugins/plugins/favicon/__init__.py +0 -0
  81. abx_plugins-0.9.2/abx_plugins/plugins/favicon/config.json +26 -0
  82. abx_plugins-0.9.2/abx_plugins/plugins/favicon/on_Snapshot__11_favicon.bg.py +168 -0
  83. abx_plugins-0.9.2/abx_plugins/plugins/favicon/templates/card.html +9 -0
  84. abx_plugins-0.9.2/abx_plugins/plugins/favicon/templates/icon.html +1 -0
  85. abx_plugins-0.9.2/abx_plugins/plugins/favicon/tests/__init__.py +0 -0
  86. abx_plugins-0.9.2/abx_plugins/plugins/favicon/tests/test_favicon.py +362 -0
  87. abx_plugins-0.9.2/abx_plugins/plugins/forumdl/__init__.py +0 -0
  88. abx_plugins-0.9.2/abx_plugins/plugins/forumdl/config.json +45 -0
  89. abx_plugins-0.9.2/abx_plugins/plugins/forumdl/on_Crawl__25_forumdl_install.bg.py +120 -0
  90. abx_plugins-0.9.2/abx_plugins/plugins/forumdl/on_Snapshot__04_forumdl.bg.py +290 -0
  91. abx_plugins-0.9.2/abx_plugins/plugins/forumdl/templates/card.html +7 -0
  92. abx_plugins-0.9.2/abx_plugins/plugins/forumdl/templates/full.html +147 -0
  93. abx_plugins-0.9.2/abx_plugins/plugins/forumdl/templates/icon.html +1 -0
  94. abx_plugins-0.9.2/abx_plugins/plugins/forumdl/tests/__init__.py +0 -0
  95. abx_plugins-0.9.2/abx_plugins/plugins/forumdl/tests/test_forumdl.py +382 -0
  96. abx_plugins-0.9.2/abx_plugins/plugins/gallerydl/__init__.py +0 -0
  97. abx_plugins-0.9.2/abx_plugins/plugins/gallerydl/config.json +54 -0
  98. abx_plugins-0.9.2/abx_plugins/plugins/gallerydl/on_Crawl__20_gallerydl_install.bg.py +62 -0
  99. abx_plugins-0.9.2/abx_plugins/plugins/gallerydl/on_Snapshot__03_gallerydl.bg.py +326 -0
  100. abx_plugins-0.9.2/abx_plugins/plugins/gallerydl/templates/card.html +11 -0
  101. abx_plugins-0.9.2/abx_plugins/plugins/gallerydl/templates/full.html +28 -0
  102. abx_plugins-0.9.2/abx_plugins/plugins/gallerydl/templates/icon.html +1 -0
  103. abx_plugins-0.9.2/abx_plugins/plugins/gallerydl/tests/__init__.py +0 -0
  104. abx_plugins-0.9.2/abx_plugins/plugins/gallerydl/tests/test_gallerydl.py +391 -0
  105. abx_plugins-0.9.2/abx_plugins/plugins/git/__init__.py +0 -0
  106. abx_plugins-0.9.2/abx_plugins/plugins/git/config.json +44 -0
  107. abx_plugins-0.9.2/abx_plugins/plugins/git/on_Crawl__05_git_install.bg.py +62 -0
  108. abx_plugins-0.9.2/abx_plugins/plugins/git/on_Snapshot__05_git.bg.py +160 -0
  109. abx_plugins-0.9.2/abx_plugins/plugins/git/templates/card.html +5 -0
  110. abx_plugins-0.9.2/abx_plugins/plugins/git/templates/icon.html +1 -0
  111. abx_plugins-0.9.2/abx_plugins/plugins/git/tests/__init__.py +0 -0
  112. abx_plugins-0.9.2/abx_plugins/plugins/git/tests/test_git.py +188 -0
  113. abx_plugins-0.9.2/abx_plugins/plugins/hashes/__init__.py +0 -0
  114. abx_plugins-0.9.2/abx_plugins/plugins/hashes/config.json +20 -0
  115. abx_plugins-0.9.2/abx_plugins/plugins/hashes/on_Snapshot__93_hashes.py +206 -0
  116. abx_plugins-0.9.2/abx_plugins/plugins/hashes/templates/icon.html +1 -0
  117. abx_plugins-0.9.2/abx_plugins/plugins/hashes/tests/__init__.py +0 -0
  118. abx_plugins-0.9.2/abx_plugins/plugins/hashes/tests/test_hashes.py +170 -0
  119. abx_plugins-0.9.2/abx_plugins/plugins/headers/__init__.py +0 -0
  120. abx_plugins-0.9.2/abx_plugins/plugins/headers/config.json +21 -0
  121. abx_plugins-0.9.2/abx_plugins/plugins/headers/on_Snapshot__27_headers.bg.js +292 -0
  122. abx_plugins-0.9.2/abx_plugins/plugins/headers/templates/icon.html +1 -0
  123. abx_plugins-0.9.2/abx_plugins/plugins/headers/tests/__init__.py +0 -0
  124. abx_plugins-0.9.2/abx_plugins/plugins/headers/tests/conftest.py +12 -0
  125. abx_plugins-0.9.2/abx_plugins/plugins/headers/tests/test_headers.py +516 -0
  126. abx_plugins-0.9.2/abx_plugins/plugins/htmltotext/__init__.py +0 -0
  127. abx_plugins-0.9.2/abx_plugins/plugins/htmltotext/config.json +20 -0
  128. abx_plugins-0.9.2/abx_plugins/plugins/htmltotext/on_Snapshot__58_htmltotext.py +175 -0
  129. abx_plugins-0.9.2/abx_plugins/plugins/htmltotext/templates/icon.html +1 -0
  130. abx_plugins-0.9.2/abx_plugins/plugins/htmltotext/tests/__init__.py +0 -0
  131. abx_plugins-0.9.2/abx_plugins/plugins/htmltotext/tests/test_htmltotext.py +126 -0
  132. abx_plugins-0.9.2/abx_plugins/plugins/infiniscroll/__init__.py +0 -0
  133. abx_plugins-0.9.2/abx_plugins/plugins/infiniscroll/config.json +51 -0
  134. abx_plugins-0.9.2/abx_plugins/plugins/infiniscroll/on_Snapshot__45_infiniscroll.js +378 -0
  135. abx_plugins-0.9.2/abx_plugins/plugins/infiniscroll/templates/icon.html +1 -0
  136. abx_plugins-0.9.2/abx_plugins/plugins/infiniscroll/tests/__init__.py +0 -0
  137. abx_plugins-0.9.2/abx_plugins/plugins/infiniscroll/tests/test_infiniscroll.py +386 -0
  138. abx_plugins-0.9.2/abx_plugins/plugins/istilldontcareaboutcookies/__init__.py +0 -0
  139. abx_plugins-0.9.2/abx_plugins/plugins/istilldontcareaboutcookies/config.json +14 -0
  140. abx_plugins-0.9.2/abx_plugins/plugins/istilldontcareaboutcookies/on_Crawl__81_install_istilldontcareaboutcookies_extension.js +126 -0
  141. abx_plugins-0.9.2/abx_plugins/plugins/istilldontcareaboutcookies/templates/icon.html +0 -0
  142. abx_plugins-0.9.2/abx_plugins/plugins/istilldontcareaboutcookies/tests/__init__.py +0 -0
  143. abx_plugins-0.9.2/abx_plugins/plugins/istilldontcareaboutcookies/tests/test_istilldontcareaboutcookies.py +668 -0
  144. abx_plugins-0.9.2/abx_plugins/plugins/media/__init__.py +0 -0
  145. abx_plugins-0.9.2/abx_plugins/plugins/media/tests/__init__.py +0 -0
  146. abx_plugins-0.9.2/abx_plugins/plugins/mercury/__init__.py +0 -0
  147. abx_plugins-0.9.2/abx_plugins/plugins/mercury/config.json +40 -0
  148. abx_plugins-0.9.2/abx_plugins/plugins/mercury/on_Crawl__40_mercury_install.bg.py +90 -0
  149. abx_plugins-0.9.2/abx_plugins/plugins/mercury/on_Snapshot__57_mercury.py +223 -0
  150. abx_plugins-0.9.2/abx_plugins/plugins/mercury/templates/card.html +8 -0
  151. abx_plugins-0.9.2/abx_plugins/plugins/mercury/templates/icon.html +1 -0
  152. abx_plugins-0.9.2/abx_plugins/plugins/mercury/tests/__init__.py +0 -0
  153. abx_plugins-0.9.2/abx_plugins/plugins/mercury/tests/test_mercury.py +425 -0
  154. abx_plugins-0.9.2/abx_plugins/plugins/modalcloser/__init__.py +0 -0
  155. abx_plugins-0.9.2/abx_plugins/plugins/modalcloser/config.json +26 -0
  156. abx_plugins-0.9.2/abx_plugins/plugins/modalcloser/on_Snapshot__15_modalcloser.bg.js +322 -0
  157. abx_plugins-0.9.2/abx_plugins/plugins/modalcloser/templates/icon.html +1 -0
  158. abx_plugins-0.9.2/abx_plugins/plugins/modalcloser/tests/__init__.py +0 -0
  159. abx_plugins-0.9.2/abx_plugins/plugins/modalcloser/tests/test_modalcloser.py +552 -0
  160. abx_plugins-0.9.2/abx_plugins/plugins/npm/__init__.py +0 -0
  161. abx_plugins-0.9.2/abx_plugins/plugins/npm/on_Binary__10_npm_install.py +150 -0
  162. abx_plugins-0.9.2/abx_plugins/plugins/npm/on_Crawl__00_npm_install.py +65 -0
  163. abx_plugins-0.9.2/abx_plugins/plugins/npm/templates/icon.html +0 -0
  164. abx_plugins-0.9.2/abx_plugins/plugins/npm/tests/__init__.py +0 -0
  165. abx_plugins-0.9.2/abx_plugins/plugins/npm/tests/test_npm_provider.py +149 -0
  166. abx_plugins-0.9.2/abx_plugins/plugins/papersdl/__init__.py +0 -0
  167. abx_plugins-0.9.2/abx_plugins/plugins/papersdl/config.json +39 -0
  168. abx_plugins-0.9.2/abx_plugins/plugins/papersdl/on_Crawl__30_papersdl_install.bg.py +62 -0
  169. abx_plugins-0.9.2/abx_plugins/plugins/papersdl/on_Snapshot__66_papersdl.bg.py +263 -0
  170. abx_plugins-0.9.2/abx_plugins/plugins/papersdl/templates/card.html +7 -0
  171. abx_plugins-0.9.2/abx_plugins/plugins/papersdl/templates/full.html +71 -0
  172. abx_plugins-0.9.2/abx_plugins/plugins/papersdl/templates/icon.html +1 -0
  173. abx_plugins-0.9.2/abx_plugins/plugins/papersdl/tests/__init__.py +0 -0
  174. abx_plugins-0.9.2/abx_plugins/plugins/papersdl/tests/test_papersdl.py +305 -0
  175. abx_plugins-0.9.2/abx_plugins/plugins/parse_dom_outlinks/__init__.py +0 -0
  176. abx_plugins-0.9.2/abx_plugins/plugins/parse_dom_outlinks/config.json +21 -0
  177. abx_plugins-0.9.2/abx_plugins/plugins/parse_dom_outlinks/on_Snapshot__75_parse_dom_outlinks.js +257 -0
  178. abx_plugins-0.9.2/abx_plugins/plugins/parse_dom_outlinks/templates/icon.html +1 -0
  179. abx_plugins-0.9.2/abx_plugins/plugins/parse_dom_outlinks/tests/__init__.py +0 -0
  180. abx_plugins-0.9.2/abx_plugins/plugins/parse_dom_outlinks/tests/test_parse_dom_outlinks.py +165 -0
  181. abx_plugins-0.9.2/abx_plugins/plugins/parse_html_urls/__init__.py +0 -0
  182. abx_plugins-0.9.2/abx_plugins/plugins/parse_html_urls/config.json +13 -0
  183. abx_plugins-0.9.2/abx_plugins/plugins/parse_html_urls/on_Snapshot__70_parse_html_urls.py +344 -0
  184. abx_plugins-0.9.2/abx_plugins/plugins/parse_html_urls/templates/icon.html +1 -0
  185. abx_plugins-0.9.2/abx_plugins/plugins/parse_html_urls/tests/__init__.py +0 -0
  186. abx_plugins-0.9.2/abx_plugins/plugins/parse_html_urls/tests/test_parse_html_urls.py +341 -0
  187. abx_plugins-0.9.2/abx_plugins/plugins/parse_jsonl_urls/__init__.py +0 -0
  188. abx_plugins-0.9.2/abx_plugins/plugins/parse_jsonl_urls/config.json +13 -0
  189. abx_plugins-0.9.2/abx_plugins/plugins/parse_jsonl_urls/on_Snapshot__74_parse_jsonl_urls.py +260 -0
  190. abx_plugins-0.9.2/abx_plugins/plugins/parse_jsonl_urls/templates/icon.html +1 -0
  191. abx_plugins-0.9.2/abx_plugins/plugins/parse_jsonl_urls/tests/__init__.py +0 -0
  192. abx_plugins-0.9.2/abx_plugins/plugins/parse_jsonl_urls/tests/test_parse_jsonl_urls.py +354 -0
  193. abx_plugins-0.9.2/abx_plugins/plugins/parse_netscape_urls/__init__.py +0 -0
  194. abx_plugins-0.9.2/abx_plugins/plugins/parse_netscape_urls/config.json +13 -0
  195. abx_plugins-0.9.2/abx_plugins/plugins/parse_netscape_urls/on_Snapshot__73_parse_netscape_urls.py +307 -0
  196. abx_plugins-0.9.2/abx_plugins/plugins/parse_netscape_urls/templates/icon.html +1 -0
  197. abx_plugins-0.9.2/abx_plugins/plugins/parse_netscape_urls/tests/__init__.py +0 -0
  198. abx_plugins-0.9.2/abx_plugins/plugins/parse_netscape_urls/tests/test_parse_netscape_urls.py +269 -0
  199. abx_plugins-0.9.2/abx_plugins/plugins/parse_netscape_urls/tests/test_parse_netscape_urls_comprehensive.py +1093 -0
  200. abx_plugins-0.9.2/abx_plugins/plugins/parse_rss_urls/__init__.py +0 -0
  201. abx_plugins-0.9.2/abx_plugins/plugins/parse_rss_urls/config.json +13 -0
  202. abx_plugins-0.9.2/abx_plugins/plugins/parse_rss_urls/on_Snapshot__72_parse_rss_urls.py +220 -0
  203. abx_plugins-0.9.2/abx_plugins/plugins/parse_rss_urls/templates/icon.html +1 -0
  204. abx_plugins-0.9.2/abx_plugins/plugins/parse_rss_urls/tests/__init__.py +0 -0
  205. abx_plugins-0.9.2/abx_plugins/plugins/parse_rss_urls/tests/test_parse_rss_urls.py +272 -0
  206. abx_plugins-0.9.2/abx_plugins/plugins/parse_rss_urls/tests/test_parse_rss_urls_comprehensive.py +1130 -0
  207. abx_plugins-0.9.2/abx_plugins/plugins/parse_txt_urls/__init__.py +0 -0
  208. abx_plugins-0.9.2/abx_plugins/plugins/parse_txt_urls/config.json +13 -0
  209. abx_plugins-0.9.2/abx_plugins/plugins/parse_txt_urls/on_Snapshot__71_parse_txt_urls.py +206 -0
  210. abx_plugins-0.9.2/abx_plugins/plugins/parse_txt_urls/templates/icon.html +1 -0
  211. abx_plugins-0.9.2/abx_plugins/plugins/parse_txt_urls/tests/__init__.py +0 -0
  212. abx_plugins-0.9.2/abx_plugins/plugins/parse_txt_urls/tests/test_parse_txt_urls.py +247 -0
  213. abx_plugins-0.9.2/abx_plugins/plugins/path_utils.py +30 -0
  214. abx_plugins-0.9.2/abx_plugins/plugins/pdf/__init__.py +0 -0
  215. abx_plugins-0.9.2/abx_plugins/plugins/pdf/config.json +28 -0
  216. abx_plugins-0.9.2/abx_plugins/plugins/pdf/on_Snapshot__52_pdf.js +181 -0
  217. abx_plugins-0.9.2/abx_plugins/plugins/pdf/templates/card.html +6 -0
  218. abx_plugins-0.9.2/abx_plugins/plugins/pdf/templates/full.html +5 -0
  219. abx_plugins-0.9.2/abx_plugins/plugins/pdf/templates/icon.html +1 -0
  220. abx_plugins-0.9.2/abx_plugins/plugins/pdf/tests/__init__.py +0 -0
  221. abx_plugins-0.9.2/abx_plugins/plugins/pdf/tests/test_pdf.py +234 -0
  222. abx_plugins-0.9.2/abx_plugins/plugins/pip/__init__.py +0 -0
  223. abx_plugins-0.9.2/abx_plugins/plugins/pip/on_Binary__11_pip_install.py +153 -0
  224. abx_plugins-0.9.2/abx_plugins/plugins/pip/templates/icon.html +0 -0
  225. abx_plugins-0.9.2/abx_plugins/plugins/pip/tests/__init__.py +0 -0
  226. abx_plugins-0.9.2/abx_plugins/plugins/pip/tests/test_pip_provider.py +196 -0
  227. abx_plugins-0.9.2/abx_plugins/plugins/puppeteer/__init__.py +1 -0
  228. abx_plugins-0.9.2/abx_plugins/plugins/puppeteer/on_Binary__12_puppeteer_install.py +285 -0
  229. abx_plugins-0.9.2/abx_plugins/plugins/puppeteer/on_Crawl__60_puppeteer_install.bg.py +48 -0
  230. abx_plugins-0.9.2/abx_plugins/plugins/puppeteer/tests/__init__.py +0 -0
  231. abx_plugins-0.9.2/abx_plugins/plugins/puppeteer/tests/test_puppeteer.py +153 -0
  232. abx_plugins-0.9.2/abx_plugins/plugins/readability/__init__.py +0 -0
  233. abx_plugins-0.9.2/abx_plugins/plugins/readability/config.json +39 -0
  234. abx_plugins-0.9.2/abx_plugins/plugins/readability/on_Crawl__35_readability_install.bg.py +64 -0
  235. abx_plugins-0.9.2/abx_plugins/plugins/readability/on_Snapshot__56_readability.py +227 -0
  236. abx_plugins-0.9.2/abx_plugins/plugins/readability/templates/card.html +8 -0
  237. abx_plugins-0.9.2/abx_plugins/plugins/readability/templates/full.html +6 -0
  238. abx_plugins-0.9.2/abx_plugins/plugins/readability/templates/icon.html +1 -0
  239. abx_plugins-0.9.2/abx_plugins/plugins/readability/tests/__init__.py +0 -0
  240. abx_plugins-0.9.2/abx_plugins/plugins/readability/tests/test_readability.py +395 -0
  241. abx_plugins-0.9.2/abx_plugins/plugins/redirects/__init__.py +0 -0
  242. abx_plugins-0.9.2/abx_plugins/plugins/redirects/config.json +21 -0
  243. abx_plugins-0.9.2/abx_plugins/plugins/redirects/on_Snapshot__25_redirects.bg.js +311 -0
  244. abx_plugins-0.9.2/abx_plugins/plugins/redirects/templates/icon.html +1 -0
  245. abx_plugins-0.9.2/abx_plugins/plugins/redirects/tests/__init__.py +0 -0
  246. abx_plugins-0.9.2/abx_plugins/plugins/redirects/tests/test_redirects.py +182 -0
  247. abx_plugins-0.9.2/abx_plugins/plugins/responses/__init__.py +0 -0
  248. abx_plugins-0.9.2/abx_plugins/plugins/responses/config.json +21 -0
  249. abx_plugins-0.9.2/abx_plugins/plugins/responses/on_Snapshot__24_responses.bg.js +309 -0
  250. abx_plugins-0.9.2/abx_plugins/plugins/responses/templates/icon.html +1 -0
  251. abx_plugins-0.9.2/abx_plugins/plugins/responses/tests/__init__.py +0 -0
  252. abx_plugins-0.9.2/abx_plugins/plugins/responses/tests/test_responses.py +138 -0
  253. abx_plugins-0.9.2/abx_plugins/plugins/screenshot/__init__.py +0 -0
  254. abx_plugins-0.9.2/abx_plugins/plugins/screenshot/config.json +28 -0
  255. abx_plugins-0.9.2/abx_plugins/plugins/screenshot/on_Snapshot__51_screenshot.js +205 -0
  256. abx_plugins-0.9.2/abx_plugins/plugins/screenshot/templates/card.html +8 -0
  257. abx_plugins-0.9.2/abx_plugins/plugins/screenshot/templates/full.html +7 -0
  258. abx_plugins-0.9.2/abx_plugins/plugins/screenshot/templates/icon.html +1 -0
  259. abx_plugins-0.9.2/abx_plugins/plugins/screenshot/tests/__init__.py +0 -0
  260. abx_plugins-0.9.2/abx_plugins/plugins/screenshot/tests/test_screenshot.py +582 -0
  261. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_ripgrep/__init__.py +0 -0
  262. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_ripgrep/config.json +34 -0
  263. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_ripgrep/on_Crawl__50_ripgrep_install.bg.py +47 -0
  264. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_ripgrep/search.py +111 -0
  265. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_ripgrep/templates/icon.html +0 -0
  266. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_ripgrep/tests/__init__.py +0 -0
  267. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_ripgrep/tests/test_ripgrep_detection.py +163 -0
  268. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_ripgrep/tests/test_ripgrep_search.py +309 -0
  269. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_sonic/__init__.py +0 -0
  270. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_sonic/config.json +39 -0
  271. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_sonic/on_Snapshot__91_index_sonic.py +228 -0
  272. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_sonic/search.py +65 -0
  273. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_sonic/templates/icon.html +1 -0
  274. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_sqlite/__init__.py +0 -0
  275. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_sqlite/config.json +25 -0
  276. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_sqlite/on_Snapshot__90_index_sqlite.py +260 -0
  277. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_sqlite/search.py +83 -0
  278. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_sqlite/templates/icon.html +1 -0
  279. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_sqlite/tests/__init__.py +0 -0
  280. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_sqlite/tests/test_sqlite_hook.py +74 -0
  281. abx_plugins-0.9.2/abx_plugins/plugins/search_backend_sqlite/tests/test_sqlite_search.py +404 -0
  282. abx_plugins-0.9.2/abx_plugins/plugins/seo/__init__.py +0 -0
  283. abx_plugins-0.9.2/abx_plugins/plugins/seo/config.json +21 -0
  284. abx_plugins-0.9.2/abx_plugins/plugins/seo/on_Snapshot__38_seo.js +175 -0
  285. abx_plugins-0.9.2/abx_plugins/plugins/seo/templates/icon.html +1 -0
  286. abx_plugins-0.9.2/abx_plugins/plugins/seo/tests/__init__.py +0 -0
  287. abx_plugins-0.9.2/abx_plugins/plugins/seo/tests/test_seo.py +146 -0
  288. abx_plugins-0.9.2/abx_plugins/plugins/singlefile/__init__.py +0 -0
  289. abx_plugins-0.9.2/abx_plugins/plugins/singlefile/config.json +77 -0
  290. abx_plugins-0.9.2/abx_plugins/plugins/singlefile/on_Crawl__45_singlefile_install.bg.py +70 -0
  291. abx_plugins-0.9.2/abx_plugins/plugins/singlefile/on_Crawl__82_singlefile_install.bg.js +352 -0
  292. abx_plugins-0.9.2/abx_plugins/plugins/singlefile/on_Snapshot__50_singlefile.py +482 -0
  293. abx_plugins-0.9.2/abx_plugins/plugins/singlefile/singlefile_extension_save.js +176 -0
  294. abx_plugins-0.9.2/abx_plugins/plugins/singlefile/templates/card.html +8 -0
  295. abx_plugins-0.9.2/abx_plugins/plugins/singlefile/templates/icon.html +1 -0
  296. abx_plugins-0.9.2/abx_plugins/plugins/singlefile/tests/__init__.py +0 -0
  297. abx_plugins-0.9.2/abx_plugins/plugins/singlefile/tests/test_singlefile.py +469 -0
  298. abx_plugins-0.9.2/abx_plugins/plugins/ssl/__init__.py +0 -0
  299. abx_plugins-0.9.2/abx_plugins/plugins/ssl/config.json +21 -0
  300. abx_plugins-0.9.2/abx_plugins/plugins/ssl/on_Snapshot__23_ssl.bg.js +240 -0
  301. abx_plugins-0.9.2/abx_plugins/plugins/ssl/templates/icon.html +1 -0
  302. abx_plugins-0.9.2/abx_plugins/plugins/ssl/tests/__init__.py +0 -0
  303. abx_plugins-0.9.2/abx_plugins/plugins/ssl/tests/test_ssl.py +170 -0
  304. abx_plugins-0.9.2/abx_plugins/plugins/staticfile/__init__.py +0 -0
  305. abx_plugins-0.9.2/abx_plugins/plugins/staticfile/config.json +21 -0
  306. abx_plugins-0.9.2/abx_plugins/plugins/staticfile/on_Snapshot__26_staticfile.bg.js +415 -0
  307. abx_plugins-0.9.2/abx_plugins/plugins/staticfile/templates/card.html +24 -0
  308. abx_plugins-0.9.2/abx_plugins/plugins/staticfile/templates/icon.html +1 -0
  309. abx_plugins-0.9.2/abx_plugins/plugins/staticfile/tests/__init__.py +0 -0
  310. abx_plugins-0.9.2/abx_plugins/plugins/staticfile/tests/test_staticfile.py +287 -0
  311. abx_plugins-0.9.2/abx_plugins/plugins/tests/test_dependency_boundaries.py +89 -0
  312. abx_plugins-0.9.2/abx_plugins/plugins/title/__init__.py +0 -0
  313. abx_plugins-0.9.2/abx_plugins/plugins/title/config.json +21 -0
  314. abx_plugins-0.9.2/abx_plugins/plugins/title/on_Snapshot__54_title.js +160 -0
  315. abx_plugins-0.9.2/abx_plugins/plugins/title/templates/icon.html +1 -0
  316. abx_plugins-0.9.2/abx_plugins/plugins/title/tests/__init__.py +0 -0
  317. abx_plugins-0.9.2/abx_plugins/plugins/title/tests/test_title.py +355 -0
  318. abx_plugins-0.9.2/abx_plugins/plugins/trafilatura/__init__.py +0 -0
  319. abx_plugins-0.9.2/abx_plugins/plugins/trafilatura/config.json +60 -0
  320. abx_plugins-0.9.2/abx_plugins/plugins/trafilatura/on_Crawl__41_trafilatura_install.bg.py +51 -0
  321. abx_plugins-0.9.2/abx_plugins/plugins/trafilatura/on_Snapshot__59_trafilatura.py +232 -0
  322. abx_plugins-0.9.2/abx_plugins/plugins/trafilatura/tests/__init__.py +0 -0
  323. abx_plugins-0.9.2/abx_plugins/plugins/trafilatura/tests/test_trafilatura.py +432 -0
  324. abx_plugins-0.9.2/abx_plugins/plugins/twocaptcha/__init__.py +0 -0
  325. abx_plugins-0.9.2/abx_plugins/plugins/twocaptcha/config.json +50 -0
  326. abx_plugins-0.9.2/abx_plugins/plugins/twocaptcha/on_Crawl__83_twocaptcha_install.bg.js +77 -0
  327. abx_plugins-0.9.2/abx_plugins/plugins/twocaptcha/on_Crawl__95_twocaptcha_config.js +404 -0
  328. abx_plugins-0.9.2/abx_plugins/plugins/twocaptcha/templates/icon.html +0 -0
  329. abx_plugins-0.9.2/abx_plugins/plugins/twocaptcha/tests/__init__.py +0 -0
  330. abx_plugins-0.9.2/abx_plugins/plugins/twocaptcha/tests/test_twocaptcha.py +363 -0
  331. abx_plugins-0.9.2/abx_plugins/plugins/ublock/__init__.py +0 -0
  332. abx_plugins-0.9.2/abx_plugins/plugins/ublock/config.json +14 -0
  333. abx_plugins-0.9.2/abx_plugins/plugins/ublock/on_Crawl__80_install_ublock_extension.js +71 -0
  334. abx_plugins-0.9.2/abx_plugins/plugins/ublock/templates/icon.html +0 -0
  335. abx_plugins-0.9.2/abx_plugins/plugins/ublock/tests/__init__.py +0 -0
  336. abx_plugins-0.9.2/abx_plugins/plugins/ublock/tests/test_ublock.py +736 -0
  337. abx_plugins-0.9.2/abx_plugins/plugins/wget/__init__.py +0 -0
  338. abx_plugins-0.9.2/abx_plugins/plugins/wget/config.json +75 -0
  339. abx_plugins-0.9.2/abx_plugins/plugins/wget/on_Crawl__10_wget_install.bg.py +111 -0
  340. abx_plugins-0.9.2/abx_plugins/plugins/wget/on_Snapshot__06_wget.bg.py +282 -0
  341. abx_plugins-0.9.2/abx_plugins/plugins/wget/templates/card.html +8 -0
  342. abx_plugins-0.9.2/abx_plugins/plugins/wget/templates/icon.html +1 -0
  343. abx_plugins-0.9.2/abx_plugins/plugins/wget/tests/__init__.py +0 -0
  344. abx_plugins-0.9.2/abx_plugins/plugins/wget/tests/test_wget.py +579 -0
  345. abx_plugins-0.9.2/abx_plugins/plugins/ytdlp/__init__.py +0 -0
  346. abx_plugins-0.9.2/abx_plugins/plugins/ytdlp/config.json +92 -0
  347. abx_plugins-0.9.2/abx_plugins/plugins/ytdlp/on_Crawl__15_ytdlp_install.bg.py +81 -0
  348. abx_plugins-0.9.2/abx_plugins/plugins/ytdlp/on_Snapshot__02_ytdlp.bg.py +355 -0
  349. abx_plugins-0.9.2/abx_plugins/plugins/ytdlp/templates/card.html +17 -0
  350. abx_plugins-0.9.2/abx_plugins/plugins/ytdlp/templates/full.html +10 -0
  351. abx_plugins-0.9.2/abx_plugins/plugins/ytdlp/templates/icon.html +1 -0
  352. abx_plugins-0.9.2/abx_plugins/plugins/ytdlp/tests/__init__.py +0 -0
  353. abx_plugins-0.9.2/abx_plugins/plugins/ytdlp/tests/test_ytdlp.py +417 -0
  354. abx_plugins-0.9.2/conftest.py +57 -0
  355. abx_plugins-0.9.2/pyproject.toml +60 -0
@@ -0,0 +1,6 @@
1
+ .DS_Store
2
+
3
+ data/
4
+ *.sqlite3*
5
+ __pycache__/
6
+ *.pyc
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Nick Sweeting
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,208 @@
1
+ Metadata-Version: 2.4
2
+ Name: abx-plugins
3
+ Version: 0.9.2
4
+ Summary: ArchiveBox-compatible plugin suite (hooks, configs, binaries manifests)
5
+ Project-URL: Homepage, https://github.com/ArchiveBox/abx-plugins
6
+ Project-URL: Source, https://github.com/ArchiveBox/abx-plugins
7
+ Project-URL: Documentation, https://github.com/ArchiveBox/ArchiveBox
8
+ Project-URL: Bug Tracker, https://github.com/ArchiveBox/abx-plugins/issues
9
+ Author-email: Nick Sweeting <pyproject.toml+abx-plugins@archivebox.io>
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: archivebox,hooks,plugins,scraping,web-archiving
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3 :: Only
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Requires-Python: >=3.11
23
+ Requires-Dist: abx-pkg>=0.6.3
24
+ Requires-Dist: feedparser>=6.0.0
25
+ Requires-Dist: pyright>=1.1.408
26
+ Requires-Dist: pytest-httpserver>=1.1.0
27
+ Requires-Dist: pytest>=9.0.2
28
+ Requires-Dist: requests>=2.32.5
29
+ Requires-Dist: rich-click>=1.9.7
30
+ Requires-Dist: ruff>=0.15.2
31
+ Requires-Dist: ty>=0.0.18
32
+ Provides-Extra: dev
33
+ Requires-Dist: abx-pkg>=0.6.0; extra == 'dev'
34
+ Requires-Dist: feedparser>=6.0.0; extra == 'dev'
35
+ Requires-Dist: pytest-httpserver>=1.1.0; extra == 'dev'
36
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
37
+ Requires-Dist: requests>=2.28.0; extra == 'dev'
38
+ Requires-Dist: rich-click>=1.9.7; extra == 'dev'
39
+ Description-Content-Type: text/markdown
40
+
41
+ # abx-plugins
42
+
43
+ ArchiveBox-compatible plugin suite (hooks and config schemas).
44
+
45
+ This package contains only the plugins; to run them, use [`abx-dl`](https://github.com/archiveBox/abx-dl) or [`archivebox`](https://github.com/archiveBox/ArchiveBox).
46
+
47
+ <img width="1000" height="1082" alt="Screenshot 2026-03-11 at 6 53 03 AM" src="https://github.com/user-attachments/assets/08c5f63b-05e2-4947-adca-f64e8c5ad8b3" />
48
+
49
+ ## Usage
50
+
51
+ Tools like `abx-dl` and ArchiveBox can discover plugins from this package
52
+ without symlinks or environment-variable tricks.
53
+
54
+ ## Plugin Contract
55
+
56
+ ### Directory layout
57
+
58
+ Each plugin lives under `plugins/<name>/` and may include:
59
+
60
+ - `config.json` config schema
61
+ - `on_Crawl__...` per-crawl hook scripts (optional) - install dependencies / set up shared resources
62
+ - `on_Snapshot__...` per-snapshot hooks - for each URL: do xyz...
63
+
64
+ Hooks run with:
65
+
66
+ - **SNAP_DIR** = base snapshot directory (default: `.`)
67
+ - **CRAWL_DIR** = base crawl directory (default: `.`)
68
+ - **Snapshot hook output** = `SNAP_DIR/<plugin>/...`
69
+ - **Crawl hook output** = `CRAWL_DIR/<plugin>/...`
70
+ - **Other plugin outputs** can be read via `../<other-plugin>/...` from your own output dir
71
+
72
+ ### Key environment variables
73
+
74
+ - `SNAP_DIR` - base snapshot directory (default: `.`)
75
+ - `CRAWL_DIR` - base crawl directory (default: `.`)
76
+ - `LIB_DIR` - binaries/tools root (default: `~/.config/abx/lib`)
77
+ - `PERSONAS_DIR` - persona profiles root (default: `~/.config/abx/personas`)
78
+ - `ACTIVE_PERSONA` - persona name (default: `Default`)
79
+
80
+ ### Install hook contract (concise)
81
+
82
+ Lifecycle:
83
+
84
+ 1. `on_Crawl__*install*` declares crawl dependencies.
85
+ 2. `on_Binary__*install*` resolves/installs one binary with one provider.
86
+
87
+ `on_Crawl` output (dependency declaration):
88
+
89
+ ```json
90
+ {"type":"Binary","name":"yt-dlp","binproviders":"pip,brew,apt,env","overrides":{"pip":{"packages":["yt-dlp[default]"]}},"machine_id":"<optional>"}
91
+ ```
92
+
93
+ `on_Binary` input/output:
94
+
95
+ - CLI input should accept `--binary-id`, `--machine-id`, `--name` (plus optional provider args).
96
+ - Output should emit installed facts like:
97
+
98
+ ```json
99
+ {"type":"Binary","name":"yt-dlp","abspath":"/abs/path","version":"2025.01.01","sha256":"<optional>","binprovider":"pip","machine_id":"<recommended>","binary_id":"<recommended>"}
100
+ ```
101
+
102
+ Optional machine patch record:
103
+
104
+ ```json
105
+ {"type":"Machine","config":{"PATH":"...","NODE_MODULES_DIR":"...","CHROME_BINARY":"..."}}
106
+ ```
107
+
108
+ Semantics:
109
+
110
+ - `stdout`: JSONL records only
111
+ - `stderr`: human logs/debug
112
+ - exit `0`: success or intentional skip
113
+ - exit non-zero: hard failure
114
+
115
+ State/OS:
116
+
117
+ - working dir: `CRAWL_DIR/<plugin>/`
118
+ - durable install root: `LIB_DIR` (e.g. npm prefix, pip venv, puppeteer cache)
119
+ - providers: `apt` (Debian/Ubuntu), `brew` (macOS/Linux), many hooks currently assume POSIX paths
120
+
121
+ ### Snapshot hook contract (concise)
122
+
123
+ Lifecycle:
124
+
125
+ - runs once per snapshot, typically after crawl setup
126
+ - common Chrome flow: crawl browser/session -> `chrome_tab` -> `chrome_navigate` -> downstream extractors
127
+
128
+ State:
129
+
130
+ - output cwd is usually `SNAP_DIR/<plugin>/`
131
+ - hooks may read sibling outputs via `../<plugin>/...`
132
+
133
+ Output records:
134
+
135
+ - terminal record is usually:
136
+
137
+ ```json
138
+ {"type":"ArchiveResult","status":"succeeded|skipped|failed","output_str":"path-or-message"}
139
+ ```
140
+
141
+ - discovery hooks may also emit `Snapshot` and `Tag` records before `ArchiveResult`
142
+ - search indexing hooks are a known exception and may use exit code + stderr without `ArchiveResult`
143
+
144
+ Semantics:
145
+
146
+ - `stdout`: JSONL records
147
+ - `stderr`: diagnostics/logging
148
+ - exit `0`: succeeded or skipped
149
+ - exit non-zero: failed
150
+ - current nuance: some skip/transient paths emit no JSONL and rely only on exit code
151
+
152
+ ### Rules
153
+
154
+ - all plugins should:
155
+ - *overwrite* existing files cleanly if re-run in the same dir, do not skip if files are already present (do not delete and then download, because if a process fails we want to leave previous output intact).
156
+ - the exception to always overwriting files is: chrome.pid, target_id.txt, navigation.json, etc. chrome state which gets re-used if it's not stale. we should detect if any of it is stale during chrome launch and tab creation, and clear all of it together if it is stale to prevent subtle drift errors / reuse of stale values.
157
+ - status `succeeded` if they ran and produced output
158
+ - status `noresults` if they ran successfully but produced no meaningful output (e.g. git on a non-github url, ytdlp on a site with no media, paperdl on a site with no pdfs, etc.)
159
+ - status `skipped` only if *config* caused them not to run (e.g. `YTDLP_ENABLED=False`)
160
+ - status `failed` if any hard dependencies are missing/invalid (e.g. chrome) or if the process exited non-0 / raised an exception
161
+ - return a short, meaningful `output_str` e.g. the page title, mimetype, return status code, or the relative path of the primary output file produced like `output.pdf` or `0 modals closed` or `The Page Title Verbatim` or `favicon.io` or `Not a git URL`
162
+ - define execution order solely using lexicographic sort order of hook filenames
163
+ - use bg hooks for either short-lived tasks that can run in parallel, or long-lived daemons that run for the whole duration of the snapshot and get killed for cleanup/final output at the end
164
+ - bg hooks that depend on other bg hook outputs must implement their own waiters internally + check that inputs are truly ready and not just that the files are present, because they may be spawned in parallel/before the earlier one's outputs are actually ready and race. e.g. html/artifact generation should usually be fg so that later bg parsing hooks can safely depend on it being finished and not just part of the file being present
165
+ - use rich_click for cli arg parsing with a uv file header when hooks are written in python. do not depend on archivebox or django, try to only depend on chrome or the output files of other plugins instead of importing code from them. the one exception is to always use chrome_utils.js as the interface for anything involving chrome.
166
+
167
+
168
+ ### Event JSONL interface (bbus-style, no dependency)
169
+
170
+ Hooks emit JSONL events to stdout. They do **not** need to import `bbus`.
171
+ The event envelope matches the bbus style so higher layers can stream/replay.
172
+
173
+ Minimal envelope:
174
+
175
+ ```json
176
+ {
177
+ "event_id": "uuidv7",
178
+ "event_type": "SnapshotCreated",
179
+ "event_created_at": "2026-02-01T20:10:22Z",
180
+ "event_parent_id": "uuidv7-or-null",
181
+ "event_schema": "abx.events.v1",
182
+ "event_path": "abx-plugins",
183
+ "data": { "...": "event-specific fields" }
184
+ }
185
+ ```
186
+
187
+ Conventions:
188
+
189
+ - Active verb names are **requests** (e.g. `BinaryInstall`, `ProcessLaunch`).
190
+ - Past tense names are **facts** (e.g. `BinaryInstalled`, `ProcessExited`).
191
+ - Plugins can emit additional fields inside `data` without coordination.
192
+
193
+ Common event types emitted by hooks:
194
+
195
+ - `ArchiveResultCreated` (status + output files)
196
+ - `Binary` records (dependency detection/install)
197
+ - `ProcessStarted` / `ProcessExited`
198
+
199
+ Higher-level tools (abx-dl / ArchiveBox) can:
200
+
201
+ - Parse these events from stdout
202
+ - Persist or project them (SQLite/JSONL/Django) without plugins knowing
203
+
204
+ Legacy note:
205
+
206
+ Some hooks still emit a lightweight JSONL record with a top-level `type` field
207
+ (e.g., `{"type": "ArchiveResult", ...}`). Runtimes should accept those and
208
+ optionally translate them into the event envelope above.
@@ -0,0 +1,168 @@
1
+ # abx-plugins
2
+
3
+ ArchiveBox-compatible plugin suite (hooks and config schemas).
4
+
5
+ This package contains only the plugins; to run them, use [`abx-dl`](https://github.com/archiveBox/abx-dl) or [`archivebox`](https://github.com/archiveBox/ArchiveBox).
6
+
7
+ <img width="1000" height="1082" alt="Screenshot 2026-03-11 at 6 53 03 AM" src="https://github.com/user-attachments/assets/08c5f63b-05e2-4947-adca-f64e8c5ad8b3" />
8
+
9
+ ## Usage
10
+
11
+ Tools like `abx-dl` and ArchiveBox can discover plugins from this package
12
+ without symlinks or environment-variable tricks.
13
+
14
+ ## Plugin Contract
15
+
16
+ ### Directory layout
17
+
18
+ Each plugin lives under `plugins/<name>/` and may include:
19
+
20
+ - `config.json` config schema
21
+ - `on_Crawl__...` per-crawl hook scripts (optional) - install dependencies / set up shared resources
22
+ - `on_Snapshot__...` per-snapshot hooks - for each URL: do xyz...
23
+
24
+ Hooks run with:
25
+
26
+ - **SNAP_DIR** = base snapshot directory (default: `.`)
27
+ - **CRAWL_DIR** = base crawl directory (default: `.`)
28
+ - **Snapshot hook output** = `SNAP_DIR/<plugin>/...`
29
+ - **Crawl hook output** = `CRAWL_DIR/<plugin>/...`
30
+ - **Other plugin outputs** can be read via `../<other-plugin>/...` from your own output dir
31
+
32
+ ### Key environment variables
33
+
34
+ - `SNAP_DIR` - base snapshot directory (default: `.`)
35
+ - `CRAWL_DIR` - base crawl directory (default: `.`)
36
+ - `LIB_DIR` - binaries/tools root (default: `~/.config/abx/lib`)
37
+ - `PERSONAS_DIR` - persona profiles root (default: `~/.config/abx/personas`)
38
+ - `ACTIVE_PERSONA` - persona name (default: `Default`)
39
+
40
+ ### Install hook contract (concise)
41
+
42
+ Lifecycle:
43
+
44
+ 1. `on_Crawl__*install*` declares crawl dependencies.
45
+ 2. `on_Binary__*install*` resolves/installs one binary with one provider.
46
+
47
+ `on_Crawl` output (dependency declaration):
48
+
49
+ ```json
50
+ {"type":"Binary","name":"yt-dlp","binproviders":"pip,brew,apt,env","overrides":{"pip":{"packages":["yt-dlp[default]"]}},"machine_id":"<optional>"}
51
+ ```
52
+
53
+ `on_Binary` input/output:
54
+
55
+ - CLI input should accept `--binary-id`, `--machine-id`, `--name` (plus optional provider args).
56
+ - Output should emit installed facts like:
57
+
58
+ ```json
59
+ {"type":"Binary","name":"yt-dlp","abspath":"/abs/path","version":"2025.01.01","sha256":"<optional>","binprovider":"pip","machine_id":"<recommended>","binary_id":"<recommended>"}
60
+ ```
61
+
62
+ Optional machine patch record:
63
+
64
+ ```json
65
+ {"type":"Machine","config":{"PATH":"...","NODE_MODULES_DIR":"...","CHROME_BINARY":"..."}}
66
+ ```
67
+
68
+ Semantics:
69
+
70
+ - `stdout`: JSONL records only
71
+ - `stderr`: human logs/debug
72
+ - exit `0`: success or intentional skip
73
+ - exit non-zero: hard failure
74
+
75
+ State/OS:
76
+
77
+ - working dir: `CRAWL_DIR/<plugin>/`
78
+ - durable install root: `LIB_DIR` (e.g. npm prefix, pip venv, puppeteer cache)
79
+ - providers: `apt` (Debian/Ubuntu), `brew` (macOS/Linux), many hooks currently assume POSIX paths
80
+
81
+ ### Snapshot hook contract (concise)
82
+
83
+ Lifecycle:
84
+
85
+ - runs once per snapshot, typically after crawl setup
86
+ - common Chrome flow: crawl browser/session -> `chrome_tab` -> `chrome_navigate` -> downstream extractors
87
+
88
+ State:
89
+
90
+ - output cwd is usually `SNAP_DIR/<plugin>/`
91
+ - hooks may read sibling outputs via `../<plugin>/...`
92
+
93
+ Output records:
94
+
95
+ - terminal record is usually:
96
+
97
+ ```json
98
+ {"type":"ArchiveResult","status":"succeeded|skipped|failed","output_str":"path-or-message"}
99
+ ```
100
+
101
+ - discovery hooks may also emit `Snapshot` and `Tag` records before `ArchiveResult`
102
+ - search indexing hooks are a known exception and may use exit code + stderr without `ArchiveResult`
103
+
104
+ Semantics:
105
+
106
+ - `stdout`: JSONL records
107
+ - `stderr`: diagnostics/logging
108
+ - exit `0`: succeeded or skipped
109
+ - exit non-zero: failed
110
+ - current nuance: some skip/transient paths emit no JSONL and rely only on exit code
111
+
112
+ ### Rules
113
+
114
+ - all plugins should:
115
+ - *overwrite* existing files cleanly if re-run in the same dir, do not skip if files are already present (do not delete and then download, because if a process fails we want to leave previous output intact).
116
+ - the exception to always overwriting files is: chrome.pid, target_id.txt, navigation.json, etc. chrome state which gets re-used if it's not stale. we should detect if any of it is stale during chrome launch and tab creation, and clear all of it together if it is stale to prevent subtle drift errors / reuse of stale values.
117
+ - status `succeeded` if they ran and produced output
118
+ - status `noresults` if they ran successfully but produced no meaningful output (e.g. git on a non-github url, ytdlp on a site with no media, paperdl on a site with no pdfs, etc.)
119
+ - status `skipped` only if *config* caused them not to run (e.g. `YTDLP_ENABLED=False`)
120
+ - status `failed` if any hard dependencies are missing/invalid (e.g. chrome) or if the process exited non-0 / raised an exception
121
+ - return a short, meaningful `output_str` e.g. the page title, mimetype, return status code, or the relative path of the primary output file produced like `output.pdf` or `0 modals closed` or `The Page Title Verbatim` or `favicon.io` or `Not a git URL`
122
+ - define execution order solely using lexicographic sort order of hook filenames
123
+ - use bg hooks for either short-lived tasks that can run in parallel, or long-lived daemons that run for the whole duration of the snapshot and get killed for cleanup/final output at the end
124
+ - bg hooks that depend on other bg hook outputs must implement their own waiters internally + check that inputs are truly ready and not just that the files are present, because they may be spawned in parallel/before the earlier one's outputs are actually ready and race. e.g. html/artifact generation should usually be fg so that later bg parsing hooks can safely depend on it being finished and not just part of the file being present
125
+ - use rich_click for cli arg parsing with a uv file header when hooks are written in python. do not depend on archivebox or django, try to only depend on chrome or the output files of other plugins instead of importing code from them. the one exception is to always use chrome_utils.js as the interface for anything involving chrome.
126
+
127
+
128
+ ### Event JSONL interface (bbus-style, no dependency)
129
+
130
+ Hooks emit JSONL events to stdout. They do **not** need to import `bbus`.
131
+ The event envelope matches the bbus style so higher layers can stream/replay.
132
+
133
+ Minimal envelope:
134
+
135
+ ```json
136
+ {
137
+ "event_id": "uuidv7",
138
+ "event_type": "SnapshotCreated",
139
+ "event_created_at": "2026-02-01T20:10:22Z",
140
+ "event_parent_id": "uuidv7-or-null",
141
+ "event_schema": "abx.events.v1",
142
+ "event_path": "abx-plugins",
143
+ "data": { "...": "event-specific fields" }
144
+ }
145
+ ```
146
+
147
+ Conventions:
148
+
149
+ - Active verb names are **requests** (e.g. `BinaryInstall`, `ProcessLaunch`).
150
+ - Past tense names are **facts** (e.g. `BinaryInstalled`, `ProcessExited`).
151
+ - Plugins can emit additional fields inside `data` without coordination.
152
+
153
+ Common event types emitted by hooks:
154
+
155
+ - `ArchiveResultCreated` (status + output files)
156
+ - `Binary` records (dependency detection/install)
157
+ - `ProcessStarted` / `ProcessExited`
158
+
159
+ Higher-level tools (abx-dl / ArchiveBox) can:
160
+
161
+ - Parse these events from stdout
162
+ - Persist or project them (SQLite/JSONL/Django) without plugins knowing
163
+
164
+ Legacy note:
165
+
166
+ Some hooks still emit a lightweight JSONL record with a top-level `type` field
167
+ (e.g., `{"type": "ArchiveResult", ...}`). Runtimes should accept those and
168
+ optionally translate them into the event envelope above.
@@ -0,0 +1,13 @@
1
+ """Plugin suite package for ArchiveBox-compatible tools."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+
8
+ def get_plugins_dir() -> Path:
9
+ """Return the filesystem path to the bundled plugins directory."""
10
+ return Path(__file__).resolve().parent / "plugins"
11
+
12
+
13
+ __all__ = ["get_plugins_dir"]
@@ -0,0 +1 @@
1
+ """Plugin suite root package for abx-plugins."""
@@ -0,0 +1,21 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "type": "object",
4
+ "additionalProperties": false,
5
+ "required_plugins": ["chrome"],
6
+ "properties": {
7
+ "ACCESSIBILITY_ENABLED": {
8
+ "type": "boolean",
9
+ "default": true,
10
+ "x-aliases": ["SAVE_ACCESSIBILITY", "USE_ACCESSIBILITY"],
11
+ "description": "Enable accessibility tree capture"
12
+ },
13
+ "ACCESSIBILITY_TIMEOUT": {
14
+ "type": "integer",
15
+ "default": 30,
16
+ "minimum": 5,
17
+ "x-fallback": "TIMEOUT",
18
+ "description": "Timeout for accessibility capture in seconds"
19
+ }
20
+ }
21
+ }
@@ -0,0 +1,223 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Extract accessibility tree and page outline from a URL.
4
+ *
5
+ * Extracts:
6
+ * - Page outline (headings h1-h6, sections, articles)
7
+ * - Iframe tree
8
+ * - Accessibility snapshot
9
+ * - ARIA labels and roles
10
+ *
11
+ * Usage: on_Snapshot__39_accessibility.js --url=<url> --snapshot-id=<uuid>
12
+ * Output: Writes accessibility/accessibility.json
13
+ *
14
+ * Environment variables:
15
+ * ACCESSIBILITY_ENABLED: Enable accessibility extraction (default: true)
16
+ */
17
+
18
+ const fs = require('fs');
19
+ const path = require('path');
20
+ // Add NODE_MODULES_DIR to module resolution paths if set
21
+ if (process.env.NODE_MODULES_DIR) module.paths.unshift(process.env.NODE_MODULES_DIR);
22
+ const puppeteer = require('puppeteer-core');
23
+ const {
24
+ getEnvBool,
25
+ getEnvInt,
26
+ parseArgs,
27
+ readCdpUrl,
28
+ connectToPage,
29
+ waitForPageLoaded,
30
+ } = require('../chrome/chrome_utils.js');
31
+
32
+ // Extractor metadata
33
+ const PLUGIN_NAME = 'accessibility';
34
+ const PLUGIN_DIR = path.basename(__dirname);
35
+ const SNAP_DIR = path.resolve((process.env.SNAP_DIR || '.').trim());
36
+ const OUTPUT_DIR = path.join(SNAP_DIR, PLUGIN_DIR);
37
+ if (!fs.existsSync(OUTPUT_DIR)) {
38
+ fs.mkdirSync(OUTPUT_DIR, { recursive: true });
39
+ }
40
+ process.chdir(OUTPUT_DIR);
41
+ const OUTPUT_FILE = 'accessibility.json';
42
+ const CHROME_SESSION_DIR = '../chrome';
43
+
44
+ // Extract accessibility info
45
+ async function extractAccessibility(url, timeoutMs) {
46
+ // Output directory is current directory (hook already runs in output dir)
47
+ const outputPath = path.join(OUTPUT_DIR, OUTPUT_FILE);
48
+
49
+ let browser = null;
50
+
51
+ try {
52
+ if (!readCdpUrl(CHROME_SESSION_DIR)) {
53
+ return { success: false, error: 'No Chrome session found (chrome plugin must run first)' };
54
+ }
55
+
56
+ const connection = await connectToPage({
57
+ chromeSessionDir: CHROME_SESSION_DIR,
58
+ timeoutMs,
59
+ puppeteer,
60
+ });
61
+ browser = connection.browser;
62
+ const page = connection.page;
63
+ await waitForPageLoaded(CHROME_SESSION_DIR, timeoutMs * 4, 200);
64
+
65
+ // Get accessibility snapshot
66
+ const accessibilityTree = await page.accessibility.snapshot({ interestingOnly: true });
67
+
68
+ // Extract page outline (headings, sections, etc.)
69
+ const outline = await page.evaluate(() => {
70
+ const headings = [];
71
+ const elements = document.querySelectorAll(
72
+ 'h1, h2, h3, h4, h5, h6, a[name], header, footer, article, main, aside, nav, section, figure, summary, table, form, iframe'
73
+ );
74
+
75
+ elements.forEach(elem => {
76
+ // Skip unnamed anchors
77
+ if (elem.tagName.toLowerCase() === 'a' && !elem.name) return;
78
+
79
+ const tagName = elem.tagName.toLowerCase();
80
+ const elemId = elem.id || elem.name || elem.getAttribute('aria-label') || elem.role || '';
81
+ const elemClasses = (elem.className || '').toString().trim().split(/\s+/).slice(0, 3).join(' .');
82
+ const action = elem.action?.split('/').pop() || '';
83
+
84
+ let summary = (elem.innerText || '').slice(0, 128);
85
+ if (summary.length >= 128) summary += '...';
86
+
87
+ let prefix = '';
88
+ let title = '';
89
+
90
+ // Format headings with # prefix
91
+ const level = parseInt(tagName.replace('h', ''));
92
+ if (!isNaN(level)) {
93
+ prefix = '#'.repeat(level);
94
+ title = elem.innerText || elemId || elemClasses;
95
+ } else {
96
+ // For other elements, create breadcrumb path
97
+ const parents = [tagName];
98
+ let node = elem.parentNode;
99
+ while (node && parents.length < 5) {
100
+ if (node.tagName) {
101
+ const tag = node.tagName.toLowerCase();
102
+ if (!['div', 'span', 'p', 'body', 'html'].includes(tag)) {
103
+ parents.unshift(tag);
104
+ } else {
105
+ parents.unshift('');
106
+ }
107
+ }
108
+ node = node.parentNode;
109
+ }
110
+ prefix = parents.join('>');
111
+
112
+ title = elemId ? `#${elemId}` : '';
113
+ if (!title && elemClasses) title = `.${elemClasses}`;
114
+ if (action) title += ` /${action}`;
115
+ if (summary && !title.includes(summary)) title += `: ${summary}`;
116
+ }
117
+
118
+ // Clean up title
119
+ title = title.replace(/\s+/g, ' ').trim();
120
+
121
+ if (prefix) {
122
+ headings.push(`${prefix} ${title}`);
123
+ }
124
+ });
125
+
126
+ return headings;
127
+ });
128
+
129
+ // Get iframe tree
130
+ const iframes = [];
131
+ function dumpFrameTree(frame, indent = '>') {
132
+ iframes.push(indent + frame.url());
133
+ for (const child of frame.childFrames()) {
134
+ dumpFrameTree(child, indent + '>');
135
+ }
136
+ }
137
+ dumpFrameTree(page.mainFrame(), '');
138
+
139
+ const accessibilityData = {
140
+ url,
141
+ headings: outline,
142
+ iframes,
143
+ tree: accessibilityTree,
144
+ };
145
+
146
+ // Write output
147
+ fs.writeFileSync(outputPath, JSON.stringify(accessibilityData, null, 2));
148
+
149
+ return { success: true, output: outputPath, accessibilityData };
150
+
151
+ } catch (e) {
152
+ return { success: false, error: `${e.name}: ${e.message}` };
153
+ } finally {
154
+ if (browser) {
155
+ browser.disconnect();
156
+ }
157
+ }
158
+ }
159
+
160
+ async function main() {
161
+ const args = parseArgs();
162
+ const url = args.url;
163
+ const snapshotId = args.snapshot_id;
164
+
165
+ if (!url || !snapshotId) {
166
+ console.error('Usage: on_Snapshot__39_accessibility.js --url=<url> --snapshot-id=<uuid>');
167
+ process.exit(1);
168
+ }
169
+
170
+ const startTs = new Date();
171
+ let status = 'failed';
172
+ let output = null;
173
+ let error = '';
174
+
175
+ try {
176
+ // Check if enabled
177
+ if (!getEnvBool('ACCESSIBILITY_ENABLED', true)) {
178
+ console.log('Skipping accessibility (ACCESSIBILITY_ENABLED=False)');
179
+ // Output clean JSONL (no RESULT_JSON= prefix)
180
+ console.log(JSON.stringify({
181
+ type: 'ArchiveResult',
182
+ status: 'skipped',
183
+ output_str: 'ACCESSIBILITY_ENABLED=False',
184
+ }));
185
+ process.exit(0);
186
+ }
187
+
188
+ const timeoutMs = getEnvInt('ACCESSIBILITY_TIMEOUT', getEnvInt('TIMEOUT', 30)) * 1000;
189
+ const result = await extractAccessibility(url, timeoutMs);
190
+
191
+ if (result.success) {
192
+ status = 'succeeded';
193
+ output = result.output;
194
+ const headingCount = result.accessibilityData.headings.length;
195
+ const iframeCount = result.accessibilityData.iframes.length;
196
+ console.log(`Accessibility extracted: ${headingCount} headings, ${iframeCount} iframes`);
197
+ } else {
198
+ status = 'failed';
199
+ error = result.error;
200
+ }
201
+ } catch (e) {
202
+ error = `${e.name}: ${e.message}`;
203
+ status = 'failed';
204
+ }
205
+
206
+ const endTs = new Date();
207
+
208
+ if (error) console.error(`ERROR: ${error}`);
209
+
210
+ // Output clean JSONL (no RESULT_JSON= prefix)
211
+ console.log(JSON.stringify({
212
+ type: 'ArchiveResult',
213
+ status,
214
+ output_str: output || error || '',
215
+ }));
216
+
217
+ process.exit(status === 'succeeded' ? 0 : 1);
218
+ }
219
+
220
+ main().catch(e => {
221
+ console.error(`Fatal error: ${e.message}`);
222
+ process.exit(1);
223
+ });
@@ -0,0 +1 @@
1
+ <span class="abx-output-icon abx-output-icon--accessibility" title="Accessibility"><svg width="16" height="16" viewBox="0 0 24 24" aria-hidden="true" focusable="false" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="4.5" r="2" fill="currentColor" stroke="none"/><path d="M4 7.5h16"/><path d="M12 7.5v12"/><path d="M7 20l5-6 5 6"/></svg></span>