ai-parrot 0.17.2__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (535)
  1. agentui/.prettierrc +15 -0
  2. agentui/QUICKSTART.md +272 -0
  3. agentui/README.md +59 -0
  4. agentui/env.example +16 -0
  5. agentui/jsconfig.json +14 -0
  6. agentui/package-lock.json +4242 -0
  7. agentui/package.json +34 -0
  8. agentui/scripts/postinstall/apply-patches.mjs +260 -0
  9. agentui/src/app.css +61 -0
  10. agentui/src/app.d.ts +13 -0
  11. agentui/src/app.html +12 -0
  12. agentui/src/components/LoadingSpinner.svelte +64 -0
  13. agentui/src/components/ThemeSwitcher.svelte +159 -0
  14. agentui/src/components/index.js +4 -0
  15. agentui/src/lib/api/bots.ts +60 -0
  16. agentui/src/lib/api/chat.ts +22 -0
  17. agentui/src/lib/api/http.ts +25 -0
  18. agentui/src/lib/components/BotCard.svelte +33 -0
  19. agentui/src/lib/components/ChatBubble.svelte +63 -0
  20. agentui/src/lib/components/Toast.svelte +21 -0
  21. agentui/src/lib/config.ts +20 -0
  22. agentui/src/lib/stores/auth.svelte.ts +73 -0
  23. agentui/src/lib/stores/theme.svelte.js +64 -0
  24. agentui/src/lib/stores/toast.svelte.ts +31 -0
  25. agentui/src/lib/utils/conversation.ts +39 -0
  26. agentui/src/routes/+layout.svelte +20 -0
  27. agentui/src/routes/+page.svelte +232 -0
  28. agentui/src/routes/login/+page.svelte +200 -0
  29. agentui/src/routes/talk/[agentId]/+page.svelte +297 -0
  30. agentui/src/routes/talk/[agentId]/+page.ts +7 -0
  31. agentui/static/README.md +1 -0
  32. agentui/svelte.config.js +11 -0
  33. agentui/tailwind.config.ts +53 -0
  34. agentui/tsconfig.json +3 -0
  35. agentui/vite.config.ts +10 -0
  36. ai_parrot-0.17.2.dist-info/METADATA +472 -0
  37. ai_parrot-0.17.2.dist-info/RECORD +535 -0
  38. ai_parrot-0.17.2.dist-info/WHEEL +6 -0
  39. ai_parrot-0.17.2.dist-info/entry_points.txt +2 -0
  40. ai_parrot-0.17.2.dist-info/licenses/LICENSE +21 -0
  41. ai_parrot-0.17.2.dist-info/top_level.txt +6 -0
  42. crew-builder/.prettierrc +15 -0
  43. crew-builder/QUICKSTART.md +259 -0
  44. crew-builder/README.md +113 -0
  45. crew-builder/env.example +17 -0
  46. crew-builder/jsconfig.json +14 -0
  47. crew-builder/package-lock.json +4182 -0
  48. crew-builder/package.json +37 -0
  49. crew-builder/scripts/postinstall/apply-patches.mjs +260 -0
  50. crew-builder/src/app.css +62 -0
  51. crew-builder/src/app.d.ts +13 -0
  52. crew-builder/src/app.html +12 -0
  53. crew-builder/src/components/LoadingSpinner.svelte +64 -0
  54. crew-builder/src/components/ThemeSwitcher.svelte +149 -0
  55. crew-builder/src/components/index.js +9 -0
  56. crew-builder/src/lib/api/bots.ts +60 -0
  57. crew-builder/src/lib/api/chat.ts +80 -0
  58. crew-builder/src/lib/api/client.ts +56 -0
  59. crew-builder/src/lib/api/crew/crew.ts +136 -0
  60. crew-builder/src/lib/api/index.ts +5 -0
  61. crew-builder/src/lib/api/o365/auth.ts +65 -0
  62. crew-builder/src/lib/auth/auth.ts +54 -0
  63. crew-builder/src/lib/components/AgentNode.svelte +43 -0
  64. crew-builder/src/lib/components/BotCard.svelte +33 -0
  65. crew-builder/src/lib/components/ChatBubble.svelte +67 -0
  66. crew-builder/src/lib/components/ConfigPanel.svelte +278 -0
  67. crew-builder/src/lib/components/JsonTreeNode.svelte +76 -0
  68. crew-builder/src/lib/components/JsonViewer.svelte +24 -0
  69. crew-builder/src/lib/components/MarkdownEditor.svelte +48 -0
  70. crew-builder/src/lib/components/ThemeToggle.svelte +36 -0
  71. crew-builder/src/lib/components/Toast.svelte +67 -0
  72. crew-builder/src/lib/components/Toolbar.svelte +157 -0
  73. crew-builder/src/lib/components/index.ts +10 -0
  74. crew-builder/src/lib/config.ts +8 -0
  75. crew-builder/src/lib/stores/auth.svelte.ts +228 -0
  76. crew-builder/src/lib/stores/crewStore.ts +369 -0
  77. crew-builder/src/lib/stores/theme.svelte.js +145 -0
  78. crew-builder/src/lib/stores/toast.svelte.ts +69 -0
  79. crew-builder/src/lib/utils/conversation.ts +39 -0
  80. crew-builder/src/lib/utils/markdown.ts +122 -0
  81. crew-builder/src/lib/utils/talkHistory.ts +47 -0
  82. crew-builder/src/routes/+layout.svelte +20 -0
  83. crew-builder/src/routes/+page.svelte +539 -0
  84. crew-builder/src/routes/agents/+page.svelte +247 -0
  85. crew-builder/src/routes/agents/[agentId]/+page.svelte +288 -0
  86. crew-builder/src/routes/agents/[agentId]/+page.ts +7 -0
  87. crew-builder/src/routes/builder/+page.svelte +204 -0
  88. crew-builder/src/routes/crew/ask/+page.svelte +1052 -0
  89. crew-builder/src/routes/crew/ask/+page.ts +1 -0
  90. crew-builder/src/routes/integrations/o365/+page.svelte +304 -0
  91. crew-builder/src/routes/login/+page.svelte +197 -0
  92. crew-builder/src/routes/talk/[agentId]/+page.svelte +487 -0
  93. crew-builder/src/routes/talk/[agentId]/+page.ts +7 -0
  94. crew-builder/static/README.md +1 -0
  95. crew-builder/svelte.config.js +11 -0
  96. crew-builder/tailwind.config.ts +53 -0
  97. crew-builder/tsconfig.json +3 -0
  98. crew-builder/vite.config.ts +10 -0
  99. mcp_servers/calculator_server.py +309 -0
  100. parrot/__init__.py +27 -0
  101. parrot/__pycache__/__init__.cpython-310.pyc +0 -0
  102. parrot/__pycache__/version.cpython-310.pyc +0 -0
  103. parrot/_version.py +34 -0
  104. parrot/a2a/__init__.py +48 -0
  105. parrot/a2a/client.py +658 -0
  106. parrot/a2a/discovery.py +89 -0
  107. parrot/a2a/mixin.py +257 -0
  108. parrot/a2a/models.py +376 -0
  109. parrot/a2a/server.py +770 -0
  110. parrot/agents/__init__.py +29 -0
  111. parrot/bots/__init__.py +12 -0
  112. parrot/bots/a2a_agent.py +19 -0
  113. parrot/bots/abstract.py +3139 -0
  114. parrot/bots/agent.py +1129 -0
  115. parrot/bots/basic.py +9 -0
  116. parrot/bots/chatbot.py +669 -0
  117. parrot/bots/data.py +1618 -0
  118. parrot/bots/database/__init__.py +5 -0
  119. parrot/bots/database/abstract.py +3071 -0
  120. parrot/bots/database/cache.py +286 -0
  121. parrot/bots/database/models.py +468 -0
  122. parrot/bots/database/prompts.py +154 -0
  123. parrot/bots/database/retries.py +98 -0
  124. parrot/bots/database/router.py +269 -0
  125. parrot/bots/database/sql.py +41 -0
  126. parrot/bots/db/__init__.py +6 -0
  127. parrot/bots/db/abstract.py +556 -0
  128. parrot/bots/db/bigquery.py +602 -0
  129. parrot/bots/db/cache.py +85 -0
  130. parrot/bots/db/documentdb.py +668 -0
  131. parrot/bots/db/elastic.py +1014 -0
  132. parrot/bots/db/influx.py +898 -0
  133. parrot/bots/db/mock.py +96 -0
  134. parrot/bots/db/multi.py +783 -0
  135. parrot/bots/db/prompts.py +185 -0
  136. parrot/bots/db/sql.py +1255 -0
  137. parrot/bots/db/tools.py +212 -0
  138. parrot/bots/document.py +680 -0
  139. parrot/bots/hrbot.py +15 -0
  140. parrot/bots/kb.py +170 -0
  141. parrot/bots/mcp.py +36 -0
  142. parrot/bots/orchestration/README.md +463 -0
  143. parrot/bots/orchestration/__init__.py +1 -0
  144. parrot/bots/orchestration/agent.py +155 -0
  145. parrot/bots/orchestration/crew.py +3330 -0
  146. parrot/bots/orchestration/fsm.py +1179 -0
  147. parrot/bots/orchestration/hr.py +434 -0
  148. parrot/bots/orchestration/storage/__init__.py +4 -0
  149. parrot/bots/orchestration/storage/memory.py +100 -0
  150. parrot/bots/orchestration/storage/mixin.py +119 -0
  151. parrot/bots/orchestration/verify.py +202 -0
  152. parrot/bots/product.py +204 -0
  153. parrot/bots/prompts/__init__.py +96 -0
  154. parrot/bots/prompts/agents.py +155 -0
  155. parrot/bots/prompts/data.py +216 -0
  156. parrot/bots/prompts/output_generation.py +8 -0
  157. parrot/bots/scraper/__init__.py +3 -0
  158. parrot/bots/scraper/models.py +122 -0
  159. parrot/bots/scraper/scraper.py +1173 -0
  160. parrot/bots/scraper/templates.py +115 -0
  161. parrot/bots/stores/__init__.py +5 -0
  162. parrot/bots/stores/local.py +172 -0
  163. parrot/bots/webdev.py +81 -0
  164. parrot/cli.py +17 -0
  165. parrot/clients/__init__.py +16 -0
  166. parrot/clients/base.py +1491 -0
  167. parrot/clients/claude.py +1191 -0
  168. parrot/clients/factory.py +129 -0
  169. parrot/clients/google.py +4567 -0
  170. parrot/clients/gpt.py +1975 -0
  171. parrot/clients/grok.py +432 -0
  172. parrot/clients/groq.py +986 -0
  173. parrot/clients/hf.py +582 -0
  174. parrot/clients/models.py +18 -0
  175. parrot/conf.py +395 -0
  176. parrot/embeddings/__init__.py +9 -0
  177. parrot/embeddings/base.py +157 -0
  178. parrot/embeddings/google.py +98 -0
  179. parrot/embeddings/huggingface.py +74 -0
  180. parrot/embeddings/openai.py +84 -0
  181. parrot/embeddings/processor.py +88 -0
  182. parrot/exceptions.c +13868 -0
  183. parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
  184. parrot/exceptions.pxd +22 -0
  185. parrot/exceptions.pxi +15 -0
  186. parrot/exceptions.pyx +44 -0
  187. parrot/generators/__init__.py +29 -0
  188. parrot/generators/base.py +200 -0
  189. parrot/generators/html.py +293 -0
  190. parrot/generators/react.py +205 -0
  191. parrot/generators/streamlit.py +203 -0
  192. parrot/generators/template.py +105 -0
  193. parrot/handlers/__init__.py +4 -0
  194. parrot/handlers/agent.py +861 -0
  195. parrot/handlers/agents/__init__.py +1 -0
  196. parrot/handlers/agents/abstract.py +900 -0
  197. parrot/handlers/bots.py +338 -0
  198. parrot/handlers/chat.py +915 -0
  199. parrot/handlers/creation.sql +192 -0
  200. parrot/handlers/crew/ARCHITECTURE.md +362 -0
  201. parrot/handlers/crew/README_BOTMANAGER_PERSISTENCE.md +303 -0
  202. parrot/handlers/crew/README_REDIS_PERSISTENCE.md +366 -0
  203. parrot/handlers/crew/__init__.py +0 -0
  204. parrot/handlers/crew/handler.py +801 -0
  205. parrot/handlers/crew/models.py +229 -0
  206. parrot/handlers/crew/redis_persistence.py +523 -0
  207. parrot/handlers/jobs/__init__.py +10 -0
  208. parrot/handlers/jobs/job.py +384 -0
  209. parrot/handlers/jobs/mixin.py +627 -0
  210. parrot/handlers/jobs/models.py +115 -0
  211. parrot/handlers/jobs/worker.py +31 -0
  212. parrot/handlers/models.py +596 -0
  213. parrot/handlers/o365_auth.py +105 -0
  214. parrot/handlers/stream.py +337 -0
  215. parrot/interfaces/__init__.py +6 -0
  216. parrot/interfaces/aws.py +143 -0
  217. parrot/interfaces/credentials.py +113 -0
  218. parrot/interfaces/database.py +27 -0
  219. parrot/interfaces/google.py +1123 -0
  220. parrot/interfaces/hierarchy.py +1227 -0
  221. parrot/interfaces/http.py +651 -0
  222. parrot/interfaces/images/__init__.py +0 -0
  223. parrot/interfaces/images/plugins/__init__.py +24 -0
  224. parrot/interfaces/images/plugins/abstract.py +58 -0
  225. parrot/interfaces/images/plugins/analisys.py +148 -0
  226. parrot/interfaces/images/plugins/classify.py +150 -0
  227. parrot/interfaces/images/plugins/classifybase.py +182 -0
  228. parrot/interfaces/images/plugins/detect.py +150 -0
  229. parrot/interfaces/images/plugins/exif.py +1103 -0
  230. parrot/interfaces/images/plugins/hash.py +52 -0
  231. parrot/interfaces/images/plugins/vision.py +104 -0
  232. parrot/interfaces/images/plugins/yolo.py +66 -0
  233. parrot/interfaces/images/plugins/zerodetect.py +197 -0
  234. parrot/interfaces/o365.py +978 -0
  235. parrot/interfaces/onedrive.py +822 -0
  236. parrot/interfaces/sharepoint.py +1435 -0
  237. parrot/interfaces/soap.py +257 -0
  238. parrot/loaders/__init__.py +8 -0
  239. parrot/loaders/abstract.py +1131 -0
  240. parrot/loaders/audio.py +199 -0
  241. parrot/loaders/basepdf.py +53 -0
  242. parrot/loaders/basevideo.py +1568 -0
  243. parrot/loaders/csv.py +409 -0
  244. parrot/loaders/docx.py +116 -0
  245. parrot/loaders/epubloader.py +316 -0
  246. parrot/loaders/excel.py +199 -0
  247. parrot/loaders/factory.py +55 -0
  248. parrot/loaders/files/__init__.py +0 -0
  249. parrot/loaders/files/abstract.py +39 -0
  250. parrot/loaders/files/html.py +26 -0
  251. parrot/loaders/files/text.py +63 -0
  252. parrot/loaders/html.py +152 -0
  253. parrot/loaders/markdown.py +442 -0
  254. parrot/loaders/pdf.py +373 -0
  255. parrot/loaders/pdfmark.py +320 -0
  256. parrot/loaders/pdftables.py +506 -0
  257. parrot/loaders/ppt.py +476 -0
  258. parrot/loaders/qa.py +63 -0
  259. parrot/loaders/splitters/__init__.py +10 -0
  260. parrot/loaders/splitters/base.py +138 -0
  261. parrot/loaders/splitters/md.py +228 -0
  262. parrot/loaders/splitters/token.py +143 -0
  263. parrot/loaders/txt.py +26 -0
  264. parrot/loaders/video.py +89 -0
  265. parrot/loaders/videolocal.py +218 -0
  266. parrot/loaders/videounderstanding.py +377 -0
  267. parrot/loaders/vimeo.py +167 -0
  268. parrot/loaders/web.py +599 -0
  269. parrot/loaders/youtube.py +504 -0
  270. parrot/manager/__init__.py +5 -0
  271. parrot/manager/manager.py +1030 -0
  272. parrot/mcp/__init__.py +28 -0
  273. parrot/mcp/adapter.py +105 -0
  274. parrot/mcp/cli.py +174 -0
  275. parrot/mcp/client.py +119 -0
  276. parrot/mcp/config.py +75 -0
  277. parrot/mcp/integration.py +842 -0
  278. parrot/mcp/oauth.py +933 -0
  279. parrot/mcp/server.py +225 -0
  280. parrot/mcp/transports/__init__.py +3 -0
  281. parrot/mcp/transports/base.py +279 -0
  282. parrot/mcp/transports/grpc_session.py +163 -0
  283. parrot/mcp/transports/http.py +312 -0
  284. parrot/mcp/transports/mcp.proto +108 -0
  285. parrot/mcp/transports/quic.py +1082 -0
  286. parrot/mcp/transports/sse.py +330 -0
  287. parrot/mcp/transports/stdio.py +309 -0
  288. parrot/mcp/transports/unix.py +395 -0
  289. parrot/mcp/transports/websocket.py +547 -0
  290. parrot/memory/__init__.py +16 -0
  291. parrot/memory/abstract.py +209 -0
  292. parrot/memory/agent.py +32 -0
  293. parrot/memory/cache.py +175 -0
  294. parrot/memory/core.py +555 -0
  295. parrot/memory/file.py +153 -0
  296. parrot/memory/mem.py +131 -0
  297. parrot/memory/redis.py +613 -0
  298. parrot/models/__init__.py +46 -0
  299. parrot/models/basic.py +118 -0
  300. parrot/models/compliance.py +208 -0
  301. parrot/models/crew.py +395 -0
  302. parrot/models/detections.py +654 -0
  303. parrot/models/generation.py +85 -0
  304. parrot/models/google.py +223 -0
  305. parrot/models/groq.py +23 -0
  306. parrot/models/openai.py +30 -0
  307. parrot/models/outputs.py +285 -0
  308. parrot/models/responses.py +938 -0
  309. parrot/notifications/__init__.py +743 -0
  310. parrot/openapi/__init__.py +3 -0
  311. parrot/openapi/components.yaml +641 -0
  312. parrot/openapi/config.py +322 -0
  313. parrot/outputs/__init__.py +32 -0
  314. parrot/outputs/formats/__init__.py +108 -0
  315. parrot/outputs/formats/altair.py +359 -0
  316. parrot/outputs/formats/application.py +122 -0
  317. parrot/outputs/formats/base.py +351 -0
  318. parrot/outputs/formats/bokeh.py +356 -0
  319. parrot/outputs/formats/card.py +424 -0
  320. parrot/outputs/formats/chart.py +436 -0
  321. parrot/outputs/formats/d3.py +255 -0
  322. parrot/outputs/formats/echarts.py +310 -0
  323. parrot/outputs/formats/generators/__init__.py +0 -0
  324. parrot/outputs/formats/generators/abstract.py +61 -0
  325. parrot/outputs/formats/generators/panel.py +145 -0
  326. parrot/outputs/formats/generators/streamlit.py +86 -0
  327. parrot/outputs/formats/generators/terminal.py +63 -0
  328. parrot/outputs/formats/holoviews.py +310 -0
  329. parrot/outputs/formats/html.py +147 -0
  330. parrot/outputs/formats/jinja2.py +46 -0
  331. parrot/outputs/formats/json.py +87 -0
  332. parrot/outputs/formats/map.py +933 -0
  333. parrot/outputs/formats/markdown.py +172 -0
  334. parrot/outputs/formats/matplotlib.py +237 -0
  335. parrot/outputs/formats/mixins/__init__.py +0 -0
  336. parrot/outputs/formats/mixins/emaps.py +855 -0
  337. parrot/outputs/formats/plotly.py +341 -0
  338. parrot/outputs/formats/seaborn.py +310 -0
  339. parrot/outputs/formats/table.py +397 -0
  340. parrot/outputs/formats/template_report.py +138 -0
  341. parrot/outputs/formats/yaml.py +125 -0
  342. parrot/outputs/formatter.py +152 -0
  343. parrot/outputs/templates/__init__.py +95 -0
  344. parrot/pipelines/__init__.py +0 -0
  345. parrot/pipelines/abstract.py +210 -0
  346. parrot/pipelines/detector.py +124 -0
  347. parrot/pipelines/models.py +90 -0
  348. parrot/pipelines/planogram.py +3002 -0
  349. parrot/pipelines/table.sql +97 -0
  350. parrot/plugins/__init__.py +106 -0
  351. parrot/plugins/importer.py +80 -0
  352. parrot/py.typed +0 -0
  353. parrot/registry/__init__.py +18 -0
  354. parrot/registry/registry.py +594 -0
  355. parrot/scheduler/__init__.py +1189 -0
  356. parrot/scheduler/models.py +60 -0
  357. parrot/security/__init__.py +16 -0
  358. parrot/security/prompt_injection.py +268 -0
  359. parrot/security/security_events.sql +25 -0
  360. parrot/services/__init__.py +1 -0
  361. parrot/services/mcp/__init__.py +8 -0
  362. parrot/services/mcp/config.py +13 -0
  363. parrot/services/mcp/server.py +295 -0
  364. parrot/services/o365_remote_auth.py +235 -0
  365. parrot/stores/__init__.py +7 -0
  366. parrot/stores/abstract.py +352 -0
  367. parrot/stores/arango.py +1090 -0
  368. parrot/stores/bigquery.py +1377 -0
  369. parrot/stores/cache.py +106 -0
  370. parrot/stores/empty.py +10 -0
  371. parrot/stores/faiss_store.py +1157 -0
  372. parrot/stores/kb/__init__.py +9 -0
  373. parrot/stores/kb/abstract.py +68 -0
  374. parrot/stores/kb/cache.py +165 -0
  375. parrot/stores/kb/doc.py +325 -0
  376. parrot/stores/kb/hierarchy.py +346 -0
  377. parrot/stores/kb/local.py +457 -0
  378. parrot/stores/kb/prompt.py +28 -0
  379. parrot/stores/kb/redis.py +659 -0
  380. parrot/stores/kb/store.py +115 -0
  381. parrot/stores/kb/user.py +374 -0
  382. parrot/stores/models.py +59 -0
  383. parrot/stores/pgvector.py +3 -0
  384. parrot/stores/postgres.py +2853 -0
  385. parrot/stores/utils/__init__.py +0 -0
  386. parrot/stores/utils/chunking.py +197 -0
  387. parrot/telemetry/__init__.py +3 -0
  388. parrot/telemetry/mixin.py +111 -0
  389. parrot/template/__init__.py +3 -0
  390. parrot/template/engine.py +259 -0
  391. parrot/tools/__init__.py +23 -0
  392. parrot/tools/abstract.py +644 -0
  393. parrot/tools/agent.py +363 -0
  394. parrot/tools/arangodbsearch.py +537 -0
  395. parrot/tools/arxiv_tool.py +188 -0
  396. parrot/tools/calculator/__init__.py +3 -0
  397. parrot/tools/calculator/operations/__init__.py +38 -0
  398. parrot/tools/calculator/operations/calculus.py +80 -0
  399. parrot/tools/calculator/operations/statistics.py +76 -0
  400. parrot/tools/calculator/tool.py +150 -0
  401. parrot/tools/cloudwatch.py +988 -0
  402. parrot/tools/codeinterpreter/__init__.py +127 -0
  403. parrot/tools/codeinterpreter/executor.py +371 -0
  404. parrot/tools/codeinterpreter/internals.py +473 -0
  405. parrot/tools/codeinterpreter/models.py +643 -0
  406. parrot/tools/codeinterpreter/prompts.py +224 -0
  407. parrot/tools/codeinterpreter/tool.py +664 -0
  408. parrot/tools/company_info/__init__.py +6 -0
  409. parrot/tools/company_info/tool.py +1138 -0
  410. parrot/tools/correlationanalysis.py +437 -0
  411. parrot/tools/database/abstract.py +286 -0
  412. parrot/tools/database/bq.py +115 -0
  413. parrot/tools/database/cache.py +284 -0
  414. parrot/tools/database/models.py +95 -0
  415. parrot/tools/database/pg.py +343 -0
  416. parrot/tools/databasequery.py +1159 -0
  417. parrot/tools/db.py +1800 -0
  418. parrot/tools/ddgo.py +370 -0
  419. parrot/tools/decorators.py +271 -0
  420. parrot/tools/dftohtml.py +282 -0
  421. parrot/tools/document.py +549 -0
  422. parrot/tools/ecs.py +819 -0
  423. parrot/tools/edareport.py +368 -0
  424. parrot/tools/elasticsearch.py +1049 -0
  425. parrot/tools/employees.py +462 -0
  426. parrot/tools/epson/__init__.py +96 -0
  427. parrot/tools/excel.py +683 -0
  428. parrot/tools/file/__init__.py +13 -0
  429. parrot/tools/file/abstract.py +76 -0
  430. parrot/tools/file/gcs.py +378 -0
  431. parrot/tools/file/local.py +284 -0
  432. parrot/tools/file/s3.py +511 -0
  433. parrot/tools/file/tmp.py +309 -0
  434. parrot/tools/file/tool.py +501 -0
  435. parrot/tools/file_reader.py +129 -0
  436. parrot/tools/flowtask/__init__.py +19 -0
  437. parrot/tools/flowtask/tool.py +761 -0
  438. parrot/tools/gittoolkit.py +508 -0
  439. parrot/tools/google/__init__.py +18 -0
  440. parrot/tools/google/base.py +169 -0
  441. parrot/tools/google/tools.py +1251 -0
  442. parrot/tools/googlelocation.py +5 -0
  443. parrot/tools/googleroutes.py +5 -0
  444. parrot/tools/googlesearch.py +5 -0
  445. parrot/tools/googlesitesearch.py +5 -0
  446. parrot/tools/googlevoice.py +2 -0
  447. parrot/tools/gvoice.py +695 -0
  448. parrot/tools/ibisworld/README.md +225 -0
  449. parrot/tools/ibisworld/__init__.py +11 -0
  450. parrot/tools/ibisworld/tool.py +366 -0
  451. parrot/tools/jiratoolkit.py +1718 -0
  452. parrot/tools/manager.py +1098 -0
  453. parrot/tools/math.py +152 -0
  454. parrot/tools/metadata.py +476 -0
  455. parrot/tools/msteams.py +1621 -0
  456. parrot/tools/msword.py +635 -0
  457. parrot/tools/multidb.py +580 -0
  458. parrot/tools/multistoresearch.py +369 -0
  459. parrot/tools/networkninja.py +167 -0
  460. parrot/tools/nextstop/__init__.py +4 -0
  461. parrot/tools/nextstop/base.py +286 -0
  462. parrot/tools/nextstop/employee.py +733 -0
  463. parrot/tools/nextstop/store.py +462 -0
  464. parrot/tools/notification.py +435 -0
  465. parrot/tools/o365/__init__.py +42 -0
  466. parrot/tools/o365/base.py +295 -0
  467. parrot/tools/o365/bundle.py +522 -0
  468. parrot/tools/o365/events.py +554 -0
  469. parrot/tools/o365/mail.py +992 -0
  470. parrot/tools/o365/onedrive.py +497 -0
  471. parrot/tools/o365/sharepoint.py +641 -0
  472. parrot/tools/openapi_toolkit.py +904 -0
  473. parrot/tools/openweather.py +527 -0
  474. parrot/tools/pdfprint.py +1001 -0
  475. parrot/tools/powerbi.py +518 -0
  476. parrot/tools/powerpoint.py +1113 -0
  477. parrot/tools/pricestool.py +146 -0
  478. parrot/tools/products/__init__.py +246 -0
  479. parrot/tools/prophet_tool.py +171 -0
  480. parrot/tools/pythonpandas.py +630 -0
  481. parrot/tools/pythonrepl.py +910 -0
  482. parrot/tools/qsource.py +436 -0
  483. parrot/tools/querytoolkit.py +395 -0
  484. parrot/tools/quickeda.py +827 -0
  485. parrot/tools/resttool.py +553 -0
  486. parrot/tools/retail/__init__.py +0 -0
  487. parrot/tools/retail/bby.py +528 -0
  488. parrot/tools/sandboxtool.py +703 -0
  489. parrot/tools/sassie/__init__.py +352 -0
  490. parrot/tools/scraping/__init__.py +7 -0
  491. parrot/tools/scraping/docs/select.md +466 -0
  492. parrot/tools/scraping/documentation.md +1278 -0
  493. parrot/tools/scraping/driver.py +436 -0
  494. parrot/tools/scraping/models.py +576 -0
  495. parrot/tools/scraping/options.py +85 -0
  496. parrot/tools/scraping/orchestrator.py +517 -0
  497. parrot/tools/scraping/readme.md +740 -0
  498. parrot/tools/scraping/tool.py +3115 -0
  499. parrot/tools/seasonaldetection.py +642 -0
  500. parrot/tools/shell_tool/__init__.py +5 -0
  501. parrot/tools/shell_tool/actions.py +408 -0
  502. parrot/tools/shell_tool/engine.py +155 -0
  503. parrot/tools/shell_tool/models.py +322 -0
  504. parrot/tools/shell_tool/tool.py +442 -0
  505. parrot/tools/site_search.py +214 -0
  506. parrot/tools/textfile.py +418 -0
  507. parrot/tools/think.py +378 -0
  508. parrot/tools/toolkit.py +298 -0
  509. parrot/tools/webapp_tool.py +187 -0
  510. parrot/tools/whatif.py +1279 -0
  511. parrot/tools/workday/MULTI_WSDL_EXAMPLE.md +249 -0
  512. parrot/tools/workday/__init__.py +6 -0
  513. parrot/tools/workday/models.py +1389 -0
  514. parrot/tools/workday/tool.py +1293 -0
  515. parrot/tools/yfinance_tool.py +306 -0
  516. parrot/tools/zipcode.py +217 -0
  517. parrot/utils/__init__.py +2 -0
  518. parrot/utils/helpers.py +73 -0
  519. parrot/utils/parsers/__init__.py +5 -0
  520. parrot/utils/parsers/toml.c +12078 -0
  521. parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
  522. parrot/utils/parsers/toml.pyx +21 -0
  523. parrot/utils/toml.py +11 -0
  524. parrot/utils/types.cpp +20936 -0
  525. parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
  526. parrot/utils/types.pyx +213 -0
  527. parrot/utils/uv.py +11 -0
  528. parrot/version.py +10 -0
  529. parrot/yaml-rs/Cargo.lock +350 -0
  530. parrot/yaml-rs/Cargo.toml +19 -0
  531. parrot/yaml-rs/pyproject.toml +19 -0
  532. parrot/yaml-rs/python/yaml_rs/__init__.py +81 -0
  533. parrot/yaml-rs/src/lib.rs +222 -0
  534. requirements/docker-compose.yml +24 -0
  535. requirements/requirements-dev.txt +21 -0
@@ -0,0 +1,1138 @@
1
+ """
2
+ CompanyInfoToolkit - Unified toolkit for scraping company information from multiple sources.
3
+
4
+ This toolkit extends AbstractToolkit and provides methods to scrape company data from:
5
+ - explorium.ai
6
+ - leadiq.com
7
+ - rocketreach.co
8
+ - siccode.com
9
+ - zoominfo.com
10
+
11
+ Each public async method becomes a tool that:
12
+ 1. Performs a Google site search for the company
13
+ 2. Fetches the first result using Selenium
14
+ 3. Parses the page with BeautifulSoup
15
+ 4. Extracts company information
16
+ 5. Returns structured data (CompanyInfo model or JSON)
17
+
18
+ Dependencies:
19
+ - selenium
20
+ - beautifulsoup4
21
+ - pydantic
22
+ - google-api-python-client
23
+ - aiohttp
24
+
25
+ Example usage:
26
+ toolkit = CompanyInfoToolkit(
27
+ google_api_key="your-api-key",
28
+ google_cse_id="your-cse-id",
29
+ use_proxy=False,
30
+ headless=True
31
+ )
32
+
33
+ # Get all tools
34
+ tools = toolkit.get_tools()
35
+
36
+ # Or use methods directly
37
+ result = await toolkit.scrape_zoominfo("PetSmart")
38
+ print(result.company_name)
39
+ """
40
+ from __future__ import annotations
41
+
42
+ import asyncio
43
+ import json
44
+ import re
45
+ import time
46
+ from typing import Dict, List, Any, Optional, Union
47
+ from urllib.parse import urljoin
48
+ from bs4 import BeautifulSoup as bs
49
+ from pydantic import BaseModel, Field, model_validator
50
+ from googleapiclient.discovery import build
51
+ from navconfig import config
52
+ from navconfig.logging import logging
53
+ try:
54
+ from selenium import webdriver
55
+ from selenium.webdriver.chrome.options import Options
56
+ from selenium.webdriver.common.by import By
57
+ from selenium.webdriver.support import expected_conditions as EC
58
+ from selenium.webdriver.support.ui import WebDriverWait
59
+ from selenium.common.exceptions import (
60
+ TimeoutException,
61
+ NoSuchElementException,
62
+ WebDriverException
63
+ )
64
+ except ImportError as e:
65
+ raise ImportError("Please install selenium: pip install selenium") from e
66
+
67
+ from ..toolkit import AbstractToolkit
68
+ from ..decorators import tool_schema
69
+ from ..scraping.driver import SeleniumSetup
70
+
71
+
72
+ # ===========================
73
+ # Pydantic Models
74
+ # ===========================
75
+
76
class CompanyInput(BaseModel):
    """Arguments accepted by the company-scraping tools."""

    # The company to look up via Google site search (required).
    company_name: str = Field(..., description="Name of the company to search for")
    # When True the tool returns a JSON string rather than a CompanyInfo model.
    return_json: bool = Field(
        default=False,
        description="If True, return JSON string instead of CompanyInfo object",
    )
83
+
84
class CompanyInfo(BaseModel):
    """
    Normalized company record shared by every scraping platform.

    All fields are optional except ``scrape_status`` so that a partially
    populated result from any single source still validates.
    """

    # --- Search metadata -------------------------------------------------
    search_term: Optional[str] = Field(None, description="Search term used")
    search_url: Optional[str] = Field(None, description="URL of the scraped page")
    source_platform: Optional[str] = Field(None, description="Source platform (e.g., zoominfo, leadiq)")
    scrape_status: str = Field("pending", description="Status: pending, success, no_data, error")

    # --- Company basic info ----------------------------------------------
    company_name: Optional[str] = Field(None, description="Company name")
    logo_url: Optional[str] = Field(None, description="Company logo URL")
    company_description: Optional[str] = Field(None, description="Company description")

    # --- Location info ---------------------------------------------------
    headquarters: Optional[str] = Field(None, description="Headquarters address")
    address: Optional[str] = Field(None, description="Street address")
    city: Optional[str] = Field(None, description="City")
    state: Optional[str] = Field(None, description="State/Province")
    zip_code: Optional[str] = Field(None, description="ZIP/Postal code")
    country: Optional[str] = Field(None, description="Country")
    metro_area: Optional[str] = Field(None, description="Metro area")

    # --- Contact info ----------------------------------------------------
    phone_number: Optional[str] = Field(None, description="Phone number")
    website: Optional[str] = Field(None, description="Company website")

    # --- Business classification -----------------------------------------
    industry: Optional[Union[str, List[str]]] = Field(None, description="Industry")
    industry_category: Optional[str] = Field(None, description="Industry category")
    category: Optional[str] = Field(None, description="Business category")
    keywords: Optional[List[str]] = Field(None, description="Business keywords")
    naics_code: Optional[str] = Field(None, description="NAICS code(s)")
    sic_code: Optional[str] = Field(None, description="SIC code(s)")

    # --- Financial & size info -------------------------------------------
    stock_symbol: Optional[str] = Field(None, description="Stock ticker symbol")
    revenue_range: Optional[str] = Field(None, description="Revenue range")
    employee_count: Optional[str] = Field(None, description="Number of employees")
    number_employees: Optional[str] = Field(None, description="Employee count description")
    company_size: Optional[str] = Field(None, description="Company size category")
    founded: Optional[str] = Field(None, description="Year founded")
    funding: Optional[str] = Field(None, description="Funding information")
    years_in_business: Optional[str] = Field(None, description="Years in business")

    # --- Additional info -------------------------------------------------
    executives: Optional[List[Dict[str, str]]] = Field(None, description="Executive team")
    similar_companies: Optional[Union[str, List[Dict]]] = Field(None, description="Similar companies")
    social_media: Optional[Dict[str, str]] = Field(None, description="Social media links")

    # --- Metadata ---------------------------------------------------------
    timestamp: Optional[str] = Field(None, description="Scrape timestamp")
    error_message: Optional[str] = Field(None, description="Error message if any")

    def to_json(self, **kwargs) -> str:
        """Serialize to a JSON string, omitting unset (None) fields."""
        # exclude_none keeps the payload compact; extra kwargs pass through
        # to pydantic's model_dump_json (e.g. indent=2).
        return self.model_dump_json(exclude_none=True, **kwargs)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "CompanyInfo":
        """Alternate constructor: build a CompanyInfo from a plain dict."""
        return cls(**data)
148
+
149
+
150
class GoogleSearchResult(BaseModel):
    """Outcome of a single Google Custom Search site-restricted query."""

    # Query context (always present).
    query: str = Field(description="Search query used")
    site: str = Field(description="Site searched")
    # First-hit details; left as None when the search returned nothing.
    url: Optional[str] = Field(None, description="First result URL")
    title: Optional[str] = Field(None, description="Result title")
    snippet: Optional[str] = Field(None, description="Result snippet")
    # Total match count reported by the API (0 when no results).
    total_results: int = Field(0, description="Total results found")
158
+
159
+
160
+ # ===========================
161
+ # Main Toolkit Class
162
+ # ===========================
163
+
164
+ class CompanyInfoToolkit(AbstractToolkit):
165
+ """
166
+ Toolkit for scraping company information from multiple platforms.
167
+
168
+ Each public async method is automatically converted to a tool by AbstractToolkit.
169
+ Methods perform:
170
+ 1. Google site search for company
171
+ 2. Selenium page fetch
172
+ 3. BeautifulSoup parsing
173
+ 4. Structured data extraction
174
+ """
175
+
176
    def __init__(
        self,
        google_api_key: Optional[str] = None,
        google_cse_id: Optional[str] = None,
        browser: str = 'chrome',
        headless: bool = True,
        timeout: int = 30,
        auto_install: bool = True,
        mobile: bool = False,
        mobile_device: Optional[str] = None,
        use_undetected: bool = False,
        **kwargs
    ):
        """
        Initialize the CompanyInfoToolkit.

        Args:
            google_api_key: Google Custom Search API key
            google_cse_id: Google Custom Search Engine ID
            browser: Browser type ('chrome', 'firefox', 'edge', 'safari', 'undetected')
            headless: Run browser in headless mode
            timeout: Default timeout for page loads (seconds)
            auto_install: Auto-install webdriver if not found
            mobile: Enable mobile emulation (Chrome only)
            mobile_device: Specific mobile device to emulate
            use_undetected: Use undetected-chromedriver (requires package)
            **kwargs: Additional arguments passed to AbstractToolkit and SeleniumSetup
        """
        super().__init__(**kwargs)

        # Google Search configuration: explicit arguments win over config values.
        self.google_api_key = google_api_key or config.get('GOOGLE_SEARCH_API_KEY')
        self.google_cse_id = google_cse_id or config.get('GOOGLE_SEARCH_ENGINE_ID')
        # Service Selection:
        # NOTE(review): the Custom Search client is built eagerly here, so a
        # missing or invalid API key surfaces at construction time rather than
        # at first search — confirm this is intended.
        self.service = build("customsearch", "v1", developerKey=self.google_api_key)

        # Browser configuration for SeleniumSetup.
        # Note: **kwargs is forwarded BOTH to AbstractToolkit (above) and into
        # this dict for SeleniumSetup — keys must be acceptable to both sides.
        self.browser_config = {
            'browser': 'undetected' if use_undetected else browser,
            'headless': headless,
            'auto_install': auto_install,
            'mobile': mobile,
            'mobile_device': mobile_device,
            'timeout': timeout,
            **kwargs  # Pass through any additional kwargs
        }
        # Selenium setup instance; created lazily in _get_driver().
        self._selenium_setup: Optional[SeleniumSetup] = None

        # Current driver instance (shared by all scrape_* methods, closed
        # by _close_driver()).
        self._driver = None

        # Logger named after the concrete class.
        self.logger = logging.getLogger(self.__class__.__name__)
230
+
231
+ # ===========================
232
+ # Core Utility Methods
233
+ # ===========================
234
+ async def _get_driver(self) -> webdriver.Chrome:
235
+ """Get or create Selenium WebDriver instance using SeleniumSetup."""
236
+ if self._driver is None:
237
+ if SeleniumSetup is None:
238
+ raise ImportError(
239
+ "SeleniumSetup not available. Please ensure parrot.tools.scraping.driver is installed."
240
+ )
241
+
242
+ self.logger.info("Initializing Selenium WebDriver...")
243
+
244
+ # Create SeleniumSetup instance
245
+ self._selenium_setup = SeleniumSetup(**self.browser_config)
246
+
247
+ # Get driver using SeleniumSetup's async method
248
+ self._driver = await self._selenium_setup.get_driver()
249
+
250
+ self.logger.info("Selenium WebDriver initialized successfully")
251
+
252
+ return self._driver
253
+
254
+ async def _close_driver(self):
255
+ """Close the Selenium driver if open."""
256
+ if self._driver is not None:
257
+ try:
258
+ loop = asyncio.get_running_loop()
259
+ await loop.run_in_executor(None, self._driver.quit)
260
+ self.logger.info("Selenium WebDriver closed")
261
+ except Exception as e:
262
+ self.logger.warning(f"Error closing driver: {e}")
263
+ finally:
264
+ self._driver = None
265
+ self._selenium_setup = None
266
+
267
+ async def _google_site_search(
268
+ self,
269
+ company_name: str,
270
+ site: str,
271
+ additional_terms: str = "",
272
+ max_results: int = 5
273
+ ) -> GoogleSearchResult:
274
+ """
275
+ Perform Google site search for a company.
276
+
277
+ Args:
278
+ company_name: Company name to search for
279
+ site: Site domain to search within (e.g., "zoominfo.com")
280
+ additional_terms: Additional search terms (e.g., "Overview")
281
+ max_results: Maximum number of results
282
+
283
+ Returns:
284
+ GoogleSearchResult with first result URL
285
+ """
286
+ # Build search query
287
+ query = f"{company_name} {additional_terms}".strip()
288
+ search_query = f"site:{site} {query}"
289
+
290
+ self.logger.info(f"Google search: {search_query}")
291
+
292
+ try:
293
+ # Execute search
294
+ loop = asyncio.get_running_loop()
295
+ res = await loop.run_in_executor(
296
+ None,
297
+ lambda: self.service.cse().list( # pylint: disable=E1101 # noqa
298
+ q=search_query,
299
+ cx=self.google_cse_id,
300
+ num=max_results
301
+ ).execute()
302
+ )
303
+
304
+ items = res.get('items', [])
305
+
306
+ if not items:
307
+ self.logger.warning(
308
+ f"No results found for: {search_query}"
309
+ )
310
+ return GoogleSearchResult(
311
+ query=query,
312
+ site=site,
313
+ total_results=0
314
+ )
315
+
316
+ # Return first result
317
+ first = items[0]
318
+ return GoogleSearchResult(
319
+ query=query,
320
+ site=site,
321
+ url=first['link'],
322
+ title=first.get('title'),
323
+ snippet=first.get('snippet'),
324
+ total_results=len(items)
325
+ )
326
+
327
+ except Exception as e:
328
+ self.logger.error(f"Google search error: {e}")
329
+ return GoogleSearchResult(
330
+ query=query,
331
+ site=site,
332
+ total_results=0
333
+ )
334
+
335
+ async def _fetch_page_with_selenium(self, url: str) -> Optional[bs]:
336
+ """
337
+ Fetch a page using Selenium and return BeautifulSoup object.
338
+
339
+ Args:
340
+ url: URL to fetch
341
+
342
+ Returns:
343
+ BeautifulSoup object or None if failed
344
+ """
345
+ driver = await self._get_driver()
346
+
347
+ try:
348
+ self.logger.info(f"Fetching URL: {url}")
349
+
350
+ # Navigate to URL
351
+ loop = asyncio.get_running_loop()
352
+ await loop.run_in_executor(None, driver.get, url)
353
+
354
+ # Wait for page to load
355
+ await asyncio.sleep(2)
356
+
357
+ # Get page source
358
+ page_source = await loop.run_in_executor(
359
+ None,
360
+ lambda: driver.page_source
361
+ )
362
+ # Parse with BeautifulSoup
363
+ return bs(page_source, 'html.parser')
364
+
365
+ except TimeoutException:
366
+ self.logger.error(f"Timeout fetching: {url}")
367
+ return None
368
+ except Exception as e:
369
+ self.logger.error(f"Error fetching page: {e}")
370
+ return None
371
+
372
+ def _parse_address(self, address_text: str) -> Dict[str, Optional[str]]:
373
+ """
374
+ Parse an address string into components.
375
+
376
+ Args:
377
+ address_text: Full address string
378
+
379
+ Returns:
380
+ Dictionary with address, city, state, zip_code, country
381
+ """
382
+ result = {
383
+ 'address': address_text,
384
+ 'city': None,
385
+ 'state': None,
386
+ 'zip_code': None,
387
+ 'country': None
388
+ }
389
+
390
+ # Simple parsing logic - can be enhanced
391
+ parts = [p.strip() for p in address_text.split(',')]
392
+
393
+ if len(parts) >= 2:
394
+ result['city'] = parts[0]
395
+ result['country'] = parts[-1]
396
+
397
+ if len(parts) >= 3:
398
+ # Try to extract state and zip
399
+ state_zip = parts[-2].strip()
400
+ if match := re.search(r'([A-Z]{2})\s+(\d{5}(?:-\d{4})?)', state_zip):
401
+ result['state'] = match[1]
402
+ result['zip_code'] = match[2]
403
+
404
+ return result
405
+
406
+ def _standardize_name(self, name: str) -> str:
407
+ """Standardize company name for searching."""
408
+ # Remove common suffixes
409
+ suffixes = [
410
+ 'Inc.', 'Inc', 'LLC', 'Ltd.', 'Ltd', 'Corporation',
411
+ 'Corp.', 'Corp', 'Company', 'Co.', 'Co'
412
+ ]
413
+
414
+ cleaned = name
415
+ for suffix in suffixes:
416
+ cleaned = re.sub(
417
+ rf'\b{re.escape(suffix)}\b',
418
+ '',
419
+ cleaned,
420
+ flags=re.IGNORECASE
421
+ )
422
+
423
+ return cleaned.strip()
424
+
425
+ # ===========================
426
+ # Platform-Specific Methods (Tools)
427
+ # ===========================
428
+
429
    @tool_schema(CompanyInput)
    async def scrape_zoominfo(
        self,
        company_name: str,
        return_json: bool = False
    ) -> Union[CompanyInfo, str]:
        """
        Scrape company information from ZoomInfo.

        Flow: Google site search for the company's ZoomInfo overview page,
        Selenium fetch, then CSS-selector extraction. The shared WebDriver is
        always closed in ``finally``, so each call starts a fresh browser.

        Args:
            company_name: Name of the company to search for
            return_json: If True, return JSON string instead of CompanyInfo object

        Returns:
            CompanyInfo object or JSON string with company data.
            ``scrape_status`` ends as 'success', 'no_data' or 'error'; on
            failure ``error_message`` holds a truncated reason (max 100 chars).
        """
        site = "zoominfo.com"
        search_term = f"site:zoominfo.com {company_name} Overview"

        # Initialize result
        result = CompanyInfo(
            search_term=search_term,
            source_platform='zoominfo',
            scrape_status='pending',
            timestamp=str(time.time())
        )

        try:
            # 1. Google site search
            search_result = await self._google_site_search(
                company_name=company_name,
                site=site,
                additional_terms="Overview"
            )

            if not search_result.url:
                result.scrape_status = 'no_data'
                result.error_message = 'No search results found'
                return result.to_json() if return_json else result

            result.search_url = search_result.url

            # 2. Fetch page with Selenium
            document = await self._fetch_page_with_selenium(search_result.url)

            if not document:
                result.scrape_status = 'error'
                result.error_message = 'Failed to fetch page'
                return result.to_json() if return_json else result

            # 3. Parse company information
            # NOTE(review): the selectors below are tied to ZoomInfo's current
            # markup and degrade silently (fields stay None) if it changes.
            # Company name
            if company_header := document.select_one("h2#company-description-text-header"):
                result.company_name = company_header.text.strip()

            # Headquarters
            if hq_elem := document.select_one(".icon-label:-soup-contains('Headquarters') + .content"):
                result.headquarters = hq_elem.text.strip()

            # Phone
            if phone_elem := document.select_one(".icon-label:-soup-contains('Phone Number') + .content"):
                result.phone_number = phone_elem.text.strip()

            # Website
            if website_elem := document.select_one(".icon-label:-soup-contains('Website') + a"):
                result.website = website_elem.get('href')

            # Revenue
            if revenue_elem := document.select_one(".icon-label:-soup-contains('Revenue') + .content"):
                result.revenue_range = revenue_elem.text.strip()

            # Stock symbol
            if stock_elem := document.select_one(".icon-label:-soup-contains('Stock Symbol') + .content"):
                result.stock_symbol = stock_elem.text.strip()

            # Industry
            if industry_elems := document.select("#company-chips-wrapper a"):
                result.industry = [i.text.strip() for i in industry_elems]

            # Description
            if desc_elem := document.select_one("#company-description-text-content .company-desc"):
                result.company_description = desc_elem.text.strip()

            # NAICS and SIC codes (both live in the same codes section,
            # distinguished only by their label text)
            codes_section = document.select("#codes-wrapper .codes-content")
            for code in codes_section:
                text = code.text.strip()
                if "NAICS Code" in text:
                    result.naics_code = text.replace("NAICS Code", "").strip()
                elif "SIC Code" in text:
                    result.sic_code = text.replace("SIC Code", "").strip()

            # Executives (name is required; title/profile link are optional)
            exec_elems = document.select(".org-chart .person-right-content")
            executives = []
            for exec_elem in exec_elems:
                if name_elem := exec_elem.select_one(".person-name"):
                    executives.append({
                        "name": name_elem.text.strip(),
                        "title": exec_elem.select_one(".job-title").text.strip() if exec_elem.select_one(".job-title") else "",
                        "profile_link": name_elem.get('href', '')
                    })
            if executives:
                result.executives = executives

            # Check if we found meaningful data
            has_data = any([
                result.company_name,
                result.headquarters,
                result.phone_number,
                result.website,
                result.revenue_range
            ])

            result.scrape_status = 'success' if has_data else 'no_data'

        except Exception as e:
            self.logger.error(f"Error scraping ZoomInfo: {e}")
            result.scrape_status = 'error'
            result.error_message = str(e)[:100]
        finally:
            # Always release the shared WebDriver so the next scrape starts clean.
            await self._close_driver()

        return result.to_json() if return_json else result
553
+
554
    @tool_schema(CompanyInput)
    async def scrape_explorium(
        self,
        company_name: str,
        return_json: bool = False
    ) -> Union[CompanyInfo, str]:
        """
        Scrape company information from Explorium.ai.

        Flow: Google site search, Selenium fetch, then attribute-based parsing
        (Explorium tags elements with ``data-id`` attributes). The shared
        WebDriver is always closed in ``finally``.

        Args:
            company_name: Name of the company to search for
            return_json: If True, return JSON string instead of CompanyInfo object

        Returns:
            CompanyInfo object or JSON string with company data.
            ``scrape_status`` ends as 'success', 'no_data' or 'error'.
        """
        site = "explorium.ai"
        search_term = f"site:explorium.ai {company_name}"

        result = CompanyInfo(
            search_term=search_term,
            source_platform='explorium',
            scrape_status='pending',
            timestamp=str(time.time())
        )

        try:
            # Google site search
            search_result = await self._google_site_search(
                company_name=company_name,
                site=site,
                additional_terms="overview - services"
            )

            if not search_result.url:
                result.scrape_status = 'no_data'
                result.error_message = 'No search results found'
                return result.to_json() if return_json else result

            result.search_url = search_result.url

            # Fetch page
            document = await self._fetch_page_with_selenium(search_result.url)

            if not document:
                result.scrape_status = 'error'
                result.error_message = 'Failed to fetch page'
                return result.to_json() if return_json else result

            # Parse data
            # Company name from header
            name_elem = document.find('h1', {'data-id': 'txt-company-name'})
            if name_elem:
                result.company_name = name_elem.text.strip()

            # Address (full address is carried in the aria-label attribute)
            if address_section := document.find('div', {'data-id': 'info-address'}):
                if address_elem := address_section.find('p', {'aria-label': True}):
                    address_text = address_elem.get('aria-label', '').strip()
                    result.headquarters = address_text

                    # Extract country (last comma-separated segment)
                    country = address_text.split(',')[-1].strip()
                    result.country = country or None

            # Company description (prefixed with the company name)
            desc_elem = document.find('p', {'class': 'ExpTypography-root ExpTypography-body1'})
            if desc_elem and name_elem:
                result.company_description = f"{name_elem.text.strip()}: {desc_elem.text.strip()}"

            # Logo
            # NOTE(review): this matches the FIRST <img> with alt+src on the
            # page, which is not guaranteed to be the company logo — verify.
            if logo_elem := document.find('img', {'alt': True, 'src': True}):
                result.logo_url = logo_elem['src']

            # NAICS codes (code in the element text, industry in aria-label)
            if naics_section := document.find('div', {'data-id': 'company-stat-naics'}):
                naics_entries = naics_section.find_all('p', {'class': 'ExpTypography-root'})
                naics_codes = []
                industries = []
                for entry in naics_entries:
                    code = entry.text.strip().strip(',')
                    industry_desc = entry.get('aria-label', '').strip()
                    if code:
                        naics_codes.append(code)
                    if industry_desc:
                        industries.append(industry_desc)

                if naics_codes:
                    result.naics_code = ', '.join(naics_codes)
                if industries:
                    # NOTE(review): assigned as a comma-joined string here,
                    # while other scrapers assign a list to ``industry`` —
                    # confirm the field type accepts both.
                    result.industry = ', '.join(industries)

            # SIC codes
            if sic_section := document.find('div', {'data-id': 'company-stat-sic'}):
                sic_entries = sic_section.find_all('p', {'class': 'ExpTypography-root'})
                sic_codes = []
                for entry in sic_entries:
                    if code := entry.text.strip().strip(','):
                        sic_codes.append(code)

                if sic_codes:
                    result.sic_code = ', '.join(sic_codes)

            # Check for data
            has_data = any([
                result.company_name,
                result.headquarters,
                result.naics_code,
                result.sic_code
            ])

            result.scrape_status = 'success' if has_data else 'no_data'

        except Exception as e:
            self.logger.error(f"Error scraping Explorium: {e}")
            result.scrape_status = 'error'
            result.error_message = str(e)[:100]
        finally:
            # Always release the shared WebDriver.
            await self._close_driver()

        return result.to_json() if return_json else result
675
+
676
    @tool_schema(CompanyInput)
    async def scrape_leadiq(
        self,
        company_name: str,
        return_json: bool = False
    ) -> Union[CompanyInfo, str]:
        """
        Scrape company information from LeadIQ.

        The company name is first stripped of legal suffixes via
        ``_standardize_name`` before the site-restricted search. The shared
        WebDriver is always closed in ``finally``.

        Args:
            company_name: Name of the company to search for
            return_json: If True, return JSON string instead of CompanyInfo object

        Returns:
            CompanyInfo object or JSON string with company data.
            ``scrape_status`` ends as 'success', 'no_data' or 'error'.
        """
        site = "leadiq.com"
        standardized_name = self._standardize_name(company_name)
        search_term = f"site:leadiq.com {standardized_name}"

        result = CompanyInfo(
            search_term=search_term,
            source_platform='leadiq',
            scrape_status='pending',
            timestamp=str(time.time())
        )

        try:
            # Google site search
            search_result = await self._google_site_search(
                company_name=standardized_name,
                site=site,
                additional_terms="Company Overview"
            )

            if not search_result.url:
                result.scrape_status = 'no_data'
                result.error_message = 'No search results found'
                return result.to_json() if return_json else result

            result.search_url = search_result.url

            # Fetch page
            document = await self._fetch_page_with_selenium(search_result.url)

            if not document:
                result.scrape_status = 'error'
                result.error_message = 'Failed to fetch page'
                return result.to_json() if return_json else result

            # Parse data
            # Company logo and name
            # NOTE(review): the width value '76.747' is tied to LeadIQ's
            # current markup and will silently stop matching if it changes.
            if logo := document.find('img', {'alt': True, 'width': '76.747'}):
                result.company_name = logo.get('alt')
                result.logo_url = logo.get('src')

            # Revenue range ("start" span, optionally followed by an "end" span)
            if highlight_right := document.find('div', {'class': 'highlight-right'}):
                if revenue_span := highlight_right.find('span', {'class': 'start'}):
                    start_value = revenue_span.text.strip()
                    if end_span := revenue_span.find_next_sibling('span', {'class': 'end'}):
                        end_value = end_span.text.strip()
                        result.revenue_range = f"{start_value} - {end_value}"
                    else:
                        result.revenue_range = start_value

            # Company details (definition list of labelled fields)
            if highlight_left := document.find('div', {'class': 'highlight-left'}):
                if overview_section := highlight_left.find('div', {'class': 'card span'}):
                    if dl_element := overview_section.find('dl'):
                        for item in dl_element.find_all('div', {'class': 'item'}):
                            dt = item.find('dt')
                            dd = item.find('dd')
                            if dt and dd:
                                field = dt.text.strip().lower()
                                value = dd.text.strip()

                                if field == 'headquarters':
                                    address_info = self._parse_address(value)
                                    result.headquarters = value
                                    result.address = address_info.get('address')
                                    result.city = address_info.get('city')
                                    result.state = address_info.get('state')
                                    result.zip_code = address_info.get('zip_code')
                                    result.country = address_info.get('country')
                                elif field == 'phone number':
                                    # Replace LeadIQ's '****' masking with zeros.
                                    result.phone_number = value.replace('****', '0000')
                                elif field == 'website':
                                    website = dd.find('a')
                                    result.website = website['href'] if website else value
                                elif field == 'stock symbol':
                                    result.stock_symbol = value
                                elif field == 'naics code':
                                    result.naics_code = value
                                elif field == 'employees':
                                    result.employee_count = value
                                elif field == 'sic code':
                                    result.sic_code = value

            # Hero section (may overwrite the logo-derived company name)
            if hero_section := document.find('div', {'class': 'card hero snug'}):
                # Company name
                if company_name_elem := hero_section.find('h1'):
                    result.company_name = company_name_elem.text.strip()

                # Industry, location, employees
                if info_p := hero_section.find('p', {'class': 'info'}):
                    spans = info_p.find_all('span')
                    if len(spans) >= 3:
                        if not result.industry:
                            result.industry = spans[0].text.strip()
                        result.number_employees = spans[2].text.strip()

                # Description
                if description_p := hero_section.find('pre'):
                    result.company_description = description_p.text.strip()

            # Similar companies (stored as a JSON string, not a list)
            similar_companies = []
            if similar_section := document.find('div', {'id': 'similar'}):
                for company in similar_section.find_all('li'):
                    company_link = company.find('a')
                    if not company_link:
                        continue

                    company_logo = company_link.find('img')
                    if company_name_elem := company_link.find('h3'):
                        similar_company = {
                            'name': company_name_elem.text.strip(),
                            'leadiq_url': company_link['href'],
                            'logo_url': company_logo['src'] if company_logo else None
                        }
                        similar_companies.append(similar_company)

            if similar_companies:
                result.similar_companies = json.dumps(
                    similar_companies,
                    ensure_ascii=False
                )

            # Check for data
            has_data = any([
                result.company_name,
                result.logo_url,
                result.headquarters,
                result.phone_number,
                result.website
            ])

            result.scrape_status = 'success' if has_data else 'no_data'

        except Exception as e:
            self.logger.error(f"Error scraping LeadIQ: {e}")
            result.scrape_status = 'error'
            result.error_message = str(e)[:100]
        finally:
            # Always release the shared WebDriver.
            await self._close_driver()

        return result.to_json() if return_json else result
835
+
836
    @tool_schema(CompanyInput)
    async def scrape_rocketreach(
        self,
        company_name: str,
        return_json: bool = False
    ) -> Union[CompanyInfo, str]:
        """
        Scrape company information from RocketReach.

        Flow: Google site search, Selenium fetch, then parsing of the header
        and the key/value information table. The shared WebDriver is always
        closed in ``finally``.

        Args:
            company_name: Name of the company to search for
            return_json: If True, return JSON string instead of CompanyInfo object

        Returns:
            CompanyInfo object or JSON string with company data.
            ``scrape_status`` ends as 'success', 'no_data' or 'error'.
        """
        site = "rocketreach.co"
        search_term = f"site:rocketreach.co '{company_name}'"

        result = CompanyInfo(
            search_term=search_term,
            source_platform='rocketreach',
            scrape_status='pending',
            timestamp=str(time.time())
        )

        try:
            # Google site search
            search_result = await self._google_site_search(
                company_name=company_name,
                site=site,
                additional_terms=" Information - RocketReach"
            )

            if not search_result.url:
                result.scrape_status = 'no_data'
                result.error_message = 'No search results found'
                return result.to_json() if return_json else result

            result.search_url = search_result.url

            # Fetch page
            document = await self._fetch_page_with_selenium(search_result.url)

            if not document:
                result.scrape_status = 'error'
                result.error_message = 'Failed to fetch page'
                return result.to_json() if return_json else result

            # Parse data
            # Company header
            if company_header := document.select_one(".company-header"):
                # Logo
                img_tag = company_header.select_one(".company-logo")
                result.logo_url = img_tag["src"] if img_tag else None

                # Company name (page titles end in " Information")
                if title_tag := company_header.select_one(".company-title"):
                    result.company_name = title_tag.text.replace(" Information", "").strip()

            # Description
            headline_summary = document.select_one(".headline-summary p")
            result.company_description = headline_summary.text.strip() if headline_summary else None

            # Information table: each row is a <strong> label plus a value cell;
            # rows are dispatched on substrings of the lower-cased label.
            info_table = document.select(".headline-summary table tbody tr")
            for row in info_table:
                key = row.select_one("td strong")
                value = row.select_one("td:nth-of-type(2)")

                if key and value:
                    key_text = key.text.strip().lower()
                    value_text = value.text.strip()

                    if "website" in key_text:
                        result.website = value.select_one("a")["href"] if value.select_one("a") else value_text
                    elif "ticker" in key_text:
                        result.stock_symbol = value_text
                    elif "revenue" in key_text:
                        result.revenue_range = value_text
                    elif "funding" in key_text:
                        result.funding = value_text
                    elif "employees" in key_text:
                        result.employee_count = value_text.split()[0]
                        result.number_employees = value_text
                    elif "founded" in key_text:
                        result.founded = value_text
                    elif "address" in key_text:
                        result.headquarters = value.select_one("a").text.strip() if value.select_one("a") else value_text
                    elif "phone" in key_text:
                        result.phone_number = value.select_one("a").text.strip() if value.select_one("a") else value_text
                    elif "industry" in key_text:
                        result.industry = [i.strip() for i in value_text.split(",")]
                    elif "keywords" in key_text:
                        result.keywords = [i.strip() for i in value_text.split(",")]
                    elif "sic" in key_text:
                        # Extract numeric codes from the linked entries
                        codes = []
                        for link in value.find_all("a"):
                            if match := re.search(r"\b\d+\b", link.text):
                                codes.append(match.group())
                        result.sic_code = ', '.join(codes) if codes else None
                    elif "naics" in key_text:
                        # Extract numeric codes from the linked entries
                        codes = []
                        for link in value.find_all("a"):
                            if match := re.search(r"\b\d+\b", link.text):
                                codes.append(match.group())
                        result.naics_code = ', '.join(codes) if codes else None

            # Check for data
            has_data = any([
                result.company_name,
                result.logo_url,
                result.headquarters,
                result.phone_number,
                result.website
            ])

            result.scrape_status = 'success' if has_data else 'no_data'

        except Exception as e:
            self.logger.error(f"Error scraping RocketReach: {e}")
            result.scrape_status = 'error'
            result.error_message = str(e)[:100]
        finally:
            # Always release the shared WebDriver.
            await self._close_driver()

        return result.to_json() if return_json else result
965
+
966
    @tool_schema(CompanyInput)
    async def scrape_siccode(
        self,
        company_name: str,
        return_json: bool = False
    ) -> Union[CompanyInfo, str]:
        """
        Scrape company information from SICCode.com.

        Focused on classification data: SIC/NAICS codes, industry category
        and coarse location fields. The shared WebDriver is always closed in
        ``finally``.

        Args:
            company_name: Name of the company to search for
            return_json: If True, return JSON string instead of CompanyInfo object

        Returns:
            CompanyInfo object or JSON string with company data.
            ``scrape_status`` ends as 'success', 'no_data' or 'error'.
        """
        site = "siccode.com"
        search_term = f"site:siccode.com '{company_name}' +NAICS"

        result = CompanyInfo(
            search_term=search_term,
            source_platform='siccode',
            scrape_status='pending',
            timestamp=str(time.time())
        )

        try:
            # Google site search
            search_result = await self._google_site_search(
                company_name=company_name,
                site=site,
                additional_terms="+NAICS"
            )

            if not search_result.url:
                result.scrape_status = 'no_data'
                result.error_message = 'No search results found'
                return result.to_json() if return_json else result

            result.search_url = search_result.url

            # Fetch page
            document = await self._fetch_page_with_selenium(search_result.url)

            if not document:
                result.scrape_status = 'error'
                result.error_message = 'Failed to fetch page'
                return result.to_json() if return_json else result

            # Parse data
            if header := document.select_one("div.main-title"):
                # Company name
                if name_elem := header.select_one("h1.size-h2 a span"):
                    result.company_name = name_elem.text.strip()

                # Industry category
                if cat_elem := header.select_one("b.p-category"):
                    result.industry_category = cat_elem.text.strip()

            # SIC and NAICS codes (anchor text looks like
            # "SIC CODE 1234 - Some Industry")
            if desc := document.find('div', {'id': 'description'}):
                sic_code_elem = desc.select_one("a.sic")
                naics_code_elem = desc.select_one("a.naics")

                if sic_code_elem:
                    sic_text = sic_code_elem.text.split("SIC CODE")[-1].strip()
                    if ' - ' in sic_text:
                        parts = sic_text.split(' - ')
                        result.sic_code = parts[0].strip()
                        result.industry = parts[1].strip() if len(parts) > 1 else None

                if naics_code_elem:
                    naics_text = naics_code_elem.text.split("NAICS CODE")[-1].strip()
                    if ' - ' in naics_text:
                        parts = naics_text.split(' - ')
                        result.naics_code = parts[0].strip()
                        result.category = parts[1].strip() if len(parts) > 1 else None

            # Location details (microformat-style p-* classes)
            if overview := document.find('div', {'id': 'overview'}):
                # Description
                if desc_elem := overview.select_one("p.p-note"):
                    result.company_description = desc_elem.text.strip()

                # Location fields
                city_elem = overview.select_one(".p-locality")
                state_elem = overview.select_one(".p-region")
                zip_elem = overview.select_one(".p-postal-code")
                country_elem = overview.select_one(".p-country-name")
                metro_elem = overview.select_one("div[title]")

                if city_elem:
                    result.city = city_elem.text.strip()
                if state_elem:
                    result.state = state_elem.text.strip()
                if zip_elem:
                    result.zip_code = zip_elem.text.strip()
                if country_elem:
                    result.country = country_elem.text.strip()
                if metro_elem:
                    result.metro_area = metro_elem.text.strip()

                # Construct headquarters from whichever parts were found
                parts = [result.city, result.state, result.zip_code, result.country]
                result.headquarters = ", ".join(filter(None, parts))

            # Check for data
            has_data = any([
                result.company_name,
                result.sic_code,
                result.naics_code,
                result.headquarters
            ])

            result.scrape_status = 'success' if has_data else 'no_data'

        except Exception as e:
            self.logger.error(f"Error scraping SICCode: {e}")
            result.scrape_status = 'error'
            result.error_message = str(e)[:100]
        finally:
            # Always release the shared WebDriver.
            await self._close_driver()

        return result.to_json() if return_json else result
1090
+
1091
+ @tool_schema(CompanyInput)
1092
+ async def scrape_all_sources(
1093
+ self,
1094
+ company_name: str,
1095
+ return_json: bool = False
1096
+ ) -> Union[List[CompanyInfo], str]:
1097
+ """
1098
+ Scrape company information from ALL available sources.
1099
+
1100
+ This method runs all scraping tools in parallel and returns
1101
+ aggregated results from all platforms.
1102
+
1103
+ Args:
1104
+ company_name: Name of the company to search for
1105
+ return_json: If True, return JSON string instead of list of CompanyInfo objects
1106
+
1107
+ Returns:
1108
+ List of CompanyInfo objects or JSON string with all results
1109
+ """
1110
+ self.logger.info(f"Scraping all sources for: {company_name}")
1111
+
1112
+ # Run all scraping methods in parallel
1113
+ tasks = [
1114
+ self.scrape_zoominfo(company_name, return_json=False),
1115
+ self.scrape_explorium(company_name, return_json=False),
1116
+ self.scrape_leadiq(company_name, return_json=False),
1117
+ self.scrape_rocketreach(company_name, return_json=False),
1118
+ self.scrape_siccode(company_name, return_json=False)
1119
+ ]
1120
+
1121
+ results = await asyncio.gather(*tasks, return_exceptions=True)
1122
+
1123
+ # Filter out exceptions and failed results
1124
+ valid_results = []
1125
+ for result in results:
1126
+ if isinstance(result, Exception):
1127
+ self.logger.error(f"Scraping error: {result}")
1128
+ elif isinstance(result, CompanyInfo):
1129
+ valid_results.append(result)
1130
+
1131
+ if return_json:
1132
+ return json.dumps(
1133
+ [r.model_dump(exclude_none=True) for r in valid_results],
1134
+ ensure_ascii=False,
1135
+ indent=2
1136
+ )
1137
+
1138
+ return valid_results