crawlemoon 1.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. crawlemoon-1.1.5.dist-info/METADATA +197 -0
  2. crawlemoon-1.1.5.dist-info/RECORD +59 -0
  3. crawlemoon-1.1.5.dist-info/WHEEL +5 -0
  4. crawlemoon-1.1.5.dist-info/entry_points.txt +4 -0
  5. crawlemoon-1.1.5.dist-info/licenses/LICENSE +23 -0
  6. crawlemoon-1.1.5.dist-info/top_level.txt +1 -0
  7. src/__init__.py +47 -0
  8. src/cli/__init__.py +9 -0
  9. src/cli/main.py +334 -0
  10. src/core/__init__.py +9 -0
  11. src/core/browser/__init__.py +9 -0
  12. src/core/browser/cdp.py +102 -0
  13. src/core/browser/pool.py +269 -0
  14. src/core/browser/proxy_pool.py +322 -0
  15. src/core/browser/stealth.py +341 -0
  16. src/core/browser/xray.py +845 -0
  17. src/core/cache/__init__.py +9 -0
  18. src/core/cache/manager.py +357 -0
  19. src/core/http/__init__.py +2 -0
  20. src/core/http/stealth_client.py +259 -0
  21. src/core/logging.py +129 -0
  22. src/core/rate_limiter.py +332 -0
  23. src/core/recording_storage.py +375 -0
  24. src/core/session/__init__.py +9 -0
  25. src/core/session/manager.py +451 -0
  26. src/crawlers/__init__.py +9 -0
  27. src/exceptions.py +202 -0
  28. src/intelligence/__init__.py +9 -0
  29. src/intelligence/extraction/__init__.py +2 -0
  30. src/intelligence/extraction/content.py +307 -0
  31. src/intelligence/extraction/smart.py +534 -0
  32. src/intelligence/generator/__init__.py +9 -0
  33. src/intelligence/generator/crawler_gen.py +400 -0
  34. src/intelligence/js/__init__.py +9 -0
  35. src/intelligence/js/analyzer.py +305 -0
  36. src/intelligence/js/deobfuscator.py +154 -0
  37. src/intelligence/js/dynamic.py +112 -0
  38. src/intelligence/network/__init__.py +9 -0
  39. src/intelligence/network/analyzer.py +297 -0
  40. src/intelligence/network/api_discovery.py +581 -0
  41. src/intelligence/network/graphql.py +49 -0
  42. src/intelligence/network/interceptor.py +250 -0
  43. src/intelligence/network/sitemap.py +229 -0
  44. src/intelligence/network/websocket.py +69 -0
  45. src/intelligence/recorder/__init__.py +9 -0
  46. src/intelligence/recorder/session.py +388 -0
  47. src/intelligence/recorder/state_machine.py +133 -0
  48. src/intelligence/security/__init__.py +9 -0
  49. src/intelligence/security/auth.py +114 -0
  50. src/intelligence/security/bot_detection.py +341 -0
  51. src/intelligence/security/captcha_solver.py +534 -0
  52. src/intelligence/security/technology_detector.py +210 -0
  53. src/mcp/__init__.py +9 -0
  54. src/mcp/config.py +151 -0
  55. src/mcp/schemas.py +454 -0
  56. src/mcp/server.py +4603 -0
  57. src/mcp/tools/__init__.py +9 -0
  58. src/mcp/utils.py +297 -0
  59. src/sites/__init__.py +10 -0
@@ -0,0 +1,197 @@
1
+ Metadata-Version: 2.4
2
+ Name: crawlemoon
3
+ Version: 1.1.5
4
+ Summary: Advanced Web Crawling Platform with Deep Analysis and MCP Server
5
+ Author-email: "emad.dev" <contact@emad.dev>
6
+ License: MIT
7
+ Keywords: crawling,scraping,automation,mcp,web-analysis,playwright,api-discovery
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Topic :: Internet :: WWW/HTTP :: Browsers
16
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
17
+ Classifier: Topic :: System :: Networking
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: playwright>=1.40.0
22
+ Requires-Dist: mcp>=1.0.0
23
+ Requires-Dist: pydantic>=2.0.0
24
+ Requires-Dist: pydantic-settings>=2.0.0
25
+ Requires-Dist: aiohttp>=3.9.0
26
+ Requires-Dist: httpx>=0.25.0
27
+ Requires-Dist: websockets>=12.0
28
+ Requires-Dist: pyyaml>=6.0
29
+ Requires-Dist: python-json-logger>=2.0.0
30
+ Requires-Dist: tenacity>=8.2.0
31
+ Requires-Dist: cachetools>=5.3.0
32
+ Requires-Dist: graphql-core>=3.2.0
33
+ Requires-Dist: esprima>=4.0.0
34
+ Requires-Dist: beautifulsoup4>=4.12.0
35
+ Requires-Dist: lxml>=5.0.0
36
+ Requires-Dist: cryptography>=41.0.0
37
+ Requires-Dist: curl-cffi>=0.6.0
38
+ Requires-Dist: fake-useragent>=1.4.0
39
+ Requires-Dist: trafilatura>=1.6.0
40
+ Requires-Dist: selectolax>=0.3.0
41
+ Requires-Dist: markdownify>=0.11.0
42
+ Requires-Dist: instructor>=1.0.0
43
+ Requires-Dist: openai>=1.0.0
44
+ Requires-Dist: python-Wappalyzer>=0.3.0
45
+ Requires-Dist: ftfy>=6.1.0
46
+ Requires-Dist: dateparser>=1.2.0
47
+ Requires-Dist: url-normalize>=1.4.0
48
+ Requires-Dist: tldextract>=5.0.0
49
+ Provides-Extra: captcha
50
+ Requires-Dist: python-anticaptcha>=1.0.0; extra == "captcha"
51
+ Requires-Dist: capsolver>=1.0.0; extra == "captcha"
52
+ Provides-Extra: ocr
53
+ Requires-Dist: pytesseract>=0.3.10; extra == "ocr"
54
+ Requires-Dist: Pillow>=10.0.0; extra == "ocr"
55
+ Provides-Extra: dev
56
+ Requires-Dist: pytest>=7.4.0; extra == "dev"
57
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
58
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
59
+ Requires-Dist: black>=23.0.0; extra == "dev"
60
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
61
+ Requires-Dist: mypy>=1.7.0; extra == "dev"
62
+ Requires-Dist: pip-audit>=2.7.0; extra == "dev"
63
+ Requires-Dist: pre-commit>=3.6.0; extra == "dev"
64
+ Requires-Dist: build>=1.0.0; extra == "dev"
65
+ Requires-Dist: twine>=4.0.0; extra == "dev"
66
+ Provides-Extra: all
67
+ Requires-Dist: crawlemoon[captcha,dev,ocr]; extra == "all"
68
+ Dynamic: license-file
69
+
70
+ # Crawlemoon MCP Server
71
+
72
+ <p align="center">
73
+ <img src="https://raw.githubusercontent.com/razavioo/crawlemoon-mcp-server/main/assets/hero.png" alt="Crawlemoon MCP Server — free, AI-native web crawling for the agent era" width="100%"/>
74
+ </p>
75
+
76
+ <p align="left">
77
+ <img alt="python 3.10+ · pypi 1.1.5 · MIT · MCP-native · code style black" src="https://raw.githubusercontent.com/razavioo/crawlemoon-mcp-server/main/assets/badges.png" height="22"/>
78
+ </p>
79
+
80
+ A **free, open-source MCP server** that gives any agent (Claude Code, Cursor, Windsurf, …) **55 production-grade tools** for the full web-crawling stack: deep analysis, stealth, API discovery, session recording → runnable crawler, smart extraction. No proprietary API. No per-request fee.
81
+
82
+ <p align="center">
83
+ <img src="https://raw.githubusercontent.com/razavioo/crawlemoon-mcp-server/main/assets/features.png" alt="Crawlemoon capabilities — deep analysis, stealth, record→crawler, smart extraction" width="100%"/>
84
+ </p>
85
+
86
+ ---
87
+
88
+ ## Quick start
89
+
90
+ <p align="center">
91
+ <img src="https://raw.githubusercontent.com/razavioo/crawlemoon-mcp-server/main/assets/install.png" alt="Three install paths — uvx, pipx, pip" width="100%"/>
92
+ </p>
93
+
94
+ The recommended path needs no install — `uvx` runs straight from PyPI:
95
+
96
+ ```json
97
+ {
98
+ "mcpServers": {
99
+ "crawlemoon": {
100
+ "command": "uvx",
101
+ "args": ["--from", "crawlemoon", "crawlemoon-mcp-server"]
102
+ }
103
+ }
104
+ }
105
+ ```
106
+
107
+ > Requires [`uv`](https://docs.astral.sh/uv/getting-started/installation/). Install once: `curl -LsSf https://astral.sh/uv/install.sh | sh`. Or use `pipx run --spec crawlemoon crawlemoon-mcp-server`, or `pip install crawlemoon` and then run `crawlemoon-mcp-server`, instead.
108
+
109
+ **Where to put that JSON:** Cursor → Settings → MCP. Claude Code → `~/.config/claude/mcp_settings.json`. Windsurf → Settings → MCP Servers.
110
+
111
+ ---
112
+
113
+ ## How it works
114
+
115
+ <p align="center">
116
+ <img src="https://raw.githubusercontent.com/razavioo/crawlemoon-mcp-server/main/assets/architecture.png" alt="Agent → Crawlemoon → Browser/HTTP/Proxy → target web" width="100%"/>
117
+ </p>
118
+
119
+ Your agent talks to Crawlemoon over the Model Context Protocol. Crawlemoon owns a hardened browser pool, an HTTP stack with TLS fingerprinting, and a rotating proxy pool. While it fetches pages, it captures network traffic, reads scripts, and introspects schemas — so the agent gets clean structured data, not raw HTML.
120
+
121
+ ---
122
+
123
+ ## What's in the box
124
+
125
+ A short list — see the source for the full set of 55 tools.
126
+
127
+ | Group | Tools |
128
+ |---|---|
129
+ | **Deep analysis** | `deep_analyze`, `discover_apis`, `introspect_graphql`, `analyze_websocket`, `analyze_auth`, `detect_protection`, `detect_technology` |
130
+ | **Stealth** | `stealth_request`, `configure_proxies`, `configure_rate_limit`, `add_proxy`, `test_proxy` |
131
+ | **Record → crawler** | `record_session`, `stop_recording`, `export_recording`, `generate_crawler` |
132
+ | **Extraction** | `smart_extract`, `extract_article`, `extract_tables`, `extract_links`, `extract_forms`, `extract_metadata`, `convert_to_markdown` |
133
+ | **Page interaction** | `take_screenshot`, `fill_form`, `wait_and_extract`, `compare_pages`, `measure_performance`, `check_accessibility`, `get_dom_tree` |
134
+ | **Sessions & cache** | `save_session`, `load_session`, `get_cookies`, `get_storage`, `clear_cache`, `get_cache_stats` |
135
+ | **Advanced (opt-in)** | `execute_js`, `execute_cdp`, `deobfuscate_js`, `extract_from_js`, `solve_captcha` |
136
+
137
+ ---
138
+
139
+ ## Smart extraction — bring any LLM, including free ones
140
+
141
+ `smart_extract` works **without any API key** using pattern matching. Plug in any OpenAI-compatible endpoint for higher accuracy — including free tiers:
142
+
143
+ ```bash
144
+ # OpenRouter (free models exist)
145
+ CRAWLEMOON_LLM_PROVIDER=openrouter
146
+ CRAWLEMOON_LLM_API_KEY=sk-or-v1-xxx
147
+ CRAWLEMOON_LLM_MODEL=meta-llama/llama-3.2-3b-instruct:free
148
+
149
+ # Groq (free, very fast)
150
+ CRAWLEMOON_LLM_PROVIDER=groq
151
+ CRAWLEMOON_LLM_API_KEY=gsk_xxx
152
+
153
+ # Local Ollama (no key needed)
154
+ CRAWLEMOON_LLM_PROVIDER=ollama
155
+ CRAWLEMOON_LLM_MODEL=llama3.2
156
+ ```
157
+
158
+ Together, DeepSeek, Mistral, Fireworks, and standard OpenAI also work via `CRAWLEMOON_LLM_BASE_URL`.
159
+
160
+ ---
161
+
162
+ ## Configuration
163
+
164
+ | Variable | Default | Notes |
165
+ |---|---|---|
166
+ | `CRAWLEMOON_HEADLESS` | `true` | Run browser without UI |
167
+ | `CRAWLEMOON_BROWSER` | `chromium` | `chromium` / `firefox` / `webkit` |
168
+ | `CRAWLEMOON_POOL_SIZE` | `5` | Max concurrent browsers |
169
+ | `CRAWLEMOON_NAV_TIMEOUT` | `30.0` | Page-load timeout (s) |
170
+ | `CRAWLEMOON_API_KEY` | _unset_ | If set, every tool call must include a matching `_api_key` argument |
171
+ | `CRAWLEMOON_ALLOW_DANGEROUS_JS` | `false` | Required for `execute_js` / `execute_cdp` / `deobfuscate_js` |
172
+ | `CRAWLEMOON_JS_MAX_LENGTH` | `50000` | Length cap for JS payloads |
173
+ | `CRAWLEMOON_JS_EXEC_TIMEOUT` | `10.0` | Per-script timeout (s) |
174
+
175
+ ---
176
+
177
+ ## Security
178
+
179
+ `execute_js`, `execute_cdp`, and `deobfuscate_js` are **disabled by default** — they execute or operate on arbitrary code in a real browser. Enable on trusted networks with `CRAWLEMOON_ALLOW_DANGEROUS_JS=true`. Even then, payloads are length-capped, time-bounded, and a denylist rejects `eval`, `new Function`, dynamic `import()`, `document.write`, `importScripts`, and `WebAssembly.{compile,instantiate}`. Set `CRAWLEMOON_API_KEY` so MCP clients must present a matching `_api_key`.
180
+
181
+ These are mitigations, not a sandbox: do not expose this server to untrusted clients.
182
+
183
+ ---
184
+
185
+ ## Develop
186
+
187
+ ```bash
188
+ git clone https://github.com/razavioo/crawlemoon-mcp-server.git
189
+ cd crawlemoon-mcp-server
190
+ make dev-install # editable install + dev/captcha/ocr extras + pre-commit
191
+ make test # pytest
192
+ make lint # ruff + mypy
193
+ ```
194
+
195
+ PRs welcome. Particularly interested in: distributed mode (Redis queue), result sinks (Postgres / S3), Prometheus metrics. Released under the [MIT License](LICENSE).
196
+
197
+ <p align="center"><sub>Made by <a href="https://emad.dev">emad.dev</a></sub></p>
@@ -0,0 +1,59 @@
1
+ crawlemoon-1.1.5.dist-info/licenses/LICENSE,sha256=EoniT-RlPAS7H_GEDjPE8eYRQm1qnJZJEO6ZAbGQwC4,1067
2
+ src/__init__.py,sha256=iIWAh-JSeKw1qmFotGpGCZNWNggK32prcQESGD8gmO0,1089
3
+ src/exceptions.py,sha256=WZPkAf8zPqXBgE9TEhTxyym66HyKQgzzJtuywYyvatE,5946
4
+ src/cli/__init__.py,sha256=JhLoLUvGogoGoTJEiA6EZO_3mbNEiciqgnUNUsNNNMI,44
5
+ src/cli/main.py,sha256=8ZPOgBBS6CYKvlhFnssEAmVxbmo7SfURsj5vO4jBCis,11935
6
+ src/core/__init__.py,sha256=DmypqdOcvJaawHp4_mSgLaGBujKPr7YkcEfBXVmn-TU,38
7
+ src/core/logging.py,sha256=S-ZZmISmu_nb55zwQupI1av_Etwqd8JUDT0I0L0vhnk,4607
8
+ src/core/rate_limiter.py,sha256=8ZgExSv0v0VnxOPU3by8ZJ36JY9PaYE9_Xf6IWg2o9A,13172
9
+ src/core/recording_storage.py,sha256=h87cvXtkBcu0j394-EykYnFVTEs5tgSG-cPTPFPwT6o,14958
10
+ src/core/browser/__init__.py,sha256=qZ3R3fK50GAZzHnmOn9Mzywih8u0LVXW5IVpuyH3FvA,49
11
+ src/core/browser/cdp.py,sha256=OncjdlCeQOZOOggom5w02xBUlC1J1K2fzSBwU9dCqNU,3698
12
+ src/core/browser/pool.py,sha256=rNsDDpcY1RbZ73mB4K_6t3Ei5mtmbAjLKiIVEDy-uIg,9978
13
+ src/core/browser/proxy_pool.py,sha256=EAnue0GDyw4TpFQAS57pTHTtDMMIJhuUD4yneZfrSZo,11684
14
+ src/core/browser/stealth.py,sha256=UifTLuLFzBw3v9eT06R_65aqkEiWlaGn6HS91Cuf56w,12574
15
+ src/core/browser/xray.py,sha256=QXRdpPypYX713glAI_uwKucLEpqKEoo9-7AF0l-ppLM,32118
16
+ src/core/cache/__init__.py,sha256=Mu6hiiovGgGteSKuGq0CSqKDchn4NA4kQSRGOvMo1TA,69
17
+ src/core/cache/manager.py,sha256=zPwyc6b59XN-tfOpV04O-kN1mvXuh9HxwBf7tFZZ0Rc,12856
18
+ src/core/http/__init__.py,sha256=LSnS10GFqnmrhQZsdQNSfMsUGqWhufE9wLgO6TIkz9Y,49
19
+ src/core/http/stealth_client.py,sha256=Ng9QcaT8q_8vFlseQekwILEPwZHWyCrDlfwGPwskgh8,8220
20
+ src/core/session/__init__.py,sha256=KlH3UErrJSDnr_VS-crzjM_EHU25W6FjaVgMTLO5ajM,49
21
+ src/core/session/manager.py,sha256=otCV26_iCk-BUX6D5YyX-2bC03wLZ-AYA20FaTnL2ms,17859
22
+ src/crawlers/__init__.py,sha256=rkv1aMsBjbCVOhDgKUK5uFgeZmPw12z2v8um1W_2XL0,34
23
+ src/intelligence/__init__.py,sha256=H6W9IbRm3nsXmAfaYF_xX-6VsVvPaNm7P8QxSuFVvSo,54
24
+ src/intelligence/extraction/__init__.py,sha256=avMYES_kTUGWxuVoJhMU4ZWz4bmQAxB7fuhttDofQWU,67
25
+ src/intelligence/extraction/content.py,sha256=PXYN5TI5i5rJ4tn6O_Bk5NTy-JmTNFiebMOB2AoyszU,11400
26
+ src/intelligence/extraction/smart.py,sha256=Gnb7kFghYFzv8YLJMyjXCQXjAAqdjcrP6s3BY_Y3WDA,18751
27
+ src/intelligence/generator/__init__.py,sha256=jdGdyjr5QSU7fWq1sISn6bfmzqieNlapksMXq732ESg,69
28
+ src/intelligence/generator/crawler_gen.py,sha256=6yg4V0L1H07tHj1WhPfI9WEtu438sU9TRsYDycaCldU,13935
29
+ src/intelligence/js/__init__.py,sha256=-Us_025cYIR2-2cnq9CEbr6vb912W28D7ELsBn-TIoc,42
30
+ src/intelligence/js/analyzer.py,sha256=TpCob6dDFnFmYwV-Hvmn1ok5Zv_5aGwb1saNGx5968I,11592
31
+ src/intelligence/js/deobfuscator.py,sha256=mYir6TFKlqpdZjd9AUxCWSQ89cPzyG53iihIiITFaEo,6300
32
+ src/intelligence/js/dynamic.py,sha256=XzqXBECn23x-saPoLPOQStdNZM1NhgxRn1-DtO_ENJk,3228
33
+ src/intelligence/network/__init__.py,sha256=Ic9aTOwFv51xaGGH1fYjj3OBeY8Ncc66mYoJYLw50lQ,49
34
+ src/intelligence/network/analyzer.py,sha256=NnseVEUubJJLWPGKiuz4ELKqvbVQmhg76qJJnxYTeFg,10002
35
+ src/intelligence/network/api_discovery.py,sha256=DvbKlaRyslHzhLgr2S8TwzruJRk0WRHPHfeYMjZR_Js,19371
36
+ src/intelligence/network/graphql.py,sha256=CFDq6l9mvVRYrPS3TE8bb7-haIKzyi3Jm18lThoYFto,1516
37
+ src/intelligence/network/interceptor.py,sha256=hfzABtWBmOobhdpPUo5TL7fqeqap6YPn0YYdPZ685aQ,8326
38
+ src/intelligence/network/sitemap.py,sha256=bwq-Gk_YKktVWwHNO0j4mt7ciauLsffugDn569yUKRU,8960
39
+ src/intelligence/network/websocket.py,sha256=xpZcmnBHEqCmMd0nUQYmt2WnMkNDxkypjruBOj3eie4,1963
40
+ src/intelligence/recorder/__init__.py,sha256=Hw4-OP8XDaRgucPimEzvzd7BeCqfsw2rXYaylurk0CU,44
41
+ src/intelligence/recorder/session.py,sha256=Ubd1LsISbAvFFE6xquhhbx0Y1wWjgdO-dNGZCvo2T-Q,12443
42
+ src/intelligence/recorder/state_machine.py,sha256=aLlBdj8L2ATStq_iI-25JWGrTIGGAbkKvh_URBbVS7g,3660
43
+ src/intelligence/security/__init__.py,sha256=oR88l6-6Kva7sj6sT2O9sxQFs-uP2dni2cstidplMa4,46
44
+ src/intelligence/security/auth.py,sha256=XeQAKpMR-Fql8ce1q2NRWD0UKa_CjTHwXvuobwUn-8Q,3046
45
+ src/intelligence/security/bot_detection.py,sha256=Duk1KI43QiThIYohtCZkfgToIqIL2LEo5DuBPd7LOZ0,12493
46
+ src/intelligence/security/captcha_solver.py,sha256=8wg0SKUQlFi-CjPXRRi5tML3HqA_G6_G8RsY32fNUmw,20211
47
+ src/intelligence/security/technology_detector.py,sha256=noxyIErufHUNyvF7QaDh9jZXClVytR-uWnwZ0TFAItY,6651
48
+ src/mcp/__init__.py,sha256=YswTJiEzV0hpeNe2GWxzpgDeFu52UZ86MTOra84UhyU,41
49
+ src/mcp/config.py,sha256=znFr7x48lCoLSzbi3ouJms1Y-kMCLYQ1kamF5lNZ0SQ,6461
50
+ src/mcp/schemas.py,sha256=z3jCyjw9B4xwie4K1f1kV4lKcvyE0tagK0SiUDflneQ,15315
51
+ src/mcp/server.py,sha256=PwvzycgBbaMb-Uoq7ic59U7q4aafnooNp4UpJhSWO7o,180462
52
+ src/mcp/utils.py,sha256=9qYja9u-TLqbDxMgcDoKYdATPami8ITIPrE_pbr6VIA,9833
53
+ src/mcp/tools/__init__.py,sha256=PBZhxyYFpNQ_D99jtZAcqEahqdhS9Nhe5Mq12DCqFAo,33
54
+ src/sites/__init__.py,sha256=gG9z7rEjQH1C4HQ36WLdLT4-FxxMD2-6Rj3yGxob7sI,45
55
+ crawlemoon-1.1.5.dist-info/METADATA,sha256=qZdalWXG4GB5kaXLDycHi25EYOH97G5-m5lYqjNkoLE,8416
56
+ crawlemoon-1.1.5.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
57
+ crawlemoon-1.1.5.dist-info/entry_points.txt,sha256=s3l8_-Bp73iim1643js2KlI5Tv3xZxmfaOZlBqauGZ4,131
58
+ crawlemoon-1.1.5.dist-info/top_level.txt,sha256=74rtVfumQlgAPzR5_2CgYN24MB0XARCg0t-gzk6gTrM,4
59
+ crawlemoon-1.1.5.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,4 @@
1
+ [console_scripts]
2
+ crawl = src.cli.main:main
3
+ crawlemoon = src.mcp.server:main_sync
4
+ crawlemoon-mcp-server = src.mcp.server:main_sync
@@ -0,0 +1,23 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 emad.dev
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
23
+
@@ -0,0 +1 @@
1
+ src
src/__init__.py ADDED
@@ -0,0 +1,47 @@
1
+ """Crawlemoon - Advanced Web Crawling Platform."""
2
+
3
+ __version__ = "1.1.5"
4
+
5
+ # Re-export the exception hierarchy so callers can use ``from src import CrawlemoonError``
6
+ from .exceptions import ( # noqa: F401
7
+ CrawlemoonError,
8
+ BrowserError,
9
+ BrowserPoolError,
10
+ BrowserPoolExhaustedError,
11
+ BrowserInitError,
12
+ PageNavigationError,
13
+ PageInteractionError,
14
+ ProxyError,
15
+ NoHealthyProxyError,
16
+ ProxyTestError,
17
+ NetworkError,
18
+ HTTPRequestError,
19
+ RateLimitError,
20
+ ConnectionPoolError,
21
+ CacheError,
22
+ CacheBackendError,
23
+ CacheSerializationError,
24
+ SessionError,
25
+ SessionNotFoundError,
26
+ SessionEncryptionError,
27
+ SessionStorageError,
28
+ RecordingError,
29
+ RecordingNotFoundError,
30
+ RecordingSerializationError,
31
+ RecordingExpiredError,
32
+ AnalysisError,
33
+ APIDiscoveryError,
34
+ GraphQLError,
35
+ JSAnalysisError,
36
+ SitemapError,
37
+ BotDetectionError,
38
+ CaptchaError,
39
+ TechnologyDetectionError,
40
+ ContentExtractionError,
41
+ CrawlerGenerationError,
42
+ ConfigurationError,
43
+ ValidationError,
44
+ URLValidationError,
45
+ )
46
+
47
+
src/cli/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ """CLI interface for Crawlemoon."""
2
+
3
+
4
+
5
+
6
+
7
+
8
+
9
+