firecrawl-py 3.3.0__py3-none-any.whl → 3.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of firecrawl-py might be problematic.

Files changed (82)
  1. firecrawl/__init__.py +1 -1
  2. firecrawl/v2/client.py +3 -0
  3. {firecrawl_py-3.3.0.dist-info → firecrawl_py-3.3.2.dist-info}/METADATA +1 -1
  4. firecrawl_py-3.3.2.dist-info/RECORD +79 -0
  5. {firecrawl_py-3.3.0.dist-info → firecrawl_py-3.3.2.dist-info}/top_level.txt +0 -2
  6. build/lib/firecrawl/__init__.py +0 -87
  7. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py +0 -79
  8. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py +0 -188
  9. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py +0 -38
  10. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_map.py +0 -40
  11. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py +0 -137
  12. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_search.py +0 -248
  13. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py +0 -35
  14. build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py +0 -43
  15. build/lib/firecrawl/__tests__/e2e/v2/conftest.py +0 -73
  16. build/lib/firecrawl/__tests__/e2e/v2/test_async.py +0 -73
  17. build/lib/firecrawl/__tests__/e2e/v2/test_batch_scrape.py +0 -105
  18. build/lib/firecrawl/__tests__/e2e/v2/test_crawl.py +0 -276
  19. build/lib/firecrawl/__tests__/e2e/v2/test_extract.py +0 -54
  20. build/lib/firecrawl/__tests__/e2e/v2/test_map.py +0 -60
  21. build/lib/firecrawl/__tests__/e2e/v2/test_scrape.py +0 -154
  22. build/lib/firecrawl/__tests__/e2e/v2/test_search.py +0 -269
  23. build/lib/firecrawl/__tests__/e2e/v2/test_usage.py +0 -26
  24. build/lib/firecrawl/__tests__/e2e/v2/test_watcher.py +0 -65
  25. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py +0 -12
  26. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py +0 -61
  27. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py +0 -12
  28. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py +0 -19
  29. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py +0 -50
  30. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py +0 -63
  31. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py +0 -28
  32. build/lib/firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py +0 -117
  33. build/lib/firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py +0 -90
  34. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_params.py +0 -70
  35. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py +0 -240
  36. build/lib/firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py +0 -107
  37. build/lib/firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py +0 -53
  38. build/lib/firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py +0 -92
  39. build/lib/firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py +0 -167
  40. build/lib/firecrawl/__tests__/unit/v2/methods/test_search_validation.py +0 -236
  41. build/lib/firecrawl/__tests__/unit/v2/methods/test_usage_types.py +0 -18
  42. build/lib/firecrawl/__tests__/unit/v2/methods/test_webhook.py +0 -123
  43. build/lib/firecrawl/__tests__/unit/v2/utils/test_validation.py +0 -290
  44. build/lib/firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py +0 -332
  45. build/lib/firecrawl/client.py +0 -242
  46. build/lib/firecrawl/firecrawl.backup.py +0 -4635
  47. build/lib/firecrawl/types.py +0 -161
  48. build/lib/firecrawl/v1/__init__.py +0 -14
  49. build/lib/firecrawl/v1/client.py +0 -4653
  50. build/lib/firecrawl/v2/__init__.py +0 -4
  51. build/lib/firecrawl/v2/client.py +0 -802
  52. build/lib/firecrawl/v2/client_async.py +0 -250
  53. build/lib/firecrawl/v2/methods/aio/__init__.py +0 -1
  54. build/lib/firecrawl/v2/methods/aio/batch.py +0 -85
  55. build/lib/firecrawl/v2/methods/aio/crawl.py +0 -171
  56. build/lib/firecrawl/v2/methods/aio/extract.py +0 -126
  57. build/lib/firecrawl/v2/methods/aio/map.py +0 -59
  58. build/lib/firecrawl/v2/methods/aio/scrape.py +0 -33
  59. build/lib/firecrawl/v2/methods/aio/search.py +0 -172
  60. build/lib/firecrawl/v2/methods/aio/usage.py +0 -42
  61. build/lib/firecrawl/v2/methods/batch.py +0 -417
  62. build/lib/firecrawl/v2/methods/crawl.py +0 -469
  63. build/lib/firecrawl/v2/methods/extract.py +0 -131
  64. build/lib/firecrawl/v2/methods/map.py +0 -77
  65. build/lib/firecrawl/v2/methods/scrape.py +0 -64
  66. build/lib/firecrawl/v2/methods/search.py +0 -197
  67. build/lib/firecrawl/v2/methods/usage.py +0 -41
  68. build/lib/firecrawl/v2/types.py +0 -665
  69. build/lib/firecrawl/v2/utils/__init__.py +0 -9
  70. build/lib/firecrawl/v2/utils/error_handler.py +0 -107
  71. build/lib/firecrawl/v2/utils/get_version.py +0 -15
  72. build/lib/firecrawl/v2/utils/http_client.py +0 -153
  73. build/lib/firecrawl/v2/utils/http_client_async.py +0 -65
  74. build/lib/firecrawl/v2/utils/normalize.py +0 -107
  75. build/lib/firecrawl/v2/utils/validation.py +0 -324
  76. build/lib/firecrawl/v2/watcher.py +0 -301
  77. build/lib/firecrawl/v2/watcher_async.py +0 -242
  78. build/lib/tests/test_change_tracking.py +0 -98
  79. build/lib/tests/test_timeout_conversion.py +0 -117
  80. firecrawl_py-3.3.0.dist-info/RECORD +0 -153
  81. {firecrawl_py-3.3.0.dist-info → firecrawl_py-3.3.2.dist-info}/LICENSE +0 -0
  82. {firecrawl_py-3.3.0.dist-info → firecrawl_py-3.3.2.dist-info}/WHEEL +0 -0
firecrawl/__init__.py CHANGED
@@ -17,7 +17,7 @@ from .v1 import (
     V1ChangeTrackingOptions,
 )
 
-__version__ = "3.3.0"
+__version__ = "3.3.2"
 
 # Define the logger for the Firecrawl project
 logger: logging.Logger = logging.getLogger("firecrawl")
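
Only the version string changes in this module. A quick way to confirm which build is installed after upgrading (assumes a standard pip install of firecrawl-py):

    import firecrawl

    print(firecrawl.__version__)  # expected after this release: 3.3.2
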
firecrawl/v2/client.py CHANGED
@@ -13,6 +13,7 @@ from .types import (
     SearchRequest,
     SearchData,
     SourceOption,
+    CategoryOption,
     CrawlRequest,
     CrawlResponse,
     CrawlJob,
@@ -171,6 +172,7 @@ class FirecrawlClient:
         query: str,
         *,
         sources: Optional[List[SourceOption]] = None,
+        categories: Optional[List[CategoryOption]] = None,
         limit: Optional[int] = None,
         tbs: Optional[str] = None,
         location: Optional[str] = None,
@@ -195,6 +197,7 @@ class FirecrawlClient:
         request = SearchRequest(
             query=query,
             sources=sources,
+            categories=categories,
             limit=limit,
             tbs=tbs,
             location=location,
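
Taken together, the three hunks add an optional categories filter to v2 search, forwarded to SearchRequest alongside sources. A minimal sketch of calling it; the API key and the specific source/category values are illustrative assumptions, not taken from this diff:

    from firecrawl import Firecrawl

    client = Firecrawl(api_key="fc-YOUR-KEY")  # placeholder credentials
    results = client.search(
        "firecrawl python sdk",
        sources=["web"],        # existing SourceOption filter (illustrative value)
        categories=["github"],  # new CategoryOption filter added in 3.3.2 (illustrative value)
        limit=5,
    )
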
{firecrawl_py-3.3.0.dist-info → firecrawl_py-3.3.2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: firecrawl-py
-Version: 3.3.0
+Version: 3.3.2
 Summary: Python SDK for Firecrawl API
 Home-page: https://github.com/firecrawl/firecrawl
 Author: Mendable.ai
firecrawl_py-3.3.2.dist-info/RECORD ADDED
@@ -0,0 +1,79 @@
+firecrawl/__init__.py,sha256=l_9wjt7rVPqg4ovtOFC46I_8LghOX_jDFoEXuSavJJ0,2192
+firecrawl/client.py,sha256=2BGIRTiW2eR6q3wu_g2s3VTQtrHYauoDeNF1YklQpHo,11089
+firecrawl/firecrawl.backup.py,sha256=v1FEN3jR4g5Aupg4xp6SLkuFvYMQuUKND2YELbYjE6c,200430
+firecrawl/types.py,sha256=W9N2pqQuevEIIjYHN9rbDf31E-nwdCECqIn11Foz2T8,2836
+firecrawl/__tests__/e2e/v2/conftest.py,sha256=I28TUpN5j0-9gM79NlbrDS8Jlsheao657od2f-2xK0Y,2587
+firecrawl/__tests__/e2e/v2/test_async.py,sha256=ZXpf1FVOJgNclITglrxIyFwP4cOiqzWLicGaxIm70BQ,2526
+firecrawl/__tests__/e2e/v2/test_batch_scrape.py,sha256=H9GtuwHIFdOQ958SOVThi_kvDDxcXAK_ECRh95ogonQ,3265
+firecrawl/__tests__/e2e/v2/test_crawl.py,sha256=cOssZvIwtghAtLiM1QdNLhPEwAxZ9j9umTrBUPtJjpU,9951
+firecrawl/__tests__/e2e/v2/test_extract.py,sha256=HgvGiDlyWtFygiPo5EP44Dem1oWrwgRF-hfc1LfeVSU,1670
+firecrawl/__tests__/e2e/v2/test_map.py,sha256=9sT-Yq8V_8c9esl_bv5hnTA9WXb2Dg81kj6M-s0484c,1618
+firecrawl/__tests__/e2e/v2/test_scrape.py,sha256=psW2nfcA_hMFpZ4msL_VJWJTMa3Sidp11ubhftbm52g,5759
+firecrawl/__tests__/e2e/v2/test_search.py,sha256=tvU9_eg_3H5em0fhIwPPjuYe9BRAQ5St-BLM0l_FfVs,9079
+firecrawl/__tests__/e2e/v2/test_usage.py,sha256=JlBkYblhThua5qF2crRjsPpq4Ja0cBsdzxZ5zxXnQ_Y,805
+firecrawl/__tests__/e2e/v2/test_watcher.py,sha256=OPTKLhVAKWqXl2Tieo6zCN1xpEwZDsz-B977CVJgLMA,1932
+firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py,sha256=gJv_mLzzoAYftETB2TLkrpSfB5c04kaYgkD4hQTYsIg,2639
+firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py,sha256=X-nk5tkYUYIkM6kTYl7GDjvxh2JT9GxJqk2KlO8xpWw,7282
+firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py,sha256=3CNRIFzgBMcOYOLhnKcK1k5a3Gy--u08EGDkL31uieM,1199
+firecrawl/__tests__/e2e/v2/aio/test_aio_map.py,sha256=nckl1kbiEaaTdu5lm__tOoTDG-txTYwwSH3KZEvyKzc,1199
+firecrawl/__tests__/e2e/v2/aio/test_aio_scrape.py,sha256=b17A7advBEjxrjdait2w8GHztZeKy_P3zZ3ixm5H7xw,4453
+firecrawl/__tests__/e2e/v2/aio/test_aio_search.py,sha256=ehV0Ai_hknAkaoE551j2lbktV4bi_J0h3FKzC7G15Iw,8246
+firecrawl/__tests__/e2e/v2/aio/test_aio_usage.py,sha256=Dh9BVo48NKSZOKgLbO7n8fpMjvYmeMXDFzbIhnCTMhE,1014
+firecrawl/__tests__/e2e/v2/aio/test_aio_watcher.py,sha256=hwES4Nu5c0hniZ9heIPDfvh_2JmJ2wPoX9ULTZ0Asjs,1471
+firecrawl/__tests__/unit/v2/methods/test_batch_request_preparation.py,sha256=HeOxN-sPYSssytcIRAEicJSZsFt_Oa5qGXAtdumR54c,4040
+firecrawl/__tests__/unit/v2/methods/test_crawl_params.py,sha256=p9hzg14uAs1iHKXPDSXhGU6hEzPBF_Ae34RAf5XYa10,2387
+firecrawl/__tests__/unit/v2/methods/test_crawl_request_preparation.py,sha256=PEKbooNXfQwPpvcPHXABJnveztgAA-RFBhtlSs8uPro,8780
+firecrawl/__tests__/unit/v2/methods/test_crawl_validation.py,sha256=kErOmHSD01eMjXiMd4rgsMVGd_aU2G9uVymBjbAFoGw,3918
+firecrawl/__tests__/unit/v2/methods/test_map_request_preparation.py,sha256=toVcgnMp_cFeYsIUuyKGEWZGp0nAAkzaeFGUbY0zY0o,1868
+firecrawl/__tests__/unit/v2/methods/test_scrape_request_preparation.py,sha256=wDOslsA5BN4kyezlaT5GeMv_Ifn8f461EaA7i5ujnaQ,3482
+firecrawl/__tests__/unit/v2/methods/test_search_request_preparation.py,sha256=14lUgFpQsiosgMKjDustBRVE0zXnHujBI76F8BC5PZ4,6072
+firecrawl/__tests__/unit/v2/methods/test_search_validation.py,sha256=7UGcNHpQzCpZbAPYjthfdPFWmAPcoApY-ED-khtuANs,9498
+firecrawl/__tests__/unit/v2/methods/test_usage_types.py,sha256=cCHHfa6agSjD0brQ9rcAcw2kaI9riUH5C0dXV-fqktg,591
+firecrawl/__tests__/unit/v2/methods/test_webhook.py,sha256=AvvW-bKpUA--Lvtif2bmUIp-AxiaMJ29ie1i9dk8WbI,4586
+firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py,sha256=9azJxVvDOBqUevLp-wBF9gF7Ptj-7nN6LOkPQncFX2M,456
+firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_request_preparation.py,sha256=RkIKt7uxBzVhAkrLQwXYjmC-9sj32SUNQrJZgF2WEMs,2565
+firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py,sha256=WMgltdrrT2HOflqGyahC4v-Wb29_8sypN0hwS9lYXe8,403
+firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py,sha256=PdUJrR0JLWqrithAnRXwuRrnsIN2h_DTu6-xvTOn_UU,725
+firecrawl/__tests__/unit/v2/methods/aio/test_aio_scrape_request_preparation.py,sha256=A5DT4wpH4vrIPvFxKVHrtDH5A3bgJ_ad4fmVQ8LN1t0,1993
+firecrawl/__tests__/unit/v2/methods/aio/test_aio_search_request_preparation.py,sha256=hFk4XgqF3aFPGFJe0ikB1uwf_0FsppNGA088OrWUXvg,2091
+firecrawl/__tests__/unit/v2/methods/aio/test_batch_request_preparation_async.py,sha256=E26UnUhpbjG-EG0ab4WRD94AxA5IBWmIHq8ZLBOWoAA,1202
+firecrawl/__tests__/unit/v2/methods/aio/test_ensure_async.py,sha256=pUwuWhRbVUTbgsZn4hgZesMkTMesTv_NPmvFW--ls-Y,3815
+firecrawl/__tests__/unit/v2/utils/test_validation.py,sha256=E4n4jpBhH_W7E0ikI5r8KMAKiOhbfGD3i_B8-dv3PlI,10803
+firecrawl/__tests__/unit/v2/watcher/test_ws_watcher.py,sha256=87w47n0iOihtu4jTR4-4rw1-xVKWmLg2BOBGxjQPnUk,9517
+firecrawl/v1/__init__.py,sha256=aP1oisPeZVGGZynvENc07JySMOZfv_4zAlxQ0ecMJXA,481
+firecrawl/v1/client.py,sha256=sydurfEFTsXyowyaGryA1lkPxN_r9Nf6iQpM43OwJyM,201672
+firecrawl/v2/__init__.py,sha256=Jc6a8tBjYG5OPkjDM5pl-notyys-7DEj7PLEfepv3fc,137
+firecrawl/v2/client.py,sha256=_DZFZO1aWvODzznK0g2Svcd2-xxXgWGR0d9vniNlk1w,30621
+firecrawl/v2/client_async.py,sha256=zwxHis1bSh0tSF1480ze-4XDQEDJ5yDur1ZqtL94dwc,10127
+firecrawl/v2/types.py,sha256=rBdTaTQmb1SmdR8O0GvA_gCfBG-QCtBOrMsFgA9Usms,22114
+firecrawl/v2/watcher.py,sha256=FOU71tqSKxgeuGycu4ye0SLc2dw7clIcoQjPsi-4Csc,14229
+firecrawl/v2/watcher_async.py,sha256=AVjW2mgABniolSsauK4u0FW8ya6WzRUdyEg2R-8vGCw,10278
+firecrawl/v2/methods/batch.py,sha256=us7zUGl7u9ZDIEk2J3rNqj87bkaNjXU27SMFW_fdcg8,11932
+firecrawl/v2/methods/crawl.py,sha256=4ZUmanHNuNtq9wbKMAZ3lenuPcNdOaV0kYXqMI5XJJ8,15485
+firecrawl/v2/methods/extract.py,sha256=-Jr4BtraU3b7hd3JIY73V-S69rUclxyXyUpoQb6DCQk,4274
+firecrawl/v2/methods/map.py,sha256=4SADb0-lkbdOWDmO6k8_TzK0yRti5xsN40N45nUl9uA,2592
+firecrawl/v2/methods/scrape.py,sha256=CSHBwC-P91UfrW3zHirjNAs2h899FKcWvd1DY_4fJdo,1921
+firecrawl/v2/methods/search.py,sha256=6BKiQ1aKJjWBKm9BBtKxFKGD74kCKBeMIp_OgjcDFAw,7673
+firecrawl/v2/methods/usage.py,sha256=OJlkxwaB-AAtgO3WLr9QiqBRmjdh6GVhroCgleegupQ,1460
+firecrawl/v2/methods/aio/__init__.py,sha256=RocMJnGwnLIvGu3G8ZvY8INkipC7WHZiu2bE31eSyJs,35
+firecrawl/v2/methods/aio/batch.py,sha256=GS_xsd_Uib1fxFITBK1sH88VGzFMrIcqJVQqOvMQ540,3735
+firecrawl/v2/methods/aio/crawl.py,sha256=pC6bHVk30Hj1EJdAChxpMOg0Xx_GVqq4tIlvU2e5RQ4,6688
+firecrawl/v2/methods/aio/extract.py,sha256=IfNr2ETqt4dR73JFzrEYI4kk5vpKnJOG0BmPEjGEoO4,4217
+firecrawl/v2/methods/aio/map.py,sha256=EuT-5A0cQr_e5SBfEZ6pnl8u0JUwEEvSwhyT2N-QoKU,2326
+firecrawl/v2/methods/aio/scrape.py,sha256=ilA9qco8YGwCFpE0PN1XBQUyuHPQwH2QioZ-xsfxhgU,1386
+firecrawl/v2/methods/aio/search.py,sha256=_TqTFGQLlOCCLNdWcOvakTqPGD2r9AOlBg8RasOgmvw,6177
+firecrawl/v2/methods/aio/usage.py,sha256=OtBi6X-aT09MMR2dpm3vBCm9JrJZIJLCQ8jJ3L7vie4,1606
+firecrawl/v2/utils/__init__.py,sha256=i1GgxySmqEXpWSBQCu3iZBPIJG7fXj0QXCDWGwerWNs,338
+firecrawl/v2/utils/error_handler.py,sha256=Iuf916dHphDY8ObNNlWy75628DFeJ0Rv8ljRp4LttLE,4199
+firecrawl/v2/utils/get_version.py,sha256=0CxW_41q2hlzIxEWOivUCaYw3GFiSIH32RPUMcIgwAY,492
+firecrawl/v2/utils/http_client.py,sha256=_n8mp4xi6GGihg662Lsv6TSlvw9zykyADwEk0fg8mYA,4873
+firecrawl/v2/utils/http_client_async.py,sha256=iy89_bk2HS3afSRHZ8016eMCa9Fk-5MFTntcOHfbPgE,1936
+firecrawl/v2/utils/normalize.py,sha256=nlTU6QRghT1YKZzNZlIQj4STSRuSUGrS9cCErZIcY5w,3636
+firecrawl/v2/utils/validation.py,sha256=L8by7z-t6GuMGIYkK7il1BM8d-4_-sAdG9hDMF_LeG4,14518
+tests/test_change_tracking.py,sha256=_IJ5ShLcoj2fHDBaw-nE4I4lHdmDB617ocK_XMHhXps,4177
+tests/test_timeout_conversion.py,sha256=PWlIEMASQNhu4cp1OW_ebklnE9NCiigPnEFCtI5N3w0,3996
+firecrawl_py-3.3.2.dist-info/LICENSE,sha256=nPCunEDwjRGHlmjvsiDUyIWbkqqyj3Ej84ntnh0g0zA,1084
+firecrawl_py-3.3.2.dist-info/METADATA,sha256=xGcVdw1lhmAlArE2riSVeSnQV_ht0EalkT_QGm8HaNM,7316
+firecrawl_py-3.3.2.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+firecrawl_py-3.3.2.dist-info/top_level.txt,sha256=8T3jOaSN5mtLghO-R3MQ8KO290gIX8hmfxQmglBPdLE,16
+firecrawl_py-3.3.2.dist-info/RECORD,,
{firecrawl_py-3.3.0.dist-info → firecrawl_py-3.3.2.dist-info}/top_level.txt CHANGED
@@ -1,4 +1,2 @@
-build
-dist
 firecrawl
 tests
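
Dropping build and dist from top_level.txt matches the deletion of the entire build/lib tree below: the 3.3.0 wheel shipped stale build artifacts as importable top-level packages, and 3.3.2 removes them. A small check against a downloaded wheel (the local filename is an assumption):

    import zipfile

    with zipfile.ZipFile("firecrawl_py-3.3.2-py3-none-any.whl") as whl:
        top_level = {name.split("/", 1)[0] for name in whl.namelist()}

    # Per the RECORD above, only these prefixes should remain:
    print(sorted(top_level))  # ['firecrawl', 'firecrawl_py-3.3.2.dist-info', 'tests']
    assert "build" not in top_level and "dist" not in top_level
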
build/lib/firecrawl/__init__.py DELETED
@@ -1,87 +0,0 @@
-"""
-Firecrawl Python SDK
-
-"""
-
-import logging
-import os
-
-from .client import Firecrawl, AsyncFirecrawl, FirecrawlApp, AsyncFirecrawlApp
-from .v2.watcher import Watcher
-from .v2.watcher_async import AsyncWatcher
-from .v1 import (
-    V1FirecrawlApp,
-    AsyncV1FirecrawlApp,
-    V1JsonConfig,
-    V1ScrapeOptions,
-    V1ChangeTrackingOptions,
-)
-
-__version__ = "3.3.0"
-
-# Define the logger for the Firecrawl project
-logger: logging.Logger = logging.getLogger("firecrawl")
-
-
-def _configure_logger() -> None:
-    """
-    Configure the firecrawl logger for console output.
-
-    The function attaches a handler for console output with a specific format and date
-    format to the firecrawl logger.
-    """
-    try:
-        formatter = logging.Formatter(
-            "[%(asctime)s - %(name)s:%(lineno)d - %(levelname)s] %(message)s",
-            datefmt="%Y-%m-%d %H:%M:%S",
-        )
-
-        console_handler = logging.StreamHandler()
-        console_handler.setFormatter(formatter)
-
-        logger.addHandler(console_handler)
-    except Exception as e:
-        logger.error("Failed to configure logging: %s", e)
-
-
-def setup_logging() -> None:
-    """Set up logging based on the FIRECRAWL_LOGGING_LEVEL environment variable."""
-    if logger.hasHandlers():
-        return
-
-    if not (env := os.getenv("FIRECRAWL_LOGGING_LEVEL", "").upper()):
-        logger.addHandler(logging.NullHandler())
-        return
-
-    _configure_logger()
-
-    if env == "DEBUG":
-        logger.setLevel(logging.DEBUG)
-    elif env == "INFO":
-        logger.setLevel(logging.INFO)
-    elif env == "WARNING":
-        logger.setLevel(logging.WARNING)
-    elif env == "ERROR":
-        logger.setLevel(logging.ERROR)
-    elif env == "CRITICAL":
-        logger.setLevel(logging.CRITICAL)
-    else:
-        logger.setLevel(logging.INFO)
-        logger.warning("Unknown logging level: %s, defaulting to INFO", env)
-
-setup_logging()
-logger.debug("Debugging logger setup")
-
-__all__ = [
-    'Firecrawl',
-    'AsyncFirecrawl',
-    'FirecrawlApp',
-    'AsyncFirecrawlApp',
-    'Watcher',
-    'AsyncWatcher',
-    'V1FirecrawlApp',
-    'AsyncV1FirecrawlApp',
-    'V1JsonConfig',
-    'V1ScrapeOptions',
-    'V1ChangeTrackingOptions',
-]
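
This deleted file is a stale build copy of the packaged firecrawl/__init__.py, which keeps the same logging setup. A minimal sketch of enabling SDK debug logging via the environment variable it reads, assuming 3.3.2 retains this behavior:

    import os

    # Must be set before the first import: setup_logging() runs at import time.
    os.environ["FIRECRAWL_LOGGING_LEVEL"] = "DEBUG"

    import firecrawl

    # A console handler is now attached and the level set to DEBUG.
    firecrawl.logger.debug("firecrawl debug logging enabled")
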
build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_batch_scrape.py DELETED
@@ -1,79 +0,0 @@
-import os
-import asyncio
-import pytest
-from dotenv import load_dotenv
-from firecrawl import AsyncFirecrawl
-
-
-load_dotenv()
-
-if not os.getenv("API_KEY"):
-    raise ValueError("API_KEY is not set")
-
-if not os.getenv("API_URL"):
-    raise ValueError("API_URL is not set")
-
-
-@pytest.mark.asyncio
-async def test_async_batch_start_and_status():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    start = await client.start_batch_scrape([
-        "https://docs.firecrawl.dev",
-        "https://firecrawl.dev",
-    ], formats=["markdown"], max_concurrency=1)
-    job_id = start.id
-
-    deadline = asyncio.get_event_loop().time() + 240
-    status = await client.get_batch_scrape_status(job_id)
-    while status.status not in ("completed", "failed", "cancelled") and asyncio.get_event_loop().time() < deadline:
-        await asyncio.sleep(2)
-        status = await client.get_batch_scrape_status(job_id)
-
-    assert status.status in ("completed", "failed", "cancelled")
-
-
-@pytest.mark.asyncio
-async def test_async_batch_wait_minimal():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    job = await client.batch_scrape([
-        "https://docs.firecrawl.dev",
-        "https://firecrawl.dev",
-    ], formats=["markdown"], poll_interval=1, timeout=120)
-    assert job.status in ("completed", "failed")
-
-
-@pytest.mark.asyncio
-async def test_async_batch_wait_with_all_params():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    json_schema = {"type": "object", "properties": {"title": {"type": "string"}}, "required": ["title"]}
-    job = await client.batch_scrape(
-        [
-            "https://docs.firecrawl.dev",
-            "https://firecrawl.dev",
-        ],
-        formats=[
-            "markdown",
-            {"type": "json", "prompt": "Extract page title", "schema": json_schema},
-            {"type": "changeTracking", "prompt": "Track changes", "modes": ["json"]},
-        ],
-        only_main_content=True,
-        mobile=False,
-        ignore_invalid_urls=True,
-        max_concurrency=2,
-        zero_data_retention=False,
-        poll_interval=1,
-        timeout=180,
-    )
-    assert job.status in ("completed", "failed")
-
-
-@pytest.mark.asyncio
-async def test_async_cancel_batch():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    start = await client.start_batch_scrape([
-        "https://docs.firecrawl.dev",
-        "https://firecrawl.dev",
-    ], formats=["markdown"], max_concurrency=1)
-    ok = await client.cancel_batch_scrape(start.id)
-    assert ok is True
-
build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_crawl.py DELETED
@@ -1,188 +0,0 @@
-import os
-import asyncio
-import pytest
-from dotenv import load_dotenv
-from firecrawl import AsyncFirecrawl
-from firecrawl.v2.types import ScrapeOptions
-
-
-load_dotenv()
-
-if not os.getenv("API_KEY"):
-    raise ValueError("API_KEY is not set")
-
-if not os.getenv("API_URL"):
-    raise ValueError("API_URL is not set")
-
-
-@pytest.mark.asyncio
-async def test_async_crawl_start_and_status():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    start = await client.start_crawl("https://docs.firecrawl.dev", limit=2)
-    job_id = start.id
-
-    deadline = asyncio.get_event_loop().time() + 180
-    status = await client.get_crawl_status(job_id)
-    while status.status not in ("completed", "failed") and asyncio.get_event_loop().time() < deadline:
-        await asyncio.sleep(2)
-        status = await client.get_crawl_status(job_id)
-
-    assert status.status in ("completed", "failed")
-
-
-@pytest.mark.asyncio
-async def test_async_crawl_with_all_params():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    # rich scrape options including json format
-    json_schema = {
-        "type": "object",
-        "properties": {"title": {"type": "string"}},
-        "required": ["title"],
-    }
-    status = await client.crawl(
-        url="https://docs.firecrawl.dev",
-        prompt="Extract docs and blog",
-        include_paths=["/docs/*", "/blog/*"],
-        exclude_paths=["/admin/*"],
-        max_discovery_depth=2,
-        ignore_sitemap=False,
-        ignore_query_parameters=True,
-        limit=5,
-        crawl_entire_domain=False,
-        allow_external_links=True,
-        allow_subdomains=True,
-        delay=1,
-        max_concurrency=2,
-        webhook="https://example.com/hook",
-        scrape_options=ScrapeOptions(
-            formats=[
-                "markdown",
-                "rawHtml",
-                {"type": "json", "prompt": "Extract title", "schema": json_schema},
-            ],
-            only_main_content=True,
-            mobile=False,
-            timeout=20000,
-            wait_for=500,
-            skip_tls_verification=False,
-            remove_base64_images=False,
-        ),
-        zero_data_retention=False,
-        poll_interval=2,
-        timeout=180,
-    )
-    assert status.status in ("completed", "failed")
-
-
-@pytest.mark.asyncio
-async def test_async_start_crawl_with_options():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    start = await client.start_crawl("https://docs.firecrawl.dev", limit=5, max_discovery_depth=2)
-    assert start.id is not None and start.url is not None
-
-
-@pytest.mark.asyncio
-async def test_async_start_crawl_with_prompt():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    start = await client.start_crawl("https://firecrawl.dev", prompt="Extract all blog posts", limit=3)
-    assert start.id is not None and start.url is not None
-
-
-@pytest.mark.asyncio
-async def test_async_get_crawl_status_shape():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    start = await client.start_crawl("https://docs.firecrawl.dev", limit=3)
-    status = await client.get_crawl_status(start.id)
-    assert status.status in ("scraping", "completed", "failed")
-    assert status.completed >= 0
-    assert status.expires_at is not None
-    assert isinstance(status.data, list)
-
-
-@pytest.mark.asyncio
-async def test_async_crawl_with_wait():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    job = await client.crawl(url="https://docs.firecrawl.dev", limit=3, max_discovery_depth=2, poll_interval=1, timeout=120)
-    assert job.status in ("completed", "failed")
-    assert job.completed >= 0 and job.total >= 0 and isinstance(job.data, list)
-
-
-@pytest.mark.asyncio
-async def test_async_crawl_with_prompt_and_wait():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    job = await client.crawl(url="https://docs.firecrawl.dev", prompt="Extract all blog posts", limit=3, poll_interval=1, timeout=120)
-    assert job.status in ("completed", "failed")
-    assert job.completed >= 0 and job.total >= 0 and isinstance(job.data, list)
-
-
-@pytest.mark.asyncio
-async def test_async_crawl_with_scrape_options():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    scrape_opts = ScrapeOptions(formats=["markdown", "links"], only_main_content=False, mobile=True)
-    start = await client.start_crawl("https://docs.firecrawl.dev", limit=2, scrape_options=scrape_opts)
-    assert start.id is not None
-
-
-@pytest.mark.asyncio
-async def test_async_crawl_with_json_format_object():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    json_schema = {"type": "object", "properties": {"title": {"type": "string"}}, "required": ["title"]}
-    scrape_opts = ScrapeOptions(formats=[{"type": "json", "prompt": "Extract page title", "schema": json_schema}])
-    start = await client.start_crawl("https://docs.firecrawl.dev", limit=2, scrape_options=scrape_opts)
-    assert start.id is not None
-
-
-@pytest.mark.asyncio
-async def test_async_cancel_crawl():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    start = await client.start_crawl("https://docs.firecrawl.dev", limit=3)
-    cancelled = await client.cancel_crawl(start.id)
-    assert cancelled is True
-
-
-@pytest.mark.asyncio
-async def test_async_get_crawl_errors_and_invalid_job():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    start = await client.start_crawl("https://docs.firecrawl.dev", limit=2)
-    errs = await client.get_crawl_errors(start.id)
-    assert hasattr(errs, "errors") and hasattr(errs, "robots_blocked")
-    with pytest.raises(Exception):
-        await client.get_crawl_errors("invalid-job-id-12345")
-
-
-@pytest.mark.asyncio
-async def test_async_active_crawls():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    resp = await client.active_crawls()
-    assert hasattr(resp, "success") and hasattr(resp, "crawls")
-
-
-@pytest.mark.asyncio
-async def test_async_active_crawls_with_running_crawl():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    start = await client.start_crawl("https://docs.firecrawl.dev", limit=3)
-    # fetch active crawls and assert our ID is listed
-    active = await client.active_crawls()
-    ids = [c.id for c in active.crawls]
-    assert start.id in ids
-    # cleanup
-    await client.cancel_crawl(start.id)
-
-
-@pytest.mark.asyncio
-async def test_async_crawl_params_preview():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    params = await client.crawl_params_preview(
-        url="https://docs.firecrawl.dev",
-        prompt="Extract all blog posts and documentation",
-    )
-    assert params is not None
-    # basic sanity: at least one field should be suggested
-    has_any = any([
-        getattr(params, "limit", None) is not None,
-        getattr(params, "include_paths", None) is not None,
-        getattr(params, "max_discovery_depth", None) is not None,
-    ])
-    assert has_any
-
-
build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_extract.py DELETED
@@ -1,38 +0,0 @@
-import os
-import pytest
-from dotenv import load_dotenv
-from firecrawl import AsyncFirecrawl
-
-
-load_dotenv()
-
-if not os.getenv("API_KEY"):
-    raise ValueError("API_KEY is not set")
-
-if not os.getenv("API_URL"):
-    raise ValueError("API_URL is not set")
-
-
-@pytest.mark.asyncio
-async def test_async_extract_minimal():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    res = await client.extract(urls=["https://docs.firecrawl.dev"], prompt="Extract title")
-    assert res is not None
-
-
-@pytest.mark.asyncio
-async def test_async_extract_with_schema_and_options():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    schema = {"type": "object", "properties": {"title": {"type": "string"}}, "required": ["title"]}
-    res = await client.extract(
-        urls=["https://docs.firecrawl.dev"],
-        prompt="Extract title",
-        schema=schema,
-        system_prompt="You are a helpful extractor",
-        allow_external_links=False,
-        enable_web_search=False,
-        show_sources=False,
-        # agent={"model": "FIRE-1", "prompt": "Extract title"},  # Skipping agent test in CI
-    )
-    assert res is not None
-
build/lib/firecrawl/__tests__/e2e/v2/aio/test_aio_map.py DELETED
@@ -1,40 +0,0 @@
-import os
-import pytest
-from dotenv import load_dotenv
-from firecrawl import AsyncFirecrawl
-
-
-load_dotenv()
-
-if not os.getenv("API_KEY"):
-    raise ValueError("API_KEY is not set")
-
-if not os.getenv("API_URL"):
-    raise ValueError("API_URL is not set")
-
-
-@pytest.mark.asyncio
-async def test_async_map_minimal():
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    resp = await client.map("https://docs.firecrawl.dev")
-    assert hasattr(resp, "links") and isinstance(resp.links, list)
-    if resp.links:
-        first = resp.links[0]
-        assert hasattr(first, "url") and isinstance(first.url, str) and first.url.startswith("http")
-
-
-@pytest.mark.asyncio
-@pytest.mark.parametrize("sitemap", ["only", "include", "skip"])
-async def test_async_map_with_all_params(sitemap):
-    client = AsyncFirecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL"))
-    resp = await client.map(
-        "https://docs.firecrawl.dev",
-        search="docs",
-        include_subdomains=True,
-        limit=10,
-        sitemap=sitemap,
-        timeout=15000,
-    )
-    assert hasattr(resp, "links") and isinstance(resp.links, list)
-    assert len(resp.links) <= 10
-