firecrawl-toolkit 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ # firecrawl_toolkit/__init__.py
2
+ from .server import main
3
+
4
+ __all__ = ["main"]
@@ -0,0 +1,10 @@
1
+ # firecrawl_toolkit/__main__.py
2
+
3
+ from . import server
4
+
5
def main():
    """Run the package entry point by delegating to ``server.main``."""
    server.main()


# Allow ``python -m <package>`` to start the server.
if __name__ == "__main__":
    main()
@@ -0,0 +1,248 @@
1
+ {
2
+ "AF": ["AF", "Afghanistan", "Islamic Republic of Afghanistan", "AFG", "阿富汗", "阿富汗伊斯兰共和国"],
3
+ "AL": ["AL", "Albania", "Republic of Albania", "ALB", "阿尔巴尼亚"],
4
+ "DZ": ["DZ", "Algeria", "People's Democratic Republic of Algeria", "DZA", "阿尔及利亚"],
5
+ "AS": ["AS", "American Samoa", "AS", "美属萨摩亚"],
6
+ "AD": ["AD", "Andorra", "Principality of Andorra", "AND", "安道尔"],
7
+ "AO": ["AO", "Angola", "Republic of Angola", "AGO", "安哥拉"],
8
+ "AI": ["AI", "Anguilla", "AI", "安圭拉"],
9
+ "AQ": ["AQ", "Antarctica", "AQ", "南极洲"],
10
+ "AG": ["AG", "Antigua and Barbuda", "Antigua & Barbuda", "ATG", "安提瓜和巴布达"],
11
+ "AR": ["AR", "Argentina", "Argentine Republic", "ARG", "阿根廷"],
12
+ "AM": ["AM", "Armenia", "Republic of Armenia", "ARM", "亚美尼亚"],
13
+ "AW": ["AW", "Aruba", "AW", "阿鲁巴"],
14
+ "AU": ["AU", "Australia", "Commonwealth of Australia", "AUS", "澳大利亚", "Australia", "Down Under", "Oz"],
15
+ "AT": ["AT", "Austria", "Republic of Austria", "AUT", "奥地利"],
16
+ "AZ": ["AZ", "Azerbaijan", "Republic of Azerbaijan", "AZE", "阿塞拜疆"],
17
+ "BS": ["BS", "Bahamas", "Commonwealth of The Bahamas", "BHS", "巴哈马", "The Bahamas"],
18
+ "BH": ["BH", "Bahrain", "Kingdom of Bahrain", "BHR", "巴林"],
19
+ "BD": ["BD", "Bangladesh", "People's Republic of Bangladesh", "BGD", "孟加拉国", "孟加拉"],
20
+ "BB": ["BB", "Barbados", "BB", "巴巴多斯"],
21
+ "BY": ["BY", "Belarus", "Republic of Belarus", "BLR", "白俄罗斯"],
22
+ "BE": ["BE", "Belgium", "Kingdom of Belgium", "BEL", "比利时"],
23
+ "BZ": ["BZ", "Belize", "BLZ", "伯利兹"],
24
+ "BJ": ["BJ", "Benin", "Republic of Benin", "BEN", "贝宁"],
25
+ "BM": ["BM", "Bermuda", "BM", "百慕大"],
26
+ "BT": ["BT", "Bhutan", "Kingdom of Bhutan", "BTN", "不丹"],
27
+ "BO": ["BO", "Bolivia", "Plurinational State of Bolivia", "BOL", "玻利维亚"],
28
+ "BA": ["BA", "Bosnia and Herzegovina", "BiH", "BA", "波黑", "波斯尼亚和黑塞哥维那"],
29
+ "BW": ["BW", "Botswana", "Republic of Botswana", "BWA", "博茨瓦纳"],
30
+ "BV": ["BV", "Bouvet Island", "Bouvetøya", "BV", "布韦岛"],
31
+ "BR": ["BR", "Brazil", "Federative Republic of Brazil", "BRA", "巴西"],
32
+ "IO": ["IO", "British Indian Ocean Territory", "BIOT", "英属印度洋领地"],
33
+ "BN": ["BN", "Brunei", "Brunei Darussalam", "BRN", "文莱", "文莱达鲁萨兰国"],
34
+ "BG": ["BG", "Bulgaria", "Republic of Bulgaria", "BGR", "保加利亚"],
35
+ "BF": ["BF", "Burkina Faso", "BFA", "布基纳法索"],
36
+ "BI": ["BI", "Burundi", "Republic of Burundi", "BDI", "布隆迪"],
37
+ "KH": ["KH", "Cambodia", "Kingdom of Cambodia", "KHM", "柬埔寨"],
38
+ "CM": ["CM", "Cameroon", "Republic of Cameroon", "CMR", "喀麦隆"],
39
+ "CA": ["CA", "Canada", "CA", "加拿大"],
40
+ "CV": ["CV", "Cape Verde", "Cabo Verde", "CPV", "佛得角", "开普绿洲"],
41
+ "KY": ["KY", "Cayman Islands", "KY", "开曼群岛"],
42
+ "CF": ["CF", "Central African Republic", "CAR", "中非共和国", "中非"],
43
+ "TD": ["TD", "Chad", "TCD", "乍得"],
44
+ "CL": ["CL", "Chile", "Republic of Chile", "CHL", "智利"],
45
+ "CN": ["CN", "China", "People's Republic of China", "PRC", "CHN", "中国", "中华人民共和国", "Mainland China", "大陆"],
46
+ "CX": ["CX", "Christmas Island", "CX", "圣诞岛"],
47
+ "CC": ["CC", "Cocos (Keeling) Islands", "CC", "科科斯(基林)群岛"],
48
+ "CO": ["CO", "Colombia", "Republic of Colombia", "COL", "哥伦比亚"],
49
+ "KM": ["KM", "Comoros", "Union of the Comoros", "COM", "科摩罗"],
50
+ "CG": ["CG", "Congo", "Republic of the Congo", "COG", "刚果(布)", "Congo-Brazzaville"],
51
+ "CD": ["CD", "Congo (Democratic Republic)", "DRC", "COD", "刚果(金)", "Democratic Republic of the Congo"],
52
+ "CK": ["CK", "Cook Islands", "CK", "库克群岛"],
53
+ "CR": ["CR", "Costa Rica", "Republic of Costa Rica", "CRI", "哥斯达黎加"],
54
+ "CI": ["CI", "Ivory Coast", "Côte d'Ivoire", "CIV", "科特迪瓦", "象牙海岸"],
55
+ "HR": ["HR", "Croatia", "Republic of Croatia", "HRV", "克罗地亚"],
56
+ "CU": ["CU", "Cuba", "Republic of Cuba", "CUB", "古巴"],
57
+ "CY": ["CY", "Cyprus", "Republic of Cyprus", "CYP", "塞浦路斯"],
58
+ "CZ": ["CZ", "Czechia", "Czech Republic", "CZE", "捷克", "捷克共和国"],
59
+ "DK": ["DK", "Denmark", "Kingdom of Denmark", "DNK", "丹麦"],
60
+ "DJ": ["DJ", "Djibouti", "Republic of Djibouti", "DJI", "吉布提"],
61
+ "DM": ["DM", "Dominica", "DM", "多米尼克"],
62
+ "DO": ["DO", "Dominican Republic", "DOM", "多米尼加共和国"],
63
+ "EC": ["EC", "Ecuador", "ECU", "厄瓜多尔"],
64
+ "EG": ["EG", "Egypt", "Arab Republic of Egypt", "EGY", "埃及"],
65
+ "SV": ["SV", "El Salvador", "SLV", "萨尔瓦多"],
66
+ "GQ": ["GQ", "Equatorial Guinea", "GNQ", "赤道几内亚"],
67
+ "ER": ["ER", "Eritrea", "ERI", "厄立特里亚"],
68
+ "EE": ["EE", "Estonia", "EST", "爱沙尼亚"],
69
+ "SZ": ["SZ", "Eswatini", "Kingdom of Eswatini", "SWZ", "斯威士兰", "埃斯瓦蒂尼"],
70
+ "ET": ["ET", "Ethiopia", "Federal Democratic Republic of Ethiopia", "ETH", "埃塞俄比亚"],
71
+ "FK": ["FK", "Falkland Islands", "FK", "福克兰群岛", "马尔维纳斯"],
72
+ "FO": ["FO", "Faroe Islands", "FO", "法罗群岛"],
73
+ "FJ": ["FJ", "Fiji", "FJI", "斐济"],
74
+ "FI": ["FI", "Finland", "Republic of Finland", "FIN", "芬兰"],
75
+ "FR": ["FR", "France", "French Republic", "FRA", "法国"],
76
+ "GF": ["GF", "French Guiana", "FG", "法属圭亚那"],
77
+ "PF": ["PF", "French Polynesia", "PF", "法属玻利尼西亚"],
78
+ "TF": ["TF", "French Southern Territories", "TF", "法属南部领地"],
79
+ "GA": ["GA", "Gabon", "Gabonese Republic", "GAB", "加蓬"],
80
+ "GM": ["GM", "Gambia", "The Gambia", "GMB", "冈比亚"],
81
+ "GE": ["GE", "Georgia", "GE", "格鲁吉亚"],
82
+ "DE": ["DE", "Germany", "Federal Republic of Germany", "DEU", "德国", "Deutschland"],
83
+ "GH": ["GH", "Ghana", "GHA", "加纳"],
84
+ "GI": ["GI", "Gibraltar", "GI", "直布罗陀"],
85
+ "GR": ["GR", "Greece", "Hellenic Republic", "GRC", "希腊"],
86
+ "GL": ["GL", "Greenland", "GL", "格陵兰"],
87
+ "GD": ["GD", "Grenada", "GRD", "格林纳达"],
88
+ "GP": ["GP", "Guadeloupe", "GP", "瓜德罗普"],
89
+ "GU": ["GU", "Guam", "GU", "关岛"],
90
+ "GT": ["GT", "Guatemala", "GTM", "危地马拉"],
91
+ "GG": ["GG", "Guernsey", "GG", "根西岛"],
92
+ "GN": ["GN", "Guinea", "GIN", "几内亚"],
93
+ "GW": ["GW", "Guinea-Bissau", "GNB", "几内亚比绍"],
94
+ "GY": ["GY", "Guyana", "GUY", "圭亚那"],
95
+ "HT": ["HT", "Haiti", "HTI", "海地"],
96
+ "HM": ["HM", "Heard Island and McDonald Islands", "HM", "赫德岛和麦克唐纳群岛"],
97
+ "VA": ["VA", "Holy See", "Vatican City", "VAT", "梵蒂冈", "教廷"],
98
+ "HN": ["HN", "Honduras", "HND", "洪都拉斯"],
99
+ "HK": ["HK", "Hong Kong", "Hong Kong SAR", "HK", "香港", "香港特别行政区"],
100
+ "HU": ["HU", "Hungary", "HUN", "匈牙利"],
101
+ "IS": ["IS", "Iceland", "ISL", "冰岛"],
102
+ "IN": ["IN", "India", "Republic of India", "IND", "印度"],
103
+ "ID": ["ID", "Indonesia", "Republic of Indonesia", "IDN", "印尼", "印度尼西亚"],
104
+ "IR": ["IR", "Iran", "Islamic Republic of Iran", "IRN", "伊朗"],
105
+ "IQ": ["IQ", "Iraq", "IRQ", "伊拉克"],
106
+ "IE": ["IE", "Ireland", "Republic of Ireland", "IRL", "爱尔兰"],
107
+ "IM": ["IM", "Isle of Man", "IM", "马恩岛"],
108
+ "IL": ["IL", "Israel", "State of Israel", "ISR", "以色列"],
109
+ "IT": ["IT", "Italy", "Italian Republic", "ITA", "意大利"],
110
+ "JM": ["JM", "Jamaica", "JAM", "牙买加"],
111
+ "JP": ["JP", "Japan", "JPN", "日本", "Nippon"],
112
+ "JE": ["JE", "Jersey", "JE", "泽西岛"],
113
+ "JO": ["JO", "Jordan", "JOR", "约旦"],
114
+ "KZ": ["KZ", "Kazakhstan", "Republic of Kazakhstan", "KAZ", "哈萨克斯坦"],
115
+ "KE": ["KE", "Kenya", "KEN", "肯尼亚"],
116
+ "KI": ["KI", "Kiribati", "KIR", "基里巴斯"],
117
+ "KP": ["KP", "North Korea", "Democratic People's Republic of Korea", "PRK", "朝鲜", "朝鲜民主主义人民共和国"],
118
+ "KR": ["KR", "South Korea", "Republic of Korea", "KOR", "韩国", "大韩民国", "Republic of Korea", "Korea, Republic of"],
119
+ "KW": ["KW", "Kuwait", "KWT", "科威特"],
120
+ "KG": ["KG", "Kyrgyzstan", "Kyrgyz Republic", "KGZ", "吉尔吉斯斯坦", "吉尔吉斯"],
121
+ "LA": ["LA", "Laos", "Lao People's Democratic Republic", "LAO", "老挝", "老挝人民民主共和国"],
122
+ "LV": ["LV", "Latvia", "LVA", "拉脱维亚"],
123
+ "LB": ["LB", "Lebanon", "LBN", "黎巴嫩"],
124
+ "LS": ["LS", "Lesotho", "LSO", "莱索托"],
125
+ "LR": ["LR", "Liberia", "LBR", "利比里亚"],
126
+ "LY": ["LY", "Libya", "Libyan Arab Jamahiriya", "LBY", "利比亚"],
127
+ "LI": ["LI", "Liechtenstein", "LIE", "列支敦士登"],
128
+ "LT": ["LT", "Lithuania", "LTU", "立陶宛"],
129
+ "LU": ["LU", "Luxembourg", "LUX", "卢森堡"],
130
+ "MO": ["MO", "Macau", "Macau SAR", "MO", "澳门", "澳门特别行政区"],
131
+ "MG": ["MG", "Madagascar", "MDG", "马达加斯加"],
132
+ "MW": ["MW", "Malawi", "MWI", "马拉维"],
133
+ "MY": ["MY", "Malaysia", "MYS", "马来西亚"],
134
+ "MV": ["MV", "Maldives", "MDV", "马尔代夫"],
135
+ "ML": ["ML", "Mali", "MLI", "马里"],
136
+ "MT": ["MT", "Malta", "MLT", "马耳他"],
137
+ "MH": ["MH", "Marshall Islands", "MHL", "马绍尔群岛"],
138
+ "MQ": ["MQ", "Martinique", "MQ", "马提尼克"],
139
+ "MR": ["MR", "Mauritania", "MRT", "毛里塔尼亚"],
140
+ "MU": ["MU", "Mauritius", "MUS", "毛里求斯"],
141
+ "YT": ["YT", "Mayotte", "YT", "马约特"],
142
+ "MX": ["MX", "Mexico", "United Mexican States", "MEX", "墨西哥"],
143
+ "FM": ["FM", "Micronesia", "Federated States of Micronesia", "FSM", "密克罗尼西亚"],
144
+ "MD": ["MD", "Moldova", "Republic of Moldova", "MDA", "摩尔多瓦"],
145
+ "MC": ["MC", "Monaco", "Principality of Monaco", "MCO", "摩纳哥"],
146
+ "MN": ["MN", "Mongolia", "MNG", "蒙古", "蒙古国"],
147
+ "ME": ["ME", "Montenegro", "MNE", "黑山"],
148
+ "MS": ["MS", "Montserrat", "MSR", "蒙特塞拉特"],
149
+ "MA": ["MA", "Morocco", "Kingdom of Morocco", "MAR", "摩洛哥"],
150
+ "MZ": ["MZ", "Mozambique", "MOZ", "莫桑比克"],
151
+ "MM": ["MM", "Myanmar", "Burma", "MMR", "缅甸", "缅甸联邦"],
152
+ "NA": ["NA", "Namibia", "NAM", "纳米比亚"],
153
+ "NR": ["NR", "Nauru", "NRU", "瑙鲁"],
154
+ "NP": ["NP", "Nepal", "NPL", "尼泊尔"],
155
+ "NL": ["NL", "Netherlands", "Holland", "NLD", "荷兰", "尼德兰"],
156
+ "NC": ["NC", "New Caledonia", "NC", "新喀里多尼亚"],
157
+ "NZ": ["NZ", "New Zealand", "NZ", "新西兰"],
158
+ "NI": ["NI", "Nicaragua", "NIC", "尼加拉瓜"],
159
+ "NE": ["NE", "Niger", "Niger Republic", "NER", "尼日尔"],
160
+ "NG": ["NG", "Nigeria", "NGA", "尼日利亚"],
161
+ "NU": ["NU", "Niue", "NIU", "纽埃"],
162
+ "NF": ["NF", "Norfolk Island", "NF", "诺福克岛"],
163
+ "MK": ["MK", "North Macedonia", "Republic of North Macedonia", "MKD", "北马其顿", "马其顿"],
164
+ "MP": ["MP", "Northern Mariana Islands", "MP", "北马里亚纳群岛"],
165
+ "NO": ["NO", "Norway", "NOR", "挪威"],
166
+ "OM": ["OM", "Oman", "OMN", "阿曼"],
167
+ "PK": ["PK", "Pakistan", "Islamic Republic of Pakistan", "PAK", "巴基斯坦"],
168
+ "PW": ["PW", "Palau", "PLW", "帕劳"],
169
+ "PS": ["PS", "Palestine", "State of Palestine", "PSE", "巴勒斯坦"],
170
+ "PA": ["PA", "Panama", "PAN", "巴拿马"],
171
+ "PG": ["PG", "Papua New Guinea", "PNG", "巴布亚新几内亚"],
172
+ "PY": ["PY", "Paraguay", "PRY", "巴拉圭"],
173
+ "PE": ["PE", "Peru", "PER", "秘鲁"],
174
+ "PH": ["PH", "Philippines", "PHL", "菲律宾"],
175
+ "PN": ["PN", "Pitcairn", "PN", "皮特凯恩群岛"],
176
+ "PL": ["PL", "Poland", "POL", "波兰"],
177
+ "PT": ["PT", "Portugal", "PRT", "葡萄牙"],
178
+ "PR": ["PR", "Puerto Rico", "PR", "波多黎各"],
179
+ "QA": ["QA", "Qatar", "QAT", "卡塔尔"],
180
+ "RE": ["RE", "Réunion", "REU", "留尼汪"],
181
+ "RO": ["RO", "Romania", "ROU", "罗马尼亚"],
182
+ "RU": ["RU", "Russia", "Russian Federation", "RUS", "俄罗斯", "俄国"],
183
+ "RW": ["RW", "Rwanda", "RWA", "卢旺达"],
184
+ "BL": ["BL", "Saint Barthélemy", "BL", "圣巴泰勒米"],
185
+ "SH": ["SH", "Saint Helena", "Saint Helena, Ascension and Tristan da Cunha", "SH", "圣赫勒拿"],
186
+ "KN": ["KN", "Saint Kitts and Nevis", "KNA", "圣基茨和尼维斯"],
187
+ "LC": ["LC", "Saint Lucia", "LCA", "圣卢西亚"],
188
+ "MF": ["MF", "Saint Martin", "MF", "圣马丁"],
189
+ "PM": ["PM", "Saint Pierre and Miquelon", "PM", "圣皮埃尔和密克隆"],
190
+ "VC": ["VC", "Saint Vincent and the Grenadines", "VCT", "圣文森特和格林纳丁斯"],
191
+ "WS": ["WS", "Samoa", "WSM", "萨摩亚"],
192
+ "SM": ["SM", "San Marino", "SMR", "圣马力诺"],
193
+ "ST": ["ST", "Sao Tome and Principe", "STP", "圣多美和普林西比"],
194
+ "SA": ["SA", "Saudi Arabia", "Kingdom of Saudi Arabia", "SAU", "沙特阿拉伯"],
195
+ "SN": ["SN", "Senegal", "SEN", "塞内加尔"],
196
+ "RS": ["RS", "Serbia", "SRB", "塞尔维亚"],
197
+ "SC": ["SC", "Seychelles", "SYC", "塞舌尔"],
198
+ "SL": ["SL", "Sierra Leone", "SLE", "塞拉利昂"],
199
+ "SG": ["SG", "Singapore", "SGP", "新加坡"],
200
+ "SX": ["SX", "Sint Maarten", "SX", "荷属圣马丁"],
201
+ "SK": ["SK", "Slovakia", "Slovak Republic", "SVK", "斯洛伐克"],
202
+ "SI": ["SI", "Slovenia", "SVN", "斯洛文尼亚"],
203
+ "SB": ["SB", "Solomon Islands", "SLB", "所罗门群岛"],
204
+ "SO": ["SO", "Somalia", "SOM", "索马里"],
205
+ "ZA": ["ZA", "South Africa", "Republic of South Africa", "ZAF", "南非"],
206
+ "GS": ["GS", "South Georgia and the South Sandwich Islands", "GS", "南乔治亚和南桑威奇群岛"],
207
+ "SS": ["SS", "South Sudan", "SSD", "南苏丹"],
208
+ "ES": ["ES", "Spain", "Kingdom of Spain", "ESP", "西班牙"],
209
+ "LK": ["LK", "Sri Lanka", "LKA", "斯里兰卡"],
210
+ "SD": ["SD", "Sudan", "SDN", "苏丹"],
211
+ "SR": ["SR", "Suriname", "SUR", "苏里南"],
212
+ "SJ": ["SJ", "Svalbard and Jan Mayen", "SJ", "斯瓦尔巴和扬马延"],
213
+ "SE": ["SE", "Sweden", "SWE", "瑞典"],
214
+ "CH": ["CH", "Switzerland", "Swiss Confederation", "CHE", "瑞士", "Swiss"],
215
+ "SY": ["SY", "Syria", "Syrian Arab Republic", "SYR", "叙利亚"],
216
+ "TW": ["TW", "Taiwan", "Taiwan, Province of China", "TWN", "台湾", "中华民国", "ROC"],
217
+ "TJ": ["TJ", "Tajikistan", "TJK", "塔吉克斯坦"],
218
+ "TZ": ["TZ", "Tanzania", "United Republic of Tanzania", "TZA", "坦桑尼亚"],
219
+ "TH": ["TH", "Thailand", "THA", "泰国"],
220
+ "TL": ["TL", "Timor-Leste", "East Timor", "TLS", "东帝汶", "蒂莫尔"],
221
+ "TG": ["TG", "Togo", "TGO", "多哥"],
222
+ "TK": ["TK", "Tokelau", "TKL", "托克劳"],
223
+ "TO": ["TO", "Tonga", "TON", "汤加"],
224
+ "TT": ["TT", "Trinidad and Tobago", "TTO", "特立尼达和多巴哥"],
225
+ "TN": ["TN", "Tunisia", "TUN", "突尼斯"],
226
+ "TR": ["TR", "Turkey", "Republic of Turkey", "TUR", "土耳其"],
227
+ "TM": ["TM", "Turkmenistan", "TKM", "土库曼斯坦"],
228
+ "TC": ["TC", "Turks and Caicos Islands", "TCA", "特克斯和凯科斯群岛"],
229
+ "TV": ["TV", "Tuvalu", "TUV", "图瓦卢"],
230
+ "UG": ["UG", "Uganda", "UGA", "乌干达"],
231
+ "UA": ["UA", "Ukraine", "UKR", "乌克兰"],
232
+ "AE": ["AE", "United Arab Emirates", "UAE", "阿拉伯联合酋长国", "阿联酋"],
233
+ "GB": ["GB", "United Kingdom", "United Kingdom of Great Britain and Northern Ireland", "UK", "GBR", "英国", "Britain", "Great Britain", "England", "UK"],
234
+ "US": ["US", "United States", "United States of America", "USA", "US", "America", "美利坚合众国", "美国"],
235
+ "UM": ["UM", "United States Minor Outlying Islands", "UM", "美国本土外小岛屿"],
236
+ "UY": ["UY", "Uruguay", "URY", "乌拉圭"],
237
+ "UZ": ["UZ", "Uzbekistan", "UZB", "乌兹别克斯坦"],
238
+ "VU": ["VU", "Vanuatu", "VUT", "瓦努阿图"],
239
+ "VE": ["VE", "Venezuela", "Bolivarian Republic of Venezuela", "VEN", "委内瑞拉"],
240
+ "VN": ["VN", "Vietnam", "Socialist Republic of Vietnam", "VNM", "越南"],
241
+ "VG": ["VG", "British Virgin Islands", "VG", "英属维尔京群岛"],
242
+ "VI": ["VI", "United States Virgin Islands", "VI", "美属维尔京群岛"],
243
+ "WF": ["WF", "Wallis and Futuna", "WF", "瓦利斯和富图纳"],
244
+ "EH": ["EH", "Western Sahara", "ESH", "西撒哈拉"],
245
+ "YE": ["YE", "Yemen", "YEM", "也门"],
246
+ "ZM": ["ZM", "Zambia", "ZMB", "赞比亚"],
247
+ "ZW": ["ZW", "Zimbabwe", "ZWE", "津巴布韦"]
248
+ }
@@ -0,0 +1,642 @@
1
+ import os
2
+ import httpx
3
+ import asyncio
4
+ import json
5
+ import logging
6
+ import re
7
+ import unicodedata
8
+ import random
9
+ from typing import Optional, Dict, Any, Union, List
10
+ from dotenv import load_dotenv
11
+ from mcp.server.fastmcp import FastMCP
12
+ from concurrent.futures import ThreadPoolExecutor
13
+
14
+ logging.basicConfig(level=logging.INFO)
15
+ logger = logging.getLogger(__name__)
16
+
17
+ load_dotenv()
18
+
19
# Optional handle on httpx: ``m`` is the module when the import succeeds and
# ``None`` otherwise, so the client manager can raise a descriptive error.
# NOTE(review): httpx is also imported unconditionally at the top of this
# file, so a missing httpx would fail there first — confirm the intent.
try:
    import httpx as _httpx  # type: ignore
except Exception:
    m = None
else:
    m = _httpx
25
+
26
# Firecrawl API key taken from the environment; None when the variable is unset.
API_KEY: Optional[str] = os.environ.get("FIRECRAWL_API_KEY")
mcp = FastMCP("firecrawl-mcp")

# Country alias index, filled from the local file data/country_aliases.json:
#   ALIAS_MAP          normalized alias -> ISO alpha-2 country code
#   ALIAS_KEYS_SORTED  the keys of ALIAS_MAP in ascending order, kept for
#                      lookups through binary_search
ALIAS_MAP: Dict[str, str] = {}
ALIAS_KEYS_SORTED: list = []

_aliases_path = os.path.join(os.path.dirname(__file__), "data", "country_aliases.json")
36
+
37
# Sort helper used to order the alias keys before binary searching them.
def quick_sort(arr: list) -> list:
    """Return the elements of ``arr`` in ascending order as a new list.

    The name is kept for callers; the work is delegated to the built-in
    ``sorted`` instead of a hand-written recursive quick sort, which avoids
    per-level list copies and recursion while producing the same ordering.

    Args:
        arr: Mutually comparable items (alias strings in this module).
            The input list is not modified.

    Returns:
        A new list holding the same items in ascending order.
    """
    return sorted(arr)
46
+
47
# Binary search for an exact match in an already sorted list.
def binary_search(arr: list, target: str) -> Optional[int]:
    """Return an index of ``target`` in the sorted list ``arr``, or None.

    Uses the standard library ``bisect_left`` rather than a hand-written
    loop. When ``arr`` contains duplicates of ``target`` the leftmost index
    is returned.

    Args:
        arr: A list sorted in ascending order.
        target: The value to look for.

    Returns:
        The index of a matching element, or None when ``target`` is absent.
    """
    from bisect import bisect_left

    pos = bisect_left(arr, target)
    if pos < len(arr) and arr[pos] == target:
        return pos
    return None
59
+
60
def normalize(text: str) -> str:
    """Normalize a country/region name into a lookup key.

    Steps: NFKD-decompose, drop combining marks (accents), trim and
    lowercase, replace every character that is not a word character,
    whitespace, apostrophe or hyphen with a space, then collapse runs of
    underscores/whitespace into a single space.

    Args:
        text: The raw name; a falsy value yields an empty string.

    Returns:
        The normalized key, possibly empty.
    """
    if not text:
        return ""
    decomposed = unicodedata.normalize("NFKD", text)
    # Combining marks are what remain of accents after decomposition.
    unaccented = "".join(filter(lambda ch: not unicodedata.combining(ch), decomposed))
    # Map the ideographic space to a plain space before trimming.
    lowered = unaccented.replace("\u3000", " ").strip().lower()
    # Apostrophes and hyphens survive; other punctuation becomes a space.
    spaced = re.sub(r"[^\w\s'-]", " ", lowered, flags=re.UNICODE)
    return re.sub(r"[_\s]+", " ", spaced).strip()
76
+
77
def _generate_variants(alias: str) -> set:
    """Build spelling variants of ``alias`` to raise the lookup hit rate.

    The variants are: the trimmed alias, its normalized form, the normalized
    form with all remaining punctuation removed, a comma-reordered form
    ("Korea, South" -> "South Korea") plus its normalized form, and — for
    names that normalize to exactly two words — the two words swapped.

    Args:
        alias: One alias string from the alias dictionary.

    Returns:
        The set of non-empty variants.
    """
    trimmed = alias.strip()
    normalized = normalize(trimmed)
    candidates = {trimmed, normalized, re.sub(r"[^\w\s]", "", normalized)}

    if "," in trimmed:
        segments = [seg.strip() for seg in trimmed.split(",") if seg.strip()]
        if len(segments) >= 2:
            flipped = " ".join(reversed(segments))
            candidates.update((flipped, normalize(flipped)))

    words = normalized.split()
    if len(words) == 2:
        candidates.add(" ".join(reversed(words)))

    # Drop the empty string that normalization can produce.
    return {candidate for candidate in candidates if candidate}
99
+
100
# Load the alias dictionary and build ALIAS_MAP (normalized alias -> alpha-2).
# A missing or unreadable file only logs a warning, leaving the tables as
# they were before the failure.
try:
    with open(_aliases_path, "r", encoding="utf-8") as f:
        _forward = json.load(f)

    for code, names in _forward.items():
        code_up = code.upper()
        # A bare value is treated as a one-element alias list.
        iter_names = names if isinstance(names, list) else [names]
        for name in iter_names:
            if not isinstance(name, str):
                continue
            for variant in _generate_variants(name):
                key = normalize(variant)
                if key:
                    # A later alias with the same key overwrites the earlier one.
                    ALIAS_MAP[key] = code_up

    # Keep a sorted copy of the keys for binary-search lookups.
    ALIAS_KEYS_SORTED = quick_sort(list(ALIAS_MAP.keys()))

except FileNotFoundError:
    logger.warning("国家别名字典未找到: %s", _aliases_path)
except Exception as e:
    logger.warning("加载国家别名字典失败: %s", e)
128
+
129
USER_AGENT = "firecrawl_client/1.0"
API_ENDPOINTS = {
    "search": "https://api.firecrawl.dev/v2/search",
    "scrape": "https://api.firecrawl.dev/v2/scrape",
}
HTTP_TIMEOUT = 30.0

# Connection-pool, concurrency and retry settings; each one can be overridden
# through the environment variable of the same name.
FIRECRAWL_MAX_CONNECTIONS = int(os.environ.get("FIRECRAWL_MAX_CONNECTIONS", "200"))
FIRECRAWL_KEEPALIVE = int(os.environ.get("FIRECRAWL_KEEPALIVE", "20"))
FIRECRAWL_HTTP2 = os.environ.get("FIRECRAWL_HTTP2", "0") == "1"
# HTTP/2 needs the 'h2' package; fall back to HTTP/1.1 when it is missing so
# that httpx does not raise ImportError later.
if FIRECRAWL_HTTP2:
    try:
        import h2  # noqa: F401
    except Exception:
        logger.warning("FIRECRAWL_HTTP2 设置为启用,但未检测到 'h2' 包。将自动禁用 HTTP/2(请安装 httpx[http2] 以启用)。")
        FIRECRAWL_HTTP2 = False

# Per the original author's note, the Firecrawl backend caps overall traffic at
# 300 requests per second, so the global default here is 200.
FIRECRAWL_MAX_CONCURRENT_REQUESTS = int(os.environ.get("FIRECRAWL_MAX_CONCURRENT_REQUESTS", "200"))
FIRECRAWL_MAX_WORKERS = int(os.environ.get("FIRECRAWL_MAX_WORKERS", "10"))
FIRECRAWL_RETRY_COUNT = int(os.environ.get("FIRECRAWL_RETRY_COUNT", "3"))
FIRECRAWL_RETRY_BASE_DELAY = float(os.environ.get("FIRECRAWL_RETRY_BASE_DELAY", "0.5"))
154
+
155
def _load_json_object_env(var_name: str, default_json: str) -> Dict[str, Any]:
    """Read a JSON object from environment variable ``var_name``.

    Args:
        var_name: Name of the environment variable to read.
        default_json: JSON text used when the variable is unset.

    Returns:
        The parsed object, or an empty dict when the text is not valid JSON
        or does not decode to a JSON object. Rejecting non-objects matters
        because consumers call ``.get`` on the result.
    """
    try:
        parsed = json.loads(os.getenv(var_name, default_json))
    except ValueError:
        return {}
    return parsed if isinstance(parsed, dict) else {}


# Per-endpoint concurrency limits, supplied as a JSON string in the
# environment, e.g. '{"search":10,"scrape":2}'.
PER_ENDPOINT_MAX_CONCURRENT = _load_json_object_env("FIRECRAWL_ENDPOINT_CONCURRENCY", "{}")

# Whether each endpoint may be retried. Endpoints missing from the mapping are
# treated as retryable by the request executor; the default disables retries
# for "scrape" to avoid repeating a non-idempotent call.
PER_ENDPOINT_ALLOW_RETRY = _load_json_object_env(
    "FIRECRAWL_ENDPOINT_RETRYABLE", '{"search": true, "scrape": false}'
)

# Global request semaphore; None until it is initialized (the original comment
# says this happens in startup_all, which is not visible in this part of the file).
REQUEST_SEMAPHORE = None

# endpoint name -> asyncio.Semaphore (per the original comment, populated in
# startup_all from PER_ENDPOINT_MAX_CONCURRENT).
ENDPOINT_SEMAPHORES: Dict[str, asyncio.Semaphore] = {}
172
+
173
# Manager for the single, process-wide httpx AsyncClient.
class AsyncHttpClientManager:
    """Own one shared ``httpx.AsyncClient`` with explicit startup/shutdown.

    All members are class-level, so the class itself acts as the singleton.
    """

    # The shared client; None before startup() and after shutdown().
    _client: Optional["httpx.AsyncClient"] = None
    # Serializes startup() and shutdown().
    _lock = asyncio.Lock()

    @classmethod
    async def startup(cls):
        """Create the shared client if it does not exist yet.

        Raises:
            RuntimeError: If httpx could not be imported (``m`` is None).
        """
        async with cls._lock:
            if cls._client is not None:
                return
            # Check availability before touching any httpx attribute so the
            # caller gets this explicit message when the library is absent.
            if m is None:
                raise RuntimeError("依赖库 'httpx' 未安装,请先安装 httpx")
            limits = m.Limits(
                max_connections=FIRECRAWL_MAX_CONNECTIONS,
                max_keepalive_connections=FIRECRAWL_KEEPALIVE
            )
            # NOTE(review): all four phases are set explicitly, so HTTP_TIMEOUT
            # presumably only acts as the unused default here — confirm
            # against the httpx.Timeout documentation.
            timeout_obj = m.Timeout(
                connect=5.0,
                read=20.0,
                write=10.0,
                pool=30.0,
                timeout=HTTP_TIMEOUT
            )
            cls._client = m.AsyncClient(
                timeout=timeout_obj,
                headers={"User-Agent": USER_AGENT},
                limits=limits,
                http2=FIRECRAWL_HTTP2
            )
            logger.info("httpx AsyncClient 已启动 (max_connections=%d, keepalive=%d, http2=%s, timeout=%s)",
                        FIRECRAWL_MAX_CONNECTIONS, FIRECRAWL_KEEPALIVE, FIRECRAWL_HTTP2, HTTP_TIMEOUT)

    @classmethod
    def get_client(cls) -> "httpx.AsyncClient":
        """Return the shared client.

        Raises:
            RuntimeError: If startup() has not been called.
        """
        if cls._client is None:
            raise RuntimeError("AsyncHttpClientManager 未启动,请先调用startup()")
        return cls._client

    @classmethod
    async def shutdown(cls):
        """Close the shared client, if any, and reset it to None."""
        async with cls._lock:
            if cls._client:
                await cls._client.aclose()
                logger.info("httpx AsyncClient 已关闭")
                cls._client = None
218
+
219
+
220
# Manager for the single, process-wide thread pool executor.
class ThreadPoolManager:
    """Own one shared ``ThreadPoolExecutor`` with explicit startup/shutdown."""

    # The shared executor; None before startup() and after shutdown().
    _executor: Optional[ThreadPoolExecutor] = None
    # Worker count used when startup() is called without an argument.
    _max_workers = FIRECRAWL_MAX_WORKERS

    @classmethod
    def startup(cls, max_workers: Optional[int] = None):
        """Create the shared executor if it does not exist yet.

        Args:
            max_workers: Worker-thread count to use. When omitted, the
                configured ``_max_workers`` (FIRECRAWL_MAX_WORKERS) applies.
                Previously this argument was accepted but ignored.
        """
        if cls._executor is None:
            if max_workers is not None:
                cls._max_workers = max_workers
            cls._executor = ThreadPoolExecutor(max_workers=cls._max_workers)
            logger.info(f"线程池启动,最大工作线程数: {cls._max_workers}")

    @classmethod
    def get_executor(cls) -> ThreadPoolExecutor:
        """Return the shared executor.

        Raises:
            RuntimeError: If startup() has not been called.
        """
        if cls._executor is None:
            raise RuntimeError("ThreadPoolManager 未启动,请先调用startup()")
        return cls._executor

    @classmethod
    def shutdown(cls):
        """Wait for running tasks, shut the executor down and reset it to None."""
        if cls._executor:
            cls._executor.shutdown(wait=True)
            logger.info("线程池已关闭")
            cls._executor = None
243
+
244
+
245
def error_response(message: str, status_code: Optional[int] = None, extra: Optional[Dict[str, Any]] = None) -> str:
    """Serialize a failure result to a JSON string.

    Args:
        message: Human-readable error description.
        status_code: Optional status code; the key is omitted when None.
        extra: Optional extra fields merged into the result (they may
            overwrite the standard keys).

    Returns:
        Pretty-printed JSON (indent 4, non-ASCII kept as-is) containing
        ``success`` = False, ``error`` = True and ``message``.
    """
    body: Dict[str, Any] = {"success": False, "error": True, "message": message}
    if status_code is not None:
        body["status_code"] = status_code
    if extra:
        body.update(extra)
    return json.dumps(body, ensure_ascii=False, indent=4)
256
+
257
+
258
def success_response(query_details: Dict[str, Any], results: Dict[str, Any]) -> str:
    """Serialize a successful result to a JSON string.

    Args:
        query_details: The request parameters to echo back.
        results: The data returned by the API.

    Returns:
        Pretty-printed JSON (indent 4, non-ASCII kept as-is) with the keys
        ``success`` (True), ``query_details`` and ``results``.
    """
    body = {
        "success": True,
        "query_details": query_details,
        "results": results,
    }
    return json.dumps(body, ensure_ascii=False, indent=4)
264
+
265
+
266
def get_country_code_alpha2(country_name: Optional[str]) -> str:
    """Resolve a country name or code to an upper-case two-letter code.

    Resolution order:
      1. Empty or blank input returns the default ``"US"``.
      2. The normalized input is looked up in ``ALIAS_MAP``.
      3. Otherwise, an input of exactly two ASCII letters is returned
         upper-cased as-is.
      4. Anything else logs a notice and returns ``"US"``.

    The earlier binary-search and upper-case retry passes were removed: the
    sorted key list holds exactly the keys of ``ALIAS_MAP``, and ``normalize``
    lowercases its input, so neither pass could find anything the dictionary
    lookup had missed. The two-letter fallback is now ASCII-only so that an
    unknown two-character non-Latin name is not returned as a country code.

    Args:
        country_name: Country name, alias or code; may be None.

    Returns:
        The resolved two-letter code, or ``"US"`` when nothing matches.
    """
    if not country_name:
        return "US"

    name = country_name.strip()
    if not name:
        return "US"

    code = ALIAS_MAP.get(normalize(name))
    if code is not None:
        return code

    # Fallback: accept a bare two-letter ASCII code that is not in the alias table.
    if len(name) == 2 and name.isascii() and name.isalpha():
        return name.upper()

    logger.info("未找到国家名称 '%s',使用默认国家码 US。", country_name)
    return "US"
308
+
309
+
310
def validate_search_num(num_val: int) -> int:
    """Return ``num_val`` when it lies in 1..100, otherwise fall back to 20.

    An out-of-range value is reported with a warning before the fallback
    value is returned.
    """
    if not (1 <= num_val <= 100):
        logger.warning("搜索数量 %d 超出范围(1-100),使用默认值20。", num_val)
        return 20
    return num_val
315
+
316
+
317
def map_search_time_to_tbs_param(time_period_str: Optional[str]) -> Optional[str]:
    """Map a user-friendly time range to a Google ``tbs`` value.

    The result is one of qdr:h, qdr:d, qdr:w, qdr:m, qdr:y, or None.

    Example inputs (Chinese/English spellings, abbreviations, numbers):
        "hour", "hours", "小时", "24h", "过去24小时"   -> qdr:h
        "day", "days", "天", "过去7天", "last 7 days"  -> qdr:d
        "week", "weeks", "周", "最近一周"              -> qdr:w
        "month", "months", "月", "最近一月"            -> qdr:m
        "year", "years", "年", "最近一年"              -> qdr:y

    Rules:
        - Input that starts with "qdr:" is returned (lower-cased) when it is
          one of the five allowed values, otherwise None.
        - Other input is tested against the patterns in priority order
          year -> month -> week -> day -> hour; the first match wins.
        - Unrecognized input returns None, meaning no tbs filter is set.

    Args:
        time_period_str: The raw time preference; may be None or empty.

    Returns:
        The tbs value, or None.
    """
    if not time_period_str:
        return None

    cleaned = time_period_str.strip().lower()

    # An explicit qdr: value is validated rather than pattern-matched.
    if cleaned.startswith("qdr:"):
        if cleaned in {"qdr:h", "qdr:d", "qdr:w", "qdr:m", "qdr:y"}:
            return cleaned
        return None

    # Ordered by priority; each pattern covers Chinese/English words,
    # abbreviations, plurals, numeric forms ("24h", "7天") and colloquial
    # phrases ("last week", "最近一周").
    rules = (
        (r"(?:\b(?:y|yr|yrs|year|years)\b|年|last\s+year|past\s+year|最近\s*\d*\s*年|过去\s*\d*\s*年|\d+\s*y\b|\d+\s*年)", "qdr:y"),
        (r"(?:\b(?:mo|mos|month|months)\b|月|last\s+month|past\s+month|最近\s*\d*\s*月|过去\s*\d*\s*月|\d+\s*mo\b|\d+\s*月)", "qdr:m"),
        (r"(?:\b(?:w|wk|wks|week|weeks)\b|周|星期|last\s+week|past\s+week|最近\s*\d*\s*周|过去\s*\d*\s*周|\d+\s*w\b|\d+\s*周)", "qdr:w"),
        (r"(?:\b(?:d|day|days)\b|天|日|daily|每?天|last\s+day|past\s+day|最近\s*\d*\s*天|过去\s*\d*\s*天|\d+\s*d\b|\d+\s*天)", "qdr:d"),
        (r"(?:\b(?:h|hr|hrs|hour|hours)\b|小时|小时内|past\s+hour|last\s+hour|最近\s*\d*\s*小时|过去\s*\d*\s*小时|\d+\s*h\b|\d+\s*小时)", "qdr:h"),
    )

    try:
        for pattern, tbs_value in rules:
            if re.search(pattern, cleaned, flags=re.IGNORECASE):
                return tbs_value
    except re.error:
        # Defensive: log a regex failure and skip the time filter.
        logger.exception("时间范围匹配正则错误,输入: %s", time_period_str)
        return None

    logger.info("未识别的时间偏好 '%s',忽略时间过滤。", time_period_str)
    return None
370
+
371
+
372
def _retry_backoff_delay(attempt: int) -> float:
    """Return the exponential backoff delay (plus small jitter) for ``attempt`` (1-based)."""
    return FIRECRAWL_RETRY_BASE_DELAY * (2 ** (attempt - 1)) + random.uniform(0, 0.1)


async def execute_firecrawl_request(
    api_url: str,
    payload: Dict[str, Any],
    api_name: str
) -> Union[Dict[str, Any], None]:
    """POST ``payload`` to a Firecrawl endpoint with concurrency control and retries.

    Args:
        api_url: Full URL of the endpoint.
        payload: JSON-serializable request body.
        api_name: Endpoint key (e.g. "search"); it selects the semaphore and
            the retry policy and is used in log and error messages.

    Returns:
        - None when no API key is configured.
        - The decoded JSON response on success.
        - A dict with ``error`` = True and ``message`` (plus ``status_code``
          for HTTP status errors) on failure.

    Retries: HTTP 5xx responses and transport errors are retried up to
    FIRECRAWL_RETRY_COUNT times with exponential backoff, but only when the
    endpoint is marked retryable in PER_ENDPOINT_ALLOW_RETRY (default True).
    """
    if not API_KEY:
        logger.error("未配置FIRECRAWL_API_KEY,无法调用 %s 接口。", api_name)
        return None

    headers = {
        "X-API-KEY": API_KEY,
        # Firecrawl documents Bearer authentication; the original header is
        # kept as well so existing behavior is a subset of the new one.
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }

    try:
        client = AsyncHttpClientManager.get_client()
    except RuntimeError as e:
        logger.error("HTTP客户端未启动: %s", e)
        return {"error": True, "message": f"{api_name}接口请求失败,HTTP客户端未启动。"}

    logger.info("准备调用 %s 接口,payload: %s", api_name, payload)

    # Prefer the per-endpoint semaphore and fall back to the global one.
    # Previously this selection was computed but the global semaphore was
    # always used, so per-endpoint limits had no effect.
    sem = ENDPOINT_SEMAPHORES.get(api_name, REQUEST_SEMAPHORE)
    # Whether this endpoint may be retried (default True).
    retry_allowed = PER_ENDPOINT_ALLOW_RETRY.get(api_name, True)

    attempt = 0
    while True:
        try:
            # Concurrency control applies only once a semaphore exists.
            if sem is not None:
                async with sem:
                    response = await client.post(api_url, json=payload, headers=headers)
            else:
                response = await client.post(api_url, json=payload, headers=headers)

            # Turn 4xx/5xx responses into HTTPStatusError.
            response.raise_for_status()

            try:
                result = response.json()
            except Exception as e:
                logger.error("%s接口JSON解析错误: %s", api_name, e)
                return {"error": True, "message": f"{api_name}接口响应解析失败: {e}"}

            return result

        except httpx.HTTPStatusError as e:
            status = e.response.status_code if e.response is not None else None
            logger.warning("%s 接口返回 HTTP 错误 %s (尝试 %d/%d)", api_name, status, attempt + 1, FIRECRAWL_RETRY_COUNT)
            # Only server-side (5xx) failures are retried.
            if retry_allowed and status and 500 <= status < 600 and attempt < FIRECRAWL_RETRY_COUNT:
                attempt += 1
                delay = _retry_backoff_delay(attempt)
                logger.info("对 %s 接口在 %s 秒后重试 (HTTP %s)", api_name, round(delay, 2), status)
                await asyncio.sleep(delay)
                continue
            logger.error("%s接口HTTP错误 %s %s: %s", api_name, status, e.request.url if e.request else "", e)
            return {
                "error": True,
                "message": f"{api_name}接口HTTP状态错误: {status}",
                "status_code": status
            }

        except httpx.RequestError as e:
            logger.warning("%s 接口请求错误 (尝试 %d/%d): %s", api_name, attempt + 1, FIRECRAWL_RETRY_COUNT, e)
            if retry_allowed and attempt < FIRECRAWL_RETRY_COUNT:
                attempt += 1
                delay = _retry_backoff_delay(attempt)
                logger.info("对 %s 接口在 %s 秒后重试 (请求错误)", api_name, round(delay, 2))
                await asyncio.sleep(delay)
                continue
            logger.error("%s接口请求错误: %s", api_name, e)
            return {"error": True, "message": f"{api_name}接口请求错误: {e}"}

        except Exception as e:
            logger.exception("%s接口未知错误: %s", api_name, e)
            return {"error": True, "message": f"{api_name}接口未知错误: {e}"}
456
+
457
+
458
@mcp.tool(name="firecrawl-search")
async def firecrawl_search(
    query: str,
    country: Optional[str] = None,
    search_num: int = 10,
    search_time: Optional[str] = None,
) -> str:
    """
    General-purpose search.

    Args:
        query: Search keywords (required, must not be blank).
        country: Optional country name in Chinese or English, or a country
            code; unresolved values fall back to US.
        search_num: Number of results, 1-100. Defaults to 10; a value outside
            the range is replaced by 20.
        search_time: Optional time filter such as "hour", "day", "week",
            "month", "year" (Chinese equivalents are accepted as well).

    Returns:
        A JSON string containing the request parameters and the search
        results, or an error description.
    """
    api_name_key = "search"
    api_url = API_ENDPOINTS.get(api_name_key)
    if not api_url:
        return error_response(f"未知API端点 '{api_name_key}',无法处理请求。")

    # Reject a blank query up front, mirroring the url check of the scrape tool.
    if not query or not isinstance(query, str) or not query.strip():
        return error_response("参数 query 必填且不能为空字符串。")

    # Resolve the country to an upper-case two-letter code.
    country_code = get_country_code_alpha2(country)
    logger.info("search country param '%s' -> resolved country_code: %s", country, country_code)

    # Fixed request body; "tbs" is added below only when a time filter resolves.
    payload: Dict[str, Any] = {
        "query": query,
        "limit": validate_search_num(search_num),
        "sources": [{"type": "web"}, {"type": "news"}, {"type": "images"}],
        "country": country_code,
        "timeout": 60000,
        "ignoreInvalidURLs": False,
        "scrapeOptions": {
            "formats": [],
            "onlyMainContent": True,
            "maxAge": 172800000,
            "waitFor": 0,
            "mobile": False,
            # NOTE(review): TLS verification is disabled for scraped pages —
            # confirm this is intentional.
            "skipTlsVerification": True,
            "timeout": 30000,
            "parsers": [],
            "location": {
                "country": country_code
            },
            "removeBase64Images": True,
            "blockAds": True,
            "proxy": "auto",
            "storeInCache": True
        }
    }

    tbs_val = map_search_time_to_tbs_param(search_time)
    if tbs_val:
        payload["tbs"] = tbs_val

    result = await execute_firecrawl_request(api_url, payload, api_name_key)
    if result is None:
        # The executor returns None only when no API key is configured.
        return error_response(f"{api_name_key}请求失败,未配置FIRECRAWL_API_KEY。")
    if isinstance(result, dict) and result.get("error"):
        return json.dumps({
            "success": False,
            "query_details": payload,
            "error": result.get("error"),
            "message": result.get("message", "未知错误"),
            "status_code": result.get("status_code", None),
        }, ensure_ascii=False, indent=4)
    return success_response(payload, result)
529
+
530
+
531
@mcp.tool(name="firecrawl-scrape")
async def firecrawl_scrape(
    url: str,
) -> str:
    """
    Scrape a web page via the Firecrawl scrape endpoint, requesting markdown output.

    Args:
        url: URL of the target page (required; a blank or non-string value is
            rejected). Surrounding whitespace is removed before the URL is sent.

    Returns:
        A JSON string. On success it is the output of ``success_response`` for
        the request payload and the API result. On failure it is either the
        output of ``error_response`` or a JSON object with ``success: false``,
        the request payload (``query_details``), and the error details.
    """
    api_name_key = "scrape"
    api_url = API_ENDPOINTS.get(api_name_key)
    if not api_url:
        return error_response(f"未知API_KEY '{api_name_key}',无法处理请求。")

    if not isinstance(url, str) or not url.strip():
        return error_response("参数 url 必填且不能为空字符串。")

    # Send the same value that was validated: the URL without stray whitespace.
    target_url = url.strip()

    payload: Dict[str, Any] = {
        "url": target_url,
        "formats": ["markdown"],
        "onlyMainContent": True,
        "includeTags": [],
        "excludeTags": [],
        "maxAge": 172800000,
        "headers": {},
        "waitFor": 0,
        "mobile": False,
        "skipTlsVerification": True,
        "timeout": 30000,
        "parsers": ["pdf"],
        "removeBase64Images": True,
        "blockAds": True,
        "proxy": "auto",
        "storeInCache": True,
    }

    # Delegate the HTTP call to the shared request executor.
    result = await execute_firecrawl_request(api_url, payload, api_name_key)
    if result is None:
        return error_response(f"{api_name_key}请求失败,接口响应为空。")
    if isinstance(result, dict) and result.get("error"):
        # The executor reports failures as a dict with a truthy "error" key;
        # surface them together with the payload that was sent.
        return json.dumps({
            "success": False,
            "query_details": payload,
            "error": result.get("error"),
            "message": result.get("message", "未知错误"),
            "status_code": result.get("status_code", None),
        }, ensure_ascii=False, indent=4)
    return success_response(payload, result)
581
+
582
+
583
# Example: run a synchronous, blocking function asynchronously via the thread pool.
async def run_blocking_task_in_threadpool(blocking_func, *args, **kwargs):
    """
    Await the result of ``blocking_func(*args, **kwargs)`` executed on the
    executor provided by ``ThreadPoolManager.get_executor()``.
    """

    def _invoke():
        # run_in_executor only forwards positional arguments, so bind the
        # keyword arguments in a closure.
        return blocking_func(*args, **kwargs)

    running_loop = asyncio.get_running_loop()
    pool = ThreadPoolManager.get_executor()
    return await running_loop.run_in_executor(pool, _invoke)
588
+
589
+
590
async def startup_all():
    """
    Initialise shared runtime resources: the async HTTP client manager, the
    thread pool, and the module-level request semaphore.
    """
    global REQUEST_SEMAPHORE

    await AsyncHttpClientManager.startup()
    ThreadPoolManager.startup(max_workers=FIRECRAWL_MAX_WORKERS)

    # Semaphore sized by FIRECRAWL_MAX_CONCURRENT_REQUESTS, stored globally for
    # request concurrency control.
    REQUEST_SEMAPHORE = asyncio.Semaphore(FIRECRAWL_MAX_CONCURRENT_REQUESTS)
    logger.info(
        "已初始化请求并发控制,最大并发请求数: %d,线程池最大工作线程: %d",
        FIRECRAWL_MAX_CONCURRENT_REQUESTS,
        FIRECRAWL_MAX_WORKERS,
    )
    # Additional initialisation steps can be added here.
598
+
599
+
600
async def shutdown_all():
    """
    Release the resources set up by ``startup_all``: the async HTTP client
    manager first, then the thread pool.

    The thread pool is shut down even when closing the HTTP client raises, so a
    failure there cannot leak the executor; the exception still propagates.
    """
    try:
        await AsyncHttpClientManager.shutdown()
    finally:
        ThreadPoolManager.shutdown()
    # Additional cleanup steps can be added here.
604
+
605
+
606
def main():
    """
    Synchronous entry point for the MCP server.

    Creates a dedicated asyncio event loop, runs ``startup_all`` on it, then
    hands control to the blocking ``mcp.run(transport="stdio")`` call. When that
    call returns, raises, or is interrupted with Ctrl+C, ``shutdown_all`` is run
    on the same loop and the loop is closed. The loop is closed even if the
    shutdown coroutine itself raises; that exception still propagates.
    """
    if not API_KEY:
        # The server still starts without a key; this only warns the operator.
        logger.error(
            "警告:环境变量FIRECRAWL_API_KEY未设置,启动后所有接口调用均不可用。"
            "请在.env文件或环境变量中配置。"
        )
    else:
        logger.info("加载到FIRECRAWL_API_KEY,准备启动Firecrawl MCP工具接口服务。")

    # Manually manage an event loop for the async setup/teardown steps.
    # NOTE(review): mcp.run() presumably drives its own event loop, so resources
    # created here may later be used from a different loop - confirm this is safe.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    try:
        # Run the async startup tasks.
        loop.run_until_complete(startup_all())
        logger.info("Firecrawl MCP 工具服务已启动。")

        # Blocking call: serves MCP requests over stdio until it returns.
        mcp.run(transport="stdio")

    except KeyboardInterrupt:
        logger.info("接收到中断信号,正在关闭服务...")
    finally:
        logger.info("开始关闭异步资源...")
        try:
            # Run the async shutdown tasks.
            loop.run_until_complete(shutdown_all())
        finally:
            # Always release the loop, even when shutdown fails.
            loop.close()
        logger.info("Firecrawl MCP工具接口服务已安全关闭。")
639
+
640
# Allows direct execution of this module, e.g. 'python -m firecrawl_toolkit.server'.
if __name__ == "__main__":
    main()
@@ -0,0 +1,141 @@
1
+ Metadata-Version: 2.4
2
+ Name: firecrawl-toolkit
3
+ Version: 0.0.1
4
+ Summary: A high-performance, asynchronous MCP server for Firecrawl Search, featuring connection pooling, request retries, and intelligent input parsing.
5
+ Author-email: "Joey.Kot" <joey.kot.x@gmail.com>
6
+ Keywords: firecrawl,mcp,server,google,search
7
+ Requires-Python: >=3.12
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: httpx>=0.28.1
10
+ Requires-Dist: h2>=4.2.0
11
+ Requires-Dist: dotenv>=0.9.9
12
+ Requires-Dist: mcp>=1.9.4
13
+
14
+ # Firecrawl MCP Toolkit
15
+
16
+ A high-performance, asynchronous MCP server that provides comprehensive Google search and web content scraping capabilities through the Firecrawl API (excluding some rarely used interfaces).
17
+
18
+ This project is built on `httpx`, utilizing asynchronous clients and connection pool management to offer LLMs a stable and efficient external information retrieval tool.
19
+
20
+ ## Key Features
21
+
22
+ - **Asynchronous Architecture**: Fully based on `asyncio` and `httpx`, ensuring high throughput and non-blocking I/O operations.
23
+ - **HTTP Connection Pool**: Manages and reuses TCP connections through a global `httpx.AsyncClient` instance, significantly improving performance under high concurrency.
24
+ - **Concurrency Control**: Built-in global and per-API endpoint concurrency semaphores effectively manage API request rates to prevent exceeding rate limits.
25
+ - **Automatic Retry Mechanism**: Integrated request retry functionality with exponential backoff strategy automatically handles temporary network fluctuations or server errors, enhancing service stability.
26
+ - **Intelligent Country Code Parsing**: Includes a comprehensive country name dictionary supporting inputs in Chinese, English, ISO Alpha-2/3, and other formats, with automatic normalization.
27
+ - **Flexible Environment Variable Configuration**: Supports fine-tuned service configuration via environment variables.
28
+
29
+ ## Available Tools
30
+
31
+ This service provides the following tools:
32
+
33
+ | Tool Name | Description |
34
+ | ------------------------ | -------------------------------------------- |
35
+ | `firecrawl-search` | Performs general Google web / news / images searches. |
36
+ | `firecrawl-scrape` | Scrapes and returns the content of a specified URL. |
37
+
38
+ ## Installation Guide
39
+
40
+ It is recommended to install using `pip` or `uv`.
41
+
42
+ ```bash
43
+ # Using pip
44
+ pip install firecrawl-toolkit
45
+
46
+ # Or using uv
47
+ uv pip install firecrawl-toolkit
48
+ ```
49
+
50
+ ## Quick Start
51
+
52
+ ### Set Environment Variables
53
+
54
+ Create a `.env` file in the project root directory and enter your Firecrawl API key:
55
+
56
+ ```bash
57
+ FIRECRAWL_API_KEY="your-firecrawl-api-key-here"
58
+ ```
59
+
60
+ ### Configure MCP Client
61
+
62
+ Add the following server configuration in the MCP client configuration file:
63
+
64
+ ```json
65
+ {
66
+ "mcpServers": {
67
+ "firecrawl": {
68
+ "command": "python3",
69
+ "args": ["-m", "firecrawl_toolkit"],
70
+ "env": {
71
+ "FIRECRAWL_API_KEY": "<Your Firecrawl API key>"
72
+ }
73
+ }
74
+ }
75
+ }
76
+ ```
77
+
78
+ ```json
79
+ {
80
+ "mcpServers": {
81
+ "firecrawl": {
82
+ "command": "uvx",
83
+ "args": ["firecrawl-toolkit"],
84
+ "env": {
85
+ "FIRECRAWL_API_KEY": "<Your Firecrawl API key>"
86
+ }
87
+ }
88
+ }
89
+ }
90
+ ```
91
+
92
+ ### Environment Variables
93
+
94
+ - `FIRECRAWL_MAX_CONNECTIONS`: Maximum number of HTTP client connections (default: 200).
95
+ - `FIRECRAWL_KEEPALIVE`: Maximum number of keep-alive HTTP client connections (default: 20).
96
+ - `FIRECRAWL_HTTP2`: Enable HTTP/2 (default: "0", set to "1" to enable).
97
+ - `FIRECRAWL_MAX_CONCURRENT_REQUESTS`: Global maximum concurrent requests (default: 200).
98
+ - `FIRECRAWL_RETRY_COUNT`: Maximum retry attempts for failed requests (default: 3).
99
+ - `FIRECRAWL_RETRY_BASE_DELAY`: Base delay time for retries in seconds (default: 0.5).
100
+ - `FIRECRAWL_ENDPOINT_CONCURRENCY`: Set concurrency per endpoint (JSON format), e.g., {"search":10,"scrape":2}.
101
+ - `FIRECRAWL_ENDPOINT_RETRYABLE`: Set retry allowance per endpoint (JSON format), e.g., {"scrape": false}.
102
+
103
+ ## Tool Parameters and Usage Examples
104
+
105
+ ### firecrawl-search: Perform web / news / images search
106
+
107
+ Parameters:
108
+
109
+ - `query` (str, required): Keywords to search.
110
+ - `country` (str, optional): Specify the country/region for search results. Supports Chinese names (e.g., "中国"), English names (e.g., "United States"), or ISO codes (e.g., "US"). Default is "US".
111
+ - `search_num` (int, optional): Number of results to return, range 1-100. Default is 10.
112
+ - `search_time` (str, optional): Filter results by time range. Available values: "hour", "day", "week", "month", "year".
113
+
114
+ Example:
115
+
116
+ ```Python
117
+ result_json = firecrawl_search(
118
+ query="AI advancements 2024",
119
+ country="United States",
120
+ search_num=5,
121
+ search_time="month"
122
+ )
123
+ ```
124
+
125
+ ### firecrawl-scrape: Scrape webpage content
126
+
127
+ Parameters:
128
+
129
+ - `url` (str, required): URL of the target webpage.
130
+
131
+ Example:
132
+
133
+ ```Python
134
+ result_json = firecrawl_scrape(
135
+ url="https://www.example.com"
136
+ )
137
+ ```
138
+
139
+ ## License Agreement
140
+
141
+ This project is licensed under the MIT License.
@@ -0,0 +1,9 @@
1
+ firecrawl_toolkit/__init__.py,sha256=u4K9tem9kkS7WI1nMAmMJsnWJxKQZ1jLHLZfmuM8VAk,71
2
+ firecrawl_toolkit/__main__.py,sha256=-fcn6jYFsKemDYTy_dea1azU4YxjVredpkEvszxrd1I,163
3
+ firecrawl_toolkit/server.py,sha256=qVDW53BrN9YRigC9YOkzGbLUC3nu3PJ6eq6nQdwrgfM,24609
4
+ firecrawl_toolkit/data/country_aliases.json,sha256=c2ScVZ4jyKocn_F7PYo-2xdmoOyb1fHn-qfLeMSqG94,15845
5
+ firecrawl_toolkit-0.0.1.dist-info/METADATA,sha256=o0k2GmbA4t8TdfMxPc9uZra4xsTEEwN7SP05nqmxKaU,4922
6
+ firecrawl_toolkit-0.0.1.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
7
+ firecrawl_toolkit-0.0.1.dist-info/entry_points.txt,sha256=a6BYr8iEhqCNQ9oyS1gDq79_rBrEftU7THkvtglHbuM,70
8
+ firecrawl_toolkit-0.0.1.dist-info/top_level.txt,sha256=8Hv_iBv0Rl-Sk84NqMcNTHA_dQp3VCLLGrTr6716jFM,18
9
+ firecrawl_toolkit-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.10.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ firecrawl-toolkit = firecrawl_toolkit.__main__:main
@@ -0,0 +1 @@
1
+ firecrawl_toolkit