cobweb-launcher 1.0.5__py3-none-any.whl → 3.2.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. cobweb/__init__.py +5 -1
  2. cobweb/base/__init__.py +3 -3
  3. cobweb/base/common_queue.py +37 -16
  4. cobweb/base/item.py +40 -14
  5. cobweb/base/{log.py → logger.py} +3 -3
  6. cobweb/base/request.py +744 -47
  7. cobweb/base/response.py +381 -13
  8. cobweb/base/seed.py +98 -50
  9. cobweb/base/task_queue.py +180 -0
  10. cobweb/base/test.py +257 -0
  11. cobweb/constant.py +39 -2
  12. cobweb/crawlers/__init__.py +1 -2
  13. cobweb/crawlers/crawler.py +27 -0
  14. cobweb/db/__init__.py +1 -0
  15. cobweb/db/api_db.py +83 -0
  16. cobweb/db/redis_db.py +118 -27
  17. cobweb/launchers/__init__.py +3 -1
  18. cobweb/launchers/distributor.py +141 -0
  19. cobweb/launchers/launcher.py +103 -130
  20. cobweb/launchers/uploader.py +68 -0
  21. cobweb/log_dots/__init__.py +2 -0
  22. cobweb/log_dots/dot.py +258 -0
  23. cobweb/log_dots/loghub_dot.py +53 -0
  24. cobweb/pipelines/__init__.py +3 -2
  25. cobweb/pipelines/pipeline.py +19 -0
  26. cobweb/pipelines/pipeline_csv.py +25 -0
  27. cobweb/pipelines/pipeline_loghub.py +54 -0
  28. cobweb/schedulers/__init__.py +1 -0
  29. cobweb/schedulers/scheduler.py +66 -0
  30. cobweb/schedulers/scheduler_with_redis.py +189 -0
  31. cobweb/setting.py +37 -38
  32. cobweb/utils/__init__.py +5 -2
  33. cobweb/utils/bloom.py +58 -0
  34. cobweb/{base → utils}/decorators.py +14 -12
  35. cobweb/utils/dotting.py +300 -0
  36. cobweb/utils/oss.py +113 -86
  37. cobweb/utils/tools.py +3 -15
  38. cobweb_launcher-3.2.18.dist-info/METADATA +193 -0
  39. cobweb_launcher-3.2.18.dist-info/RECORD +44 -0
  40. {cobweb_launcher-1.0.5.dist-info → cobweb_launcher-3.2.18.dist-info}/WHEEL +1 -1
  41. cobweb/crawlers/base_crawler.py +0 -121
  42. cobweb/crawlers/file_crawler.py +0 -181
  43. cobweb/launchers/launcher_pro.py +0 -174
  44. cobweb/pipelines/base_pipeline.py +0 -54
  45. cobweb/pipelines/loghub_pipeline.py +0 -34
  46. cobweb_launcher-1.0.5.dist-info/METADATA +0 -48
  47. cobweb_launcher-1.0.5.dist-info/RECORD +0 -32
  48. {cobweb_launcher-1.0.5.dist-info → cobweb_launcher-3.2.18.dist-info}/LICENSE +0 -0
  49. {cobweb_launcher-1.0.5.dist-info → cobweb_launcher-3.2.18.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,180 @@
1
+ import time
2
+ import threading
3
+ from enum import Enum
4
+ from hashlib import md5
5
+ from dataclasses import dataclass
6
+ from typing import Dict, Any, Optional, List
7
+
8
+
9
class Status(Enum):
    """States a task can be in while it lives in the task queue."""

    PENDING = 0  # waiting to be processed
    PROCESSING = 1  # currently being processed
    FINISHED = 2  # processing completed
    INSERT = 3  # newly added
    UPLOAD = 4  # upload
15
+
16
+
17
@dataclass
class Task:
    """One queue entry (a "seed") plus the bookkeeping fields kept for it."""

    task_id: str  # unique id of the seed
    data: Any  # seed payload
    status: Status  # current state
    priority: int  # priority (smaller value is served first)
    created_at: float  # creation timestamp
    parent_id: Optional[str] = None  # id of the parent seed
    children_ids: List[str] = None  # ids of the child seeds
    ttl_seconds: Optional[int] = None  # optional TTL in seconds

    def __post_init__(self):
        # A ``None`` default is replaced per instance so that tasks never
        # share one children list.
        self.children_ids = [] if self.children_ids is None else self.children_ids
31
+
32
+
33
class TaskQueue:
    """In-memory task store keyed by task id and guarded by one lock.

    Tasks are selected by ``(priority, created_at)``, smallest first.
    Parent/child relations are tracked through ``Task.parent_id`` and
    ``Task.children_ids``.

    NOTE: ``Task.ttl_seconds`` is stored but never enforced here; no
    background cleanup is started.
    """

    def __init__(self, cleanup_interval=60):
        """Create an empty queue.

        Args:
            cleanup_interval: Accepted for backward compatibility only; it
                is not used because no cleanup thread is started.
        """
        self._tasks: Dict[str, Task] = {}
        self._lock = threading.Lock()

    @staticmethod
    def _sort_key(task):
        """Ordering used everywhere a "next" task has to be chosen."""
        return task.priority, task.created_at

    def length(self) -> int:
        """Return the number of tasks currently stored."""
        with self._lock:
            return len(self._tasks)

    def status_length(self, status) -> int:
        """Return how many stored tasks have exactly ``status``."""
        with self._lock:
            return sum(1 for it in self._tasks.values() if it.status == status)

    def get_task(self, task_id) -> Optional[Task]:
        """Return the task stored under ``task_id``, or ``None`` if absent."""
        with self._lock:
            return self._tasks.get(task_id)

    def get_task_by_status(self, status: list, limit: int = None) -> List[Task]:
        """Return tasks whose status is in ``status``, best priority first.

        Args:
            status: A list of statuses; a single status is also accepted.
            limit: Maximum number of tasks to return; a falsy value means
                "no limit".
        """
        wanted = status if isinstance(status, list) else [status]
        with self._lock:
            task_list = [it for it in self._tasks.values() if it.status in wanted]
        task_list.sort(key=self._sort_key)
        return task_list[:limit] if limit else task_list

    def get_pending_task(self) -> Optional[Task]:
        """Claim the best pending task and mark it ``PROCESSING``.

        Returns:
            The claimed task, or ``None`` when nothing is pending.
        """
        with self._lock:
            pending = [it for it in self._tasks.values() if it.status == Status.PENDING]
            if not pending:
                return None
            # min() returns the first minimal element, which is what a
            # stable sort followed by [0] would pick.
            task_item = min(pending, key=self._sort_key)
            task_item.status = Status.PROCESSING
            return task_item

    def pop_task(self, status) -> Optional[Task]:
        """Remove and return the best task with ``status``.

        The task is removed together with all of its descendants, and its
        id is detached from its parent's ``children_ids``.

        Returns:
            The removed task, or ``None`` when no task has ``status``.
        """
        with self._lock:
            candidates = [it for it in self._tasks.values() if it.status == status]
            if not candidates:
                return None
            root = min(candidates, key=self._sort_key)

            # Walk the subtree and delete every node that is still stored.
            stack = [root.task_id]
            while stack:
                removed = self._tasks.pop(stack.pop(), None)
                if removed is not None:
                    stack.extend(removed.children_ids)

            # Only the root can have a parent that outlives the removal, so
            # that is the only link that has to be cleaned up.
            parent = self._tasks.get(root.parent_id)
            if parent is not None and root.task_id in parent.children_ids:
                parent.children_ids.remove(root.task_id)

            return root

    def add_task(
            self,
            task_id: str = None,
            data: Any = None,
            status=Status.PENDING,
            priority: int = 500,
            parent_id: Optional[str] = None,
            ttl_seconds: Optional[int] = None
    ) -> bool:
        """Add a new task, optionally linked to a parent task.

        Args:
            task_id: Id of the task; a random 32-character hex id is
                generated when it is falsy.
            data: Payload stored on the task.
            status: Initial status.
            priority: Smaller values are served first.
            parent_id: Id of the parent task; the link is recorded only if
                that parent is currently stored.
            ttl_seconds: Stored on the task as-is.

        Returns:
            ``False`` if ``task_id`` is already stored, ``True`` otherwise.
        """
        import uuid

        with self._lock:
            if not task_id:
                # A timestamp hash repeats for calls made within the clock's
                # resolution, which made such calls look like duplicates.
                task_id = uuid.uuid4().hex

            if task_id in self._tasks:
                return False  # refuse duplicates

            self._tasks[task_id] = Task(
                task_id=task_id,
                data=data,
                status=status,
                priority=priority,
                created_at=int(time.time()),
                parent_id=parent_id,
                ttl_seconds=ttl_seconds
            )

            if parent_id and parent_id in self._tasks:
                self._tasks[parent_id].children_ids.append(task_id)

            return True

    def update_task(self, task_id, status, data=None) -> Task:
        """Set a task's status (and optionally its data) and return it.

        When the new status is not ``FINISHED``, children that are still in
        ``INSERT`` state are deleted and the task's child list is reset.

        Raises:
            KeyError: If ``task_id`` is not stored.
        """
        with self._lock:
            task_item = self._tasks[task_id]
            task_item.status = status
            if data:
                task_item.data = data

            if status != Status.FINISHED:
                for tid in task_item.children_ids:
                    # A child may already be gone; skip it instead of failing.
                    child = self._tasks.get(tid)
                    if child is not None and child.status == Status.INSERT:
                        del self._tasks[tid]
                # NOTE(review): the reviewed text had lost its indentation;
                # the reset is assumed to belong to this non-FINISHED branch,
                # because remove() relies on a finished task keeping its
                # child list. Confirm against the original file.
                task_item.children_ids = []

            return task_item

    def remove(self, task_ids: list) -> None:
        """Delete the given tasks, skipping any that still have children.

        Each deleted task is also detached from its parent's child list.
        Unknown ids are ignored.
        """
        with self._lock:
            for task_id in task_ids:
                task_item = self._tasks.get(task_id)
                if task_item is None or task_item.children_ids:
                    continue

                parent = self._tasks.get(task_item.parent_id)
                if parent is not None and task_id in parent.children_ids:
                    parent.children_ids.remove(task_id)

                del self._tasks[task_id]

    def count_children(self, task_id: str) -> int:
        """Return the number of recorded children, or 0 for an unknown id."""
        with self._lock:
            task_item = self._tasks.get(task_id)
            return len(task_item.children_ids) if task_item is not None else 0
cobweb/base/test.py ADDED
@@ -0,0 +1,257 @@
1
+ import requests
2
+ from urllib.parse import urlparse
3
+ from typing import Dict, Optional
4
+
5
+
6
class FileTypeDetector:
    """Guess a remote file's type from its MIME type, URL extension and magic bytes."""

    # Form-type tags found at bytes 8..12 of a RIFF container.
    _RIFF_SUBTYPES = {b'WEBP': 'WEBP', b'AVI ': 'AVI', b'WAVE': 'WAV'}

    # ``ftyp`` major brands (bytes 8..12) mapped to the reported type.
    _FTYP_BRANDS = {
        b'mp41': 'MP4', b'mp42': 'MP4', b'isom': 'MP4', b'avc1': 'MP4',
        b'M4A ': 'M4A', b'M4V ': 'M4V', b'qt  ': 'MOV',
    }

    def __init__(self):
        """Build the lookup tables and the HTTP session used for probing."""
        # Magic-byte prefixes. RIFF and OggS are containers that need a
        # second look (see detect_by_signature); the value stored here is
        # only the fallback used when the data is too short for that look.
        self.file_signatures = {
            # images
            b'\x89PNG\r\n\x1a\n': 'PNG',
            b'\xff\xd8\xff': 'JPEG',
            b'GIF87a': 'GIF',
            b'GIF89a': 'GIF',
            b'RIFF': 'WAV',  # WEBP / AVI / WAV, refined later
            b'BM': 'BMP',
            b'II*\x00': 'TIFF',
            b'MM\x00*': 'TIFF',
            b'\x00\x00\x01\x00': 'ICO',
            b'\x00\x00\x02\x00': 'CUR',

            # video
            b'\x00\x00\x00\x18ftypmp4': 'MP4',
            b'\x00\x00\x00\x20ftypM4V': 'M4V',
            b'FLV\x01': 'FLV',
            b'\x1aE\xdf\xa3': 'WEBM',
            b'\x00\x00\x01\xba': 'MPEG',
            b'\x00\x00\x01\xb3': 'MPEG',
            b'OggS': 'OGG',  # OGG / OGV, refined later

            # audio
            b'ID3': 'MP3',
            b'\xff\xfb': 'MP3',
            b'\xff\xf3': 'MP3',
            b'\xff\xf2': 'MP3',
            b'fLaC': 'FLAC',
            b'ftypM4A': 'M4A',
            b'MAC ': 'APE',

            # other
            b'%PDF': 'PDF',
            b'PK\x03\x04': 'ZIP',
            b'Rar!\x1a\x07\x00': 'RAR',
            b'\x37\x7a\xbc\xaf\x27\x1c': '7Z',
        }

        # Lower-case URL extension -> type.
        self.extension_map = {
            # images
            '.jpg': 'JPEG', '.jpeg': 'JPEG', '.png': 'PNG', '.gif': 'GIF',
            '.webp': 'WEBP', '.bmp': 'BMP', '.tiff': 'TIFF', '.tif': 'TIFF',
            '.ico': 'ICO', '.svg': 'SVG', '.heic': 'HEIC', '.avif': 'AVIF',

            # video
            '.mp4': 'MP4', '.avi': 'AVI', '.mov': 'MOV', '.wmv': 'WMV',
            '.flv': 'FLV', '.webm': 'WEBM', '.mkv': 'MKV', '.m4v': 'M4V',
            '.mpg': 'MPEG', '.mpeg': 'MPEG', '.3gp': '3GP', '.ogv': 'OGV',
            '.ts': 'TS', '.mts': 'MTS', '.vob': 'VOB',

            # audio
            '.mp3': 'MP3', '.wav': 'WAV', '.flac': 'FLAC', '.aac': 'AAC',
            '.ogg': 'OGG', '.wma': 'WMA', '.m4a': 'M4A', '.ape': 'APE',
            '.opus': 'OPUS', '.aiff': 'AIFF', '.au': 'AU',
        }

        # MIME type -> type.
        self.mime_type_map = {
            # images
            'image/jpeg': 'JPEG', 'image/png': 'PNG', 'image/gif': 'GIF',
            'image/webp': 'WEBP', 'image/bmp': 'BMP', 'image/tiff': 'TIFF',
            'image/svg+xml': 'SVG', 'image/x-icon': 'ICO',

            # video
            'video/mp4': 'MP4', 'video/avi': 'AVI', 'video/quicktime': 'MOV',
            'video/x-msvideo': 'AVI', 'video/webm': 'WEBM', 'video/x-flv': 'FLV',
            'video/3gpp': '3GP', 'video/ogg': 'OGV',

            # audio
            'audio/mpeg': 'MP3', 'audio/wav': 'WAV', 'audio/flac': 'FLAC',
            'audio/aac': 'AAC', 'audio/ogg': 'OGG', 'audio/x-ms-wma': 'WMA',
            'audio/mp4': 'M4A', 'audio/opus': 'OPUS',
        }

        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })

    def get_file_extension(self, url: str) -> tuple:
        """Return ``(extension, site)`` for ``url``.

        ``extension`` is the lower-cased suffix of the last path segment
        including the leading dot, or ``''`` when that segment has no dot.
        ``site`` is the URL's network location.
        """
        parsed = urlparse(url)
        site = parsed.netloc

        # urlparse already keeps the query string out of ``path``. Only the
        # last segment is inspected so that a dot in a directory name is not
        # mistaken for an extension.
        filename = parsed.path.lower().rsplit('/', 1)[-1]
        if '.' in filename:
            return '.' + filename.rsplit('.', 1)[-1], site
        return '', site

    def detect_by_extension(self, url: str) -> Optional[str]:
        """Return the type mapped to the URL's extension, or ``None``."""
        ext, _site = self.get_file_extension(url)
        return self.extension_map.get(ext)

    def detect_by_mime_type(self, content_type: str) -> Optional[str]:
        """Return the type mapped to a ``Content-Type`` value, or ``None``."""
        if not content_type:
            return None

        # Drop parameters such as "; charset=utf-8" before the lookup.
        mime_type = content_type.split(';')[0].strip().lower()
        return self.mime_type_map.get(mime_type)

    def get_partial_content(self, url: str, max_bytes: int = 64) -> Optional[bytes]:
        """Fetch at most the first ``max_bytes`` bytes of ``url``.

        Returns:
            The leading bytes for a 200/206 response, otherwise ``None``.
            Request errors are printed and also yield ``None``.
        """
        headers = {'Range': f'bytes=0-{max_bytes - 1}'}
        try:
            # Stream the body: a server that ignores Range answers 200 with
            # the full file, which must not be downloaded completely.
            with self.session.get(url, headers=headers, timeout=10, stream=True) as response:
                if response.status_code in (200, 206):
                    head = bytearray()
                    for chunk in response.iter_content(chunk_size=max_bytes):
                        head += chunk
                        if len(head) >= max_bytes:
                            break
                    return bytes(head[:max_bytes])
        except Exception as e:
            print(f"获取内容失败: {e}")
        return None

    def detect_by_signature(self, data: bytes) -> Optional[str]:
        """Return the type indicated by the leading magic bytes, or ``None``."""
        if not data:
            return None

        for signature, file_type in self.file_signatures.items():
            if not data.startswith(signature):
                continue

            if signature == b'RIFF' and len(data) >= 12:
                # RIFF is a container; the form type tells what is inside.
                subtype = self._RIFF_SUBTYPES.get(data[8:12])
                if subtype:
                    return subtype
                # Unknown form type: keep scanning the other signatures.
            elif signature == b'OggS' and len(data) >= 32:
                # Ogg is a container too; look for the codec name.
                head = data[:64].lower()
                if b'vorbis' in head:
                    return 'OGG'
                if b'theora' in head:
                    return 'OGV'
                return 'OGG'
            else:
                return file_type

        # ISO base media files: a 4-byte size, then "ftyp", then the brand.
        if len(data) >= 12 and data[4:8] == b'ftyp':
            return self._FTYP_BRANDS.get(data[8:12])

        return None

    def get_detailed_info(self, url, content_type=None, data=None) -> Dict:
        """Combine MIME type, extension and signature checks into one report.

        Args:
            url: URL of the file.
            content_type: ``Content-Type`` value already obtained for the
                URL, if any.
            data: Leading bytes of the file, if any.

        Returns:
            A dict with the keys ``url``, ``site``, ``detected_type``,
            ``confidence``, ``methods_used``, ``content_type`` and
            ``extension``.
        """
        result = {
            'url': url,
            'site': None,
            'detected_type': None,
            'confidence': 'unknown',
            'methods_used': [],
            'content_type': content_type,
            'extension': None
        }

        # 1. MIME type. No request is made here; the guard only protects
        #    against a content_type that is not a string.
        try:
            mime_detected = self.detect_by_mime_type(content_type)
            if mime_detected:
                result['detected_type'] = mime_detected
                result['confidence'] = 'high'
                result['methods_used'].append('mime_type')
        except Exception as e:
            print(f"HEAD请求失败: {e}")

        # 2. URL extension.
        ext_detected = self.detect_by_extension(url)
        result['extension'], result['site'] = self.get_file_extension(url)

        if ext_detected:
            if not result['detected_type']:
                result['detected_type'] = ext_detected
                result['confidence'] = 'medium'
            elif result['detected_type'] == ext_detected:
                result['confidence'] = 'very_high'  # MIME type and extension agree
            result['methods_used'].append('extension')

        # 3. Magic bytes, consulted only while the answer is still uncertain.
        if result['confidence'] in ('unknown', 'medium'):
            signature_detected = self.detect_by_signature(data)
            if signature_detected:
                if result['detected_type'] == signature_detected:
                    result['confidence'] = 'very_high'
                else:
                    # Nothing detected yet, or a conflict: trust the signature.
                    result['detected_type'] = signature_detected
                    result['confidence'] = 'high'
                result['methods_used'].append('file_signature')

        return result

    def detect_file_type(self, url: str) -> str:
        """Return the detected type of ``url`` or ``'Unknown'``.

        The leading bytes are fetched with ``get_partial_content`` so that
        the signature check can back up the extension check.
        """
        data = self.get_partial_content(url)
        info = self.get_detailed_info(url, None, data)
        # The key is always present, so a dict default would never apply.
        return info.get('detected_type') or 'Unknown'

    def get_file_category(self, file_type: str) -> str:
        """Map a type name to ``'Image'``, ``'Video'``, ``'Audio'``, ``'Other'`` or ``'Unknown'``."""
        if not file_type or file_type == 'Unknown':
            return 'Unknown'

        categories = (
            ('Image', {'PNG', 'JPEG', 'GIF', 'WEBP', 'BMP', 'TIFF', 'ICO', 'SVG', 'HEIC', 'AVIF'}),
            ('Video', {'MP4', 'AVI', 'MOV', 'WMV', 'FLV', 'WEBM', 'MKV', 'M4V', 'MPEG', '3GP', 'OGV', 'TS', 'MTS',
                       'VOB'}),
            ('Audio', {'MP3', 'WAV', 'FLAC', 'AAC', 'OGG', 'WMA', 'M4A', 'APE', 'OPUS', 'AIFF', 'AU'}),
        )
        for category, members in categories:
            if file_type in members:
                return category
        return 'Other'
252
+
253
+
254
+ # if __name__ == "__main__":
255
+ # detector = FileTypeDetector()
256
+ # result = detector.get_detailed_info("https://cdn.pixabay.com/user/2024/12/10/12-18-33-812_96x96.jpeg")
257
+ # print(result)
cobweb/constant.py CHANGED
@@ -1,3 +1,11 @@
1
+ from enum import Enum
2
+
3
+
4
class CrawlerModel:
    """Dotted paths naming the available crawler classes."""

    # NOTE(review): verify that the two file crawler paths still resolve to
    # classes exported by cobweb.crawlers.
    default = "cobweb.crawlers.Crawler"
    file_air = "cobweb.crawlers.FileCrawlerAir"
    file_pro = "cobweb.crawlers.FileCrawlerPro"
1
9
 
2
10
 
3
11
  class LauncherModel:
@@ -22,12 +30,37 @@ class DealModel:
22
30
  poll = "deal model: poll"
23
31
 
24
32
 
33
class ResponseStatus(Enum):
    """Outcome labels for a handled response."""

    failed = "failed"
    succeed = "succeed"
    filter = "filter"
    max_retry = "max retry"
38
+
39
+
25
40
  class LogTemplate:
26
41
 
42
+ console_item = """
43
+ ----------------------- start - console pipeline -----------------
44
+ 种子详情 \n{seed_detail}
45
+ 解析详情 \n{parse_detail}
46
+ ----------------------- end - console pipeline ------------------
47
+ """
48
+
49
+ launcher_air_polling = """
50
+ ----------------------- start - 轮训日志: {task} -----------------
51
+ 内存队列
52
+ 种子数: {doing_len}
53
+ 待消费: {todo_len}
54
+ 已消费: {done_len}
55
+ 存储队列
56
+ 待上传: {upload_len}
57
+ ----------------------- end - 轮训日志: {task} ------------------
58
+ """
59
+
27
60
  launcher_pro_polling = """
28
61
  ----------------------- start - 轮训日志: {task} -----------------
29
62
  内存队列
30
- 种子数: {doing_len}
63
+ 消费中: {doing_len}
31
64
  待消费: {todo_len}
32
65
  已消费: {done_len}
33
66
  redis队列
@@ -63,4 +96,8 @@ class LogTemplate:
63
96
  response
64
97
  status : {status} \n{response}
65
98
  ------------------------------------------------------------------
66
- """
99
+ """
100
+
101
+ @staticmethod
102
+ def log_info(item: dict) -> str:
103
+ return "\n".join([" " * 12 + f"{str(k).ljust(14)}: {str(v)}" for k, v in item.items()])
@@ -1,2 +1 @@
1
- from .base_crawler import Crawler
2
- from .file_crawler import CrawlerAir
1
+ from .crawler import Crawler
@@ -0,0 +1,27 @@
1
+ from typing import Any, Generator
2
+ from cobweb.base import (
3
+ Seed,
4
+ BaseItem,
5
+ Request,
6
+ Response,
7
+ CSVItem,
8
+ )
9
+
10
+
11
class Crawler:
    """Default crawler: build a request per seed, download it, emit one CSV item."""

    @staticmethod
    def request(seed: Seed) -> Generator[Request, Response, None]:
        """Yield one ``Request`` for ``seed.url``, created with ``timeout=5``."""
        yield Request(seed.url, seed, timeout=5)

    @staticmethod
    def download(item: Request) -> Generator[Response, Any, None]:
        """Run ``item.download()`` and yield the result wrapped in a ``Response``.

        The request's ``to_dict`` entries are forwarded as keyword arguments.
        """
        downloaded = item.download()
        yield Response(item.seed, downloaded, **item.to_dict)

    @staticmethod
    def parse(item: Response) -> Generator[BaseItem, Any, None]:
        """Yield a ``CSVItem`` built from the response's ``to_dict`` plus its content.

        ``content`` is the ``text`` attribute of the underlying response when
        it has one, otherwise the underlying response object itself.
        """
        payload = item.to_dict
        payload["content"] = getattr(item.response, "text", item.response)
        yield CSVItem(item.seed, data=payload)
27
+
cobweb/db/__init__.py CHANGED
@@ -1 +1,2 @@
1
1
  from .redis_db import RedisDB
2
+ from .api_db import ApiDB
cobweb/db/api_db.py ADDED
@@ -0,0 +1,83 @@
1
+ import os
2
+ import json
3
+ import requests
4
+
5
+
6
class ApiDB:
    """Small HTTP client for a service exposing Redis-style commands.

    Each public method calls one endpoint below ``host`` and returns the
    ``"data"`` field of the JSON reply. Calls are best-effort: any failure
    (connection error, non-JSON body, missing ``"data"`` key) yields ``None``.

    NOTE: no timeout is passed to ``requests``, so a stalled server blocks
    the caller.
    """

    def __init__(self, host=None, **kwargs):
        """Store the base URL.

        Args:
            host: Base URL of the service. Falls back to the
                ``REDIS_API_HOST`` environment variable and then to
                ``http://127.0.0.1:4396``.
            **kwargs: Accepted and ignored.
        """
        self.host = host or os.getenv("REDIS_API_HOST", "http://127.0.0.1:4396")

    @staticmethod
    def _read_data(response):
        """Return the ``"data"`` field of a JSON response and close it."""
        # The context manager closes the response even if decoding fails.
        with response:
            return response.json()["data"]

    def _get_response(self, api, params: dict = None):
        """GET ``host + api`` and return the reply's data, or ``None`` on failure."""
        try:
            return self._read_data(requests.get(self.host + api, params=params))
        except Exception:
            # Deliberately best-effort, but KeyboardInterrupt and SystemExit
            # are no longer swallowed.
            return None

    def _post_response(self, api, params: dict = None, data: dict = None):
        """POST ``data`` as JSON to ``host + api``; return the reply's data or ``None``."""
        try:
            headers = {"Content-Type": "application/json"}
            return self._read_data(
                requests.post(self.host + api, headers=headers, params=params, data=json.dumps(data))
            )
        except Exception:
            return None

    def get(self, name):
        """Call ``/get`` for ``name``."""
        return self._get_response(api="/get", params=dict(name=name))

    def setnx(self, name, value=""):
        """Call ``/setnx`` with ``name`` and ``value``."""
        return self._get_response(api="/setnx", params=dict(name=name, value=value))

    def setex(self, name, t, value=""):
        """Call ``/setex`` with ``name``, ``value`` and ``t``."""
        return self._get_response(api="/setex", params=dict(name=name, value=value, t=t))

    def expire(self, name, t, nx: bool = False, xx: bool = False, gt: bool = False, lt: bool = False):
        """Call ``/expire`` with ``name``, ``t`` and the four flags."""
        return self._get_response(api="/expire", params=dict(name=name, t=t, nx=nx, xx=xx, gt=gt, lt=lt))

    def ttl(self, name):
        """Call ``/ttl`` for ``name``."""
        return self._get_response(api="/ttl", params=dict(name=name))

    def delete(self, name):
        """Call ``/delete`` for ``name``."""
        return self._get_response(api="/delete", params=dict(name=name))

    def exists(self, name):
        """Call ``/exists`` for ``name``."""
        return self._get_response(api="/exists", params=dict(name=name))

    def incrby(self, name, value):
        """Call ``/incrby`` with ``name`` and ``value``."""
        return self._get_response(api="/incrby", params=dict(name=name, value=value))

    # NOTE(review): the ``-> bool`` annotations on zcard, lock and auto_incr
    # are kept from the original; the value returned is whatever the service
    # puts in "data" (or None on failure). Confirm the real types.
    def zcard(self, name) -> bool:
        """Call ``/zcard`` for ``name``."""
        return self._get_response(api="/zcard", params=dict(name=name))

    def zadd(self, name, item: dict, **kwargs):
        """POST ``item`` as ``mapping`` to ``/zadd``; a falsy ``item`` is a no-op returning ``None``."""
        if not item:
            return None
        return self._post_response(api="/zadd", data=dict(name=name, mapping=item, **kwargs))

    def zrem(self, name, *values):
        """POST ``values`` to ``/zrem`` for ``name``."""
        return self._post_response(api="/zrem", data=dict(name=name, values=values))

    def zcount(self, name, _min, _max):
        """Call ``/zcount`` for ``name`` with ``min`` and ``max``."""
        return self._get_response(api="/zcount", params=dict(name=name, min=_min, max=_max))

    def lock(self, name, t=15) -> bool:
        """Call ``/lock`` with ``name`` and ``t``."""
        return self._get_response(api="/lock", params=dict(name=name, t=t))

    def auto_incr(self, name, t=15, limit=1000) -> bool:
        """Call ``/auto_incr`` with ``name``, ``t`` and ``limit``."""
        return self._get_response(api="/auto_incr", params=dict(name=name, t=t, limit=limit))

    def members(self, name, score, start=0, count=1000, _min="-inf", _max="+inf"):
        """Call ``/members`` with the given score, paging and range arguments."""
        return self._get_response(
            api="/members",
            params=dict(name=name, score=score, start=start, count=count, min=_min, max=_max),
        )

    def done(self, name: list, *values):
        """POST ``name`` and ``values`` to ``/done``."""
        return self._post_response(api="/done", data=dict(name=name, values=values))
80
+
81
+
82
+
83
+