cobweb-launcher 3.1.4__py3-none-any.whl → 3.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cobweb/__init__.py CHANGED
@@ -1,2 +1,4 @@
1
1
  from .launchers import Launcher
2
2
  from .constant import CrawlerModel
3
+ from .pipelines import Pipeline
4
+ from .crawlers import Crawler
cobweb/db/api_db.py CHANGED
@@ -71,7 +71,7 @@ class ApiDB:
71
71
  def auto_incr(self, name, t=15, limit=1000) -> bool:
72
72
  return self._get_response(api="/auto_incr", params=dict(name=name, t=t, limit=limit))
73
73
 
74
- def members(self, name, score, start=0, count=5000, _min="-inf", _max="+inf"):
74
+ def members(self, name, score, start=0, count=1000, _min="-inf", _max="+inf"):
75
75
  return self._get_response(api="/members", params=dict(name=name, score=score, start=start, count=count, min=_min, max=_max))
76
76
 
77
77
  def done(self, name: list, *values):
cobweb/launchers/__init__.py CHANGED
@@ -1,8 +1,3 @@
1
- # from .launcher_air import LauncherAir
2
- # from .launcher_pro import LauncherPro
3
- # from .launcher_api import LauncherApi
4
-
5
-
6
1
  from .launcher import Launcher
7
2
  from .uploader import Uploader
8
3
  from .distributor import Distributor
cobweb/launchers/distributor.py CHANGED
@@ -1,22 +1,16 @@
1
1
  import time
2
2
  import threading
3
3
  import traceback
4
- from inspect import isgenerator
4
+
5
5
  from typing import Callable
6
+ from inspect import isgenerator
6
7
  from urllib.parse import urlparse
7
8
  from requests import Response as Res
8
9
 
9
10
  from cobweb import setting
10
11
  from cobweb.constant import DealModel, LogTemplate
11
- from cobweb.base import (
12
- Seed,
13
- Queue,
14
- BaseItem,
15
- Request,
16
- Response,
17
- logger
18
- )
19
12
  from cobweb.utils import LoghubDot, check_pause
13
+ from cobweb.base import Seed, Queue, BaseItem, Request, Response, logger
20
14
 
21
15
 
22
16
  class Distributor(threading.Thread):
cobweb/launchers/uploader.py CHANGED
@@ -27,7 +27,7 @@ class Uploader(threading.Thread):
27
27
  self.upload_size = setting.UPLOAD_QUEUE_MAX_SIZE
28
28
  self.wait_seconds = setting.UPLOAD_QUEUE_WAIT_SECONDS
29
29
 
30
- self.Pipeline = SpiderPipeline
30
+ self.pipeline = SpiderPipeline()
31
31
 
32
32
  logger.debug(f"Uploader instance attrs: {self.__dict__}")
33
33
 
@@ -45,11 +45,11 @@ class Uploader(threading.Thread):
45
45
  if not item:
46
46
  break
47
47
  seeds.append(item.seed)
48
- data = self.Pipeline.build(item)
48
+ data = self.pipeline.build(item)
49
49
  data_info.setdefault(item.table, []).append(data)
50
50
  for table, datas in data_info.items():
51
51
  try:
52
- self.Pipeline.upload(table, datas)
52
+ self.pipeline.upload(table, datas)
53
53
  except Exception as e:
54
54
  logger.info(e)
55
55
  except Exception as e:
cobweb/schedulers/scheduler_with_redis.py CHANGED
@@ -7,7 +7,7 @@ from cobweb.utils import check_pause
7
7
  from cobweb.base import Queue, Seed, logger
8
8
  from cobweb.constant import LogTemplate
9
9
  from .scheduler import Scheduler
10
- use_api = bool(int(os.getenv("REDIS_API", 0)))
10
+ use_api = bool(int(os.getenv("REDIS_API_HOST", 0)))
11
11
 
12
12
 
13
13
  class RedisScheduler(Scheduler):
@@ -153,10 +153,10 @@ class RedisScheduler(Scheduler):
153
153
  else:
154
154
  if all_count:
155
155
  logger.info(f"todo seeds count: {todo_count}, queue length: {all_count}")
156
- self.pause.is_set()
156
+ self.pause.clear()
157
157
  else:
158
158
  logger.info("Done! pause set...")
159
- self.pause.clear()
159
+ self.pause.set()
160
160
  else:
161
161
  if self.pause.is_set():
162
162
  self.pause.clear()
cobweb/setting.py CHANGED
@@ -18,7 +18,7 @@ LOGHUB_CONFIG = {
18
18
  "accessKey": os.getenv("LOGHUB_SECRET_KEY")
19
19
  }
20
20
 
21
- # oss util config
21
+ # # oss util config
22
22
  OSS_BUCKET = os.getenv("OSS_BUCKET")
23
23
  OSS_ENDPOINT = os.getenv("OSS_ENDPOINT")
24
24
  OSS_ACCESS_KEY = os.getenv("OSS_ACCESS_KEY")
@@ -72,9 +72,9 @@ TASK_MODEL = 0 # 0:单次,1:常驻
72
72
  # 流控措施
73
73
  SPEED_CONTROL = 1 # 0:关闭,1:开启
74
74
 
75
- # bloom过滤器
76
- CAPACITY = 100000000
77
- ERROR_RATE = 0.001
78
- FILTER_FIELD = "url"
75
+ # # bloom过滤器
76
+ # CAPACITY = 100000000
77
+ # ERROR_RATE = 0.001
78
+ # FILTER_FIELD = "url"
79
79
  # 文件下载响应类型过滤
80
80
  # FILE_FILTER_CONTENT_TYPE = ["text/html", "application/xhtml+xml"]
cobweb_launcher-3.1.6.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 3.1.4
3
+ Version: 3.1.6
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
cobweb_launcher-3.1.6.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
1
- cobweb/__init__.py,sha256=UfNq1asNq7_a8IKf5WCbk0ju5fxT4wElAyaItf_a-d8,67
1
+ cobweb/__init__.py,sha256=YdBi3uytEFRXan155xU1kKMpiUKUupO2RGeJyXmH0zk,129
2
2
  cobweb/constant.py,sha256=zy3XYsc1qp2B76_Fn_hVQ8eGHlPBd3OFlZK2cryE6FY,2839
3
- cobweb/setting.py,sha256=yP9ZkVShTY4sCZ4DpzI_hO-FMSf0vGrQvkp7UCmZaa0,2338
3
+ cobweb/setting.py,sha256=njtcRjtfUrAc4LwEsSQgBa2sRVpcVd_He3NARnDE_n0,2348
4
4
  cobweb/base/__init__.py,sha256=epVQttTHQcux9kAtrGuO9HB_wP74L-pi74vhzc4QOCw,224
5
5
  cobweb/base/common_queue.py,sha256=W7PPZZFl52j3Mc916T0imHj7oAUelA6aKJwW-FecDPE,872
6
6
  cobweb/base/item.py,sha256=hYheVTV2Bozp4iciJpE2ZwBIXkaqBg4QQkRccP8yoVk,1049
@@ -11,14 +11,14 @@ cobweb/base/seed.py,sha256=A-F1urjbE5hYNWTCwq3sUV4nrxlK_RGMoCmjBmIwYsI,3158
11
11
  cobweb/crawlers/__init__.py,sha256=msvkB9mTpsgyj8JfNMsmwAcpy5kWk_2NrO1Adw2Hkw0,29
12
12
  cobweb/crawlers/crawler.py,sha256=73WFGVNvIFvH8cP5RfDhAhM-WaFL_mdwnAhNohDnBO0,696
13
13
  cobweb/db/__init__.py,sha256=uZwSkd105EAwYo95oZQXAfofUKHVIAZZIPpNMy-hm2Q,56
14
- cobweb/db/api_db.py,sha256=bDc5dJQxq4z04h70KUTHd0OqUOEY7Cm3wcNJZtTvJIM,3015
14
+ cobweb/db/api_db.py,sha256=kMJhQnhdmCewldG5It0Go8Ge9LBqw0eGwyoynVGeCrQ,3015
15
15
  cobweb/db/redis_db.py,sha256=rK6PnKLLVTeg6HMGVtlYl4_b5og3aO2J5jZrqU2Aoso,7721
16
16
  cobweb/exceptions/__init__.py,sha256=E9SHnJBbhD7fOgPFMswqyOf8SKRDrI_i25L0bSpohvk,32
17
17
  cobweb/exceptions/oss_db_exception.py,sha256=iP_AImjNHT3-Iv49zCFQ3rdLnlvuHa3h2BXApgrOYpA,636
18
- cobweb/launchers/__init__.py,sha256=XEW4hmBMPKp2bRkHBxxlplXwvKdlLVYBFg3etjtbRXo,222
19
- cobweb/launchers/distributor.py,sha256=WWLyDaSGMWQWKfZX9guUzIZEwAX9FM5pqOUP3WDfEak,6447
18
+ cobweb/launchers/__init__.py,sha256=6_v2jd2sgj6YnOB1nPKiYBskuXVb5xpQnq2YaDGJgQ8,100
19
+ cobweb/launchers/distributor.py,sha256=-tr3c3O3QBV9UYH1optdBfq76_o83RjII5YKiIYl33w,6420
20
20
  cobweb/launchers/launcher.py,sha256=rDT7OQX3wzaS_MsbrkeWDH5IHpN5GE_Oy4tUkvnovKM,5302
21
- cobweb/launchers/uploader.py,sha256=8zKbUio7aPn63-aC1v7PASzRzc2JX7d59RqeiFw4zMY,1816
21
+ cobweb/launchers/uploader.py,sha256=gJiKMVeoxxwrLsYPaMuXJN7wc5LFxXfjC4VVIlqqlqk,1818
22
22
  cobweb/pipelines/__init__.py,sha256=zSUsGtx6smbs2iXBXvYynReKSgky-3gjqaAtKVnA_OU,105
23
23
  cobweb/pipelines/pipeline.py,sha256=qwoOYMhlAB-MnEmMNpNeauTHoRTOr2wyBDYS4MF6B1c,261
24
24
  cobweb/pipelines/pipeline_console.py,sha256=NEh-4zhuVAQOqwXLsqeb-rcNZ9_KXFUpL3otUTL5qBs,754
@@ -27,15 +27,15 @@ cobweb/schedulers/__init__.py,sha256=LEya11fdAv0X28YzbQTeC1LQZ156Fj4cyEMGqQHUWW0
27
27
  cobweb/schedulers/launcher_air.py,sha256=qdcxq41I9zN5snEpMUUNEQNTtUiZM1Hw_3N9zu4PuAs,3058
28
28
  cobweb/schedulers/launcher_api.py,sha256=3-A6k3Igvi-xnvP9M_3NkJKUweDJ_pY10ZrHClteD-g,8628
29
29
  cobweb/schedulers/scheduler.py,sha256=mN9XvaOCzNnBWQfzslTIM860ZGq2gyLtxpbVUd0Slqs,2240
30
- cobweb/schedulers/scheduler_with_redis.py,sha256=LmfQ_uwpXxwvG_i52pDl93T_fxqx3eWg4M2XgDgvmGQ,6397
30
+ cobweb/schedulers/scheduler_with_redis.py,sha256=Qq8iMxbDWJpRml76NfcZyyB8q7tM2Et0YrOcF7nwcP0,6399
31
31
  cobweb/utils/__init__.py,sha256=8Bu5iZrIOUMS4jv4hi0inRPtscf6MK0ZFa7gQ7ZFoqw,145
32
32
  cobweb/utils/bloom.py,sha256=vng-YbKgh9HbtpAWYf_nkUSbfVTOj40aqUUejRYlsCU,1752
33
33
  cobweb/utils/decorators.py,sha256=066JCY_RNMr2mXkhEv8XTtOOKkv9CFiBm0ZNCcC-2ag,1131
34
34
  cobweb/utils/dotting.py,sha256=mVICaa26R-dQ4JGmPK-kkR6QjX38QiRewXZnGb2DCIc,1784
35
35
  cobweb/utils/oss.py,sha256=ktfwMGnq5FMzOkUUS3nNXI7pTdPNinudH3YRJ0nMhoU,3985
36
36
  cobweb/utils/tools.py,sha256=5JEaaAwYoV9Sdla2UBIJn6faUBuXmxUMagm9ck6FVqs,1253
37
- cobweb_launcher-3.1.4.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
38
- cobweb_launcher-3.1.4.dist-info/METADATA,sha256=ZkU5297omwKNvX0bdLBCamER88JZ647bflowb9LWD70,6509
39
- cobweb_launcher-3.1.4.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
40
- cobweb_launcher-3.1.4.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
41
- cobweb_launcher-3.1.4.dist-info/RECORD,,
37
+ cobweb_launcher-3.1.6.dist-info/LICENSE,sha256=z1rxSIGOyzcSb3orZxFPxzx-0C1vTocmswqBNxpKfEk,1063
38
+ cobweb_launcher-3.1.6.dist-info/METADATA,sha256=1hnno3Ujgp1nUQP2VTHAMy-VyUFMx09qRYjo4O4Iz3E,6509
39
+ cobweb_launcher-3.1.6.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
40
+ cobweb_launcher-3.1.6.dist-info/top_level.txt,sha256=4GETBGNsKqiCUezmT-mJn7tjhcDlu7nLIV5gGgHBW4I,7
41
+ cobweb_launcher-3.1.6.dist-info/RECORD,,