cobweb-launcher 1.1.6__tar.gz → 1.1.9__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (37)
  1. {cobweb-launcher-1.1.6/cobweb_launcher.egg-info → cobweb-launcher-1.1.9}/PKG-INFO +1 -1
  2. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/crawlers/file_crawler.py +3 -1
  3. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/launchers/launcher.py +10 -6
  4. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/launchers/launcher_pro.py +4 -4
  5. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9/cobweb_launcher.egg-info}/PKG-INFO +1 -1
  6. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/setup.py +2 -2
  7. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/LICENSE +0 -0
  8. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/README.md +0 -0
  9. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/__init__.py +0 -0
  10. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/base/__init__.py +0 -0
  11. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/base/common_queue.py +0 -0
  12. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/base/decorators.py +0 -0
  13. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/base/item.py +0 -0
  14. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/base/log.py +0 -0
  15. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/base/request.py +0 -0
  16. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/base/response.py +0 -0
  17. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/base/seed.py +0 -0
  18. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/constant.py +0 -0
  19. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/crawlers/__init__.py +0 -0
  20. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/crawlers/base_crawler.py +0 -0
  21. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/db/__init__.py +0 -0
  22. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/db/redis_db.py +0 -0
  23. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/exceptions/__init__.py +0 -0
  24. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/exceptions/oss_db_exception.py +0 -0
  25. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/launchers/__init__.py +0 -0
  26. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/pipelines/__init__.py +0 -0
  27. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/pipelines/base_pipeline.py +0 -0
  28. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/pipelines/loghub_pipeline.py +0 -0
  29. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/setting.py +0 -0
  30. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/utils/__init__.py +0 -0
  31. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/utils/oss.py +0 -0
  32. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb/utils/tools.py +0 -0
  33. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
  34. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
  35. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb_launcher.egg-info/requires.txt +0 -0
  36. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/cobweb_launcher.egg-info/top_level.txt +0 -0
  37. {cobweb-launcher-1.1.6 → cobweb-launcher-1.1.9}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 1.1.6
3
+ Version: 1.1.9
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -123,12 +123,14 @@ class FileCrawlerPro(Crawler):
123
123
  item.seed, response, filter=True, msg=f"response content type is {content_type}",
124
124
  bucket_name=bucket_name, data_size=content_length, **seed_dict
125
125
  )
126
+ response.close()
126
127
  elif position == 1 and min_upload_size >= content_length > 0:
127
128
  """过小文件标识返回"""
128
129
  yield Response(
129
130
  item.seed, response, filter=True, msg="file size is too small",
130
131
  bucket_name=bucket_name, data_size=content_length, **seed_dict
131
132
  )
133
+ response.close()
132
134
  elif position == 1 and chunk_size > content_length > min_upload_size:
133
135
  """小文件直接下载"""
134
136
  for part_data in response.iter_content(chunk_size):
@@ -153,12 +155,12 @@ class FileCrawlerPro(Crawler):
153
155
  seed_dict['position'] = position
154
156
  seed_dict['start'] = upload_content_length
155
157
 
156
- response.close()
157
158
  if content:
158
159
  oss_util.put_part(key, upload_id, position, content)
159
160
  content_length += len(content)
160
161
  oss_util.merge(key, upload_id)
161
162
  yield Response(item.seed, response, bucket_name=bucket_name, data_size=content_length, **seed_dict)
163
+ response.close()
162
164
 
163
165
  elif item.seed.params.identifier == "merge":
164
166
  oss_util.merge(key, seed_dict["upload_id"])
@@ -34,7 +34,7 @@ class Launcher(threading.Thread):
34
34
  "_delete",
35
35
  ]
36
36
 
37
- def __init__(self, task, project, custom_setting=None):
37
+ def __init__(self, task, project, custom_setting=None, **kwargs):
38
38
  super().__init__()
39
39
  self.task = task
40
40
  self.project = project
@@ -42,10 +42,11 @@ class Launcher(threading.Thread):
42
42
  self._stop = threading.Event() # 结束事件
43
43
  self._pause = threading.Event() # 暂停事件
44
44
 
45
+ _setting = dict()
46
+
45
47
  if custom_setting:
46
- setting_ = dict()
47
48
  if isinstance(custom_setting, dict):
48
- setting_ = custom_setting
49
+ _setting = custom_setting
49
50
  else:
50
51
  if isinstance(custom_setting, str):
51
52
  custom_setting = importlib.import_module(custom_setting)
@@ -53,9 +54,12 @@ class Launcher(threading.Thread):
53
54
  raise Exception
54
55
  for k, v in custom_setting.__dict__.items():
55
56
  if not k.startswith("__") and not inspect.ismodule(v):
56
- setting_[k] = v
57
- for k, v in setting_.items():
58
- setattr(setting, k, v)
57
+ _setting[k] = v
58
+
59
+ _setting.update(**kwargs)
60
+
61
+ for k, v in _setting.items():
62
+ setattr(setting, k, v)
59
63
 
60
64
  self._Crawler = dynamic_load_class(setting.CRAWLER)
61
65
  self._Pipeline = dynamic_load_class(setting.PIPELINE)
@@ -9,8 +9,8 @@ from cobweb.constant import DealModel, LogTemplate
9
9
 
10
10
  class LauncherPro(Launcher):
11
11
 
12
- def __init__(self, task, project, custom_setting=None):
13
- super().__init__(task, project, custom_setting)
12
+ def __init__(self, task, project, custom_setting=None, **kwargs):
13
+ super().__init__(task, project, custom_setting, **kwargs)
14
14
  self._todo = "{%s:%s}:todo" % (project, task)
15
15
  self._done = "{%s:%s}:done" % (project, task)
16
16
  self._fail = "{%s:%s}:fail" % (project, task)
@@ -147,12 +147,12 @@ class LauncherPro(Launcher):
147
147
  self._pause.clear()
148
148
  self._execute()
149
149
  elif queue_not_empty_count == 0:
150
- pooling_wait_seconds = 3
150
+ pooling_wait_seconds = 5
151
151
  check_emtpy_times += 1
152
152
  else:
153
153
  check_emtpy_times = 0
154
154
 
155
- if not self._db.zcard(self._todo) and check_emtpy_times > 2:
155
+ if not self._db.zcount(self._todo, _min=0, _max="(1000") and check_emtpy_times > 2:
156
156
  check_emtpy_times = 0
157
157
  self.__DOING__ = {}
158
158
  self._pause.set()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 1.1.6
3
+ Version: 1.1.9
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setup(
7
7
  name="cobweb-launcher",
8
- version="1.1.6",
8
+ version="1.1.9",
9
9
  packages=find_packages(),
10
10
  url="https://github.com/Juannie-PP/cobweb",
11
11
  license="MIT",
@@ -22,4 +22,4 @@ setup(
22
22
  "cobweb-launcher, cobweb",
23
23
  ],
24
24
  python_requires=">=3.7",
25
- )
25
+ )
File without changes