cobweb-launcher 0.0.5__tar.gz → 0.0.6__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/PKG-INFO +1 -1
  2. cobweb-launcher-0.0.6/cobweb/__init__.py +8 -0
  3. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/redis_db.py +2 -12
  4. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/storer/loghub.py +2 -2
  5. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/distributed/launcher.py +35 -11
  6. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb_launcher.egg-info/PKG-INFO +1 -1
  7. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/setup.py +1 -1
  8. cobweb-launcher-0.0.5/cobweb/__init__.py +0 -19
  9. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/LICENSE +0 -0
  10. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/README.md +0 -0
  11. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/bbb.py +0 -0
  12. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/__init__.py +0 -0
  13. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/oss_db.py +0 -0
  14. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/scheduler/__init__.py +0 -0
  15. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/scheduler/default.py +0 -0
  16. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/scheduler/textfile.py +0 -0
  17. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/storer/__init__.py +0 -0
  18. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/storer/console.py +0 -0
  19. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/storer/redis.py +0 -0
  20. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/db/storer/textfile.py +0 -0
  21. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/distributed/__init__.py +0 -0
  22. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/distributed/models.py +0 -0
  23. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/interface.py +0 -0
  24. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/log.py +0 -0
  25. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/single/__init__.py +0 -0
  26. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/single/models.py +0 -0
  27. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/single/nest.py +0 -0
  28. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/task.py +0 -0
  29. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb/utils.py +0 -0
  30. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb_launcher.egg-info/SOURCES.txt +0 -0
  31. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb_launcher.egg-info/dependency_links.txt +0 -0
  32. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb_launcher.egg-info/requires.txt +0 -0
  33. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/cobweb_launcher.egg-info/top_level.txt +0 -0
  34. {cobweb-launcher-0.0.5 → cobweb-launcher-0.0.6}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 0.0.5
3
+ Version: 0.0.6
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -0,0 +1,8 @@
1
+ from .bbb import Seed, Queue, DBItem
2
+ from .task import Task
3
+ from .log import log
4
+ from .interface import SchedulerInterface, StorerInterface
5
+ from .db.redis_db import RedisDB
6
+ from .distributed.launcher import launcher
7
+
8
+
@@ -9,19 +9,9 @@ class RedisDB:
9
9
  self,
10
10
  project: str,
11
11
  task_name: str,
12
- host=None,
13
- port=None,
14
- username=None,
15
- password=None,
16
- db=0
12
+ config: dict
17
13
  ):
18
- pool = redis.ConnectionPool(
19
- host=host,
20
- port=port,
21
- username=username,
22
- password=password,
23
- db=db
24
- )
14
+ pool = redis.ConnectionPool(**config)
25
15
  self.heartbeat_key = f"{project}:{task_name}:heartbeat" # redis type string
26
16
  self.spider_key = f"{project}:{task_name}:seed_info:spider" # redis type zset, .format(priority)
27
17
  self.storer_key = f"{project}:{task_name}:seed_info:storer:%s" # redis type set,
@@ -5,8 +5,8 @@ from cobweb import log, StorerInterface
5
5
 
6
6
  class Loghub(StorerInterface):
7
7
 
8
- def __init__(self, table, length, queue, config):
9
- super().__init__(table, length, queue, config)
8
+ def __init__(self, **kwargs):
9
+ super().__init__(**kwargs)
10
10
  self.client = None
11
11
 
12
12
  def init_loghub_clint(self):
@@ -2,10 +2,11 @@ import time
2
2
  import threading
3
3
  from threading import Thread
4
4
  from importlib import import_module
5
- from models import Scheduler, Spider, Storer
6
- from cobweb.utils import struct_queue_name, restore_table_name
7
- from cobweb import log, Queue, DBItem, RedisDB, StorerInterface
8
5
 
6
+ from cobweb import log, Queue, DBItem, RedisDB, StorerInterface
7
+ from cobweb.utils import struct_queue_name, restore_table_name
8
+ from .models import Scheduler, Spider, Storer
9
+ from collections import namedtuple
9
10
 
10
11
  # def start_seeds(seeds):
11
12
  # if not seeds:
@@ -29,14 +30,14 @@ from cobweb import log, Queue, DBItem, RedisDB, StorerInterface
29
30
  # storer_data[db_name]["db_args_list"].append(info[1:])
30
31
  # return storer_data
31
32
 
32
- def get_storer_db(db):
33
+ def get_scheduler_db(db):
33
34
  if isinstance(db, str):
34
35
  if "." in db:
35
36
  model_path = db.split(".")
36
37
  model = import_module(db)
37
38
  obj = getattr(model, db)
38
39
  else:
39
- model = import_module(f"cobweb.db.storer.{db.lower()}")
40
+ model = import_module(f"cobweb.db.scheduler.{db.lower()}")
40
41
  obj = getattr(model, db.capitalize())
41
42
  return obj
42
43
  # if db.lower() in dir(StorerDB):
@@ -48,6 +49,25 @@ def get_storer_db(db):
48
49
  raise TypeError()
49
50
 
50
51
 
52
+ def get_storer_db(db):
53
+ if isinstance(db, str):
54
+ if "." in db:
55
+ model_path = db.split(".")
56
+ model = import_module(db)
57
+ obj = getattr(model, db)
58
+ else:
59
+ model = import_module(f"cobweb.db.storer.{db.lower()}")
60
+ obj = getattr(model, db.capitalize())
61
+ return obj, db.lower()
62
+ # if db.lower() in dir(StorerDB):
63
+ # return getattr(StorerDB, db)
64
+ # else:
65
+ # pass
66
+ elif issubclass(db, StorerInterface):
67
+ return db, db.__name__.lower()
68
+ raise TypeError()
69
+
70
+
51
71
  def check(stop, last, spider, scheduler, storer_list, ready_seed_length, spider_queue_length):
52
72
  time.sleep(5)
53
73
  while True:
@@ -101,18 +121,22 @@ def launcher(task):
101
121
  stop = threading.Event()
102
122
 
103
123
  # 初始化redis信息
104
- redis_db = RedisDB(task.project, task.task_name, *task.redis_info)
124
+ redis_db = RedisDB(task.project, task.task_name, task.redis_info)
105
125
 
106
126
  log.info("初始化cobweb!")
107
127
 
108
128
  seed_queue = Queue()
109
129
 
130
+ if task.scheduler_info is None:
131
+ task.scheduler_info = dict()
132
+
110
133
  # 调度器动态继承
111
134
  sql = task.scheduler_info.get("sql")
112
135
  table = task.scheduler_info.get("table")
113
136
  size = task.scheduler_info.get("size")
114
137
  scheduler_config = task.scheduler_info.get("config")
115
- DB = task.scheduler_info["db"]
138
+ scheduler_db = task.scheduler_info.get("db", "default")
139
+ DB = get_scheduler_db(scheduler_db)
116
140
  # SchedulerDB, table, sql, length, size, config = task.scheduler_info
117
141
  SchedulerTmp = type(DB.__name__, (Scheduler, DB), {})
118
142
 
@@ -134,12 +158,12 @@ def launcher(task):
134
158
  item = type("Item", (object,), {"redis_client": redis_db})()
135
159
 
136
160
  for storer_info in storer_info_list:
137
- db = storer_info["db"]
161
+ storer_db = storer_info["db"]
138
162
  fields = storer_info["fields"]
139
163
  storer_table = storer_info.get("table", "console")
140
164
  storer_config = storer_info.get("config")
141
165
 
142
- StorerDB, db_name = get_storer_db(db)
166
+ StorerDB, db_name = get_storer_db(storer_db)
143
167
  StorerTmp = type(StorerDB.__name__, (Storer, StorerDB), {})
144
168
 
145
169
  if not getattr(item, db_name, None):
@@ -154,9 +178,9 @@ def launcher(task):
154
178
  # 初始话存储器
155
179
  table_name = restore_table_name(table_name=storer_table)
156
180
  storer = StorerTmp(
157
- table=table_name, queue=queue,
181
+ table=table_name, fields=fields,
158
182
  length=task.storer_queue_length,
159
- config=storer_config
183
+ queue=queue, config=storer_config
160
184
  )
161
185
  storer_list.append(storer)
162
186
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cobweb-launcher
3
- Version: 0.0.5
3
+ Version: 0.0.6
4
4
  Summary: spider_hole
5
5
  Home-page: https://github.com/Juannie-PP/cobweb
6
6
  Author: Juannie-PP
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
5
5
 
6
6
  setup(
7
7
  name="cobweb-launcher",
8
- version="0.0.5",
8
+ version="0.0.6",
9
9
  packages=find_packages(),
10
10
  url="https://github.com/Juannie-PP/cobweb",
11
11
  license="MIT",
@@ -1,19 +0,0 @@
1
- # from .distributed.launcher import launcher
2
- # from .distributed import models
3
- from .bbb import Seed, Queue, DBItem
4
- from .task import Task
5
- from .log import log
6
- from .interface import SchedulerInterface, StorerInterface
7
- from .db.redis_db import RedisDB
8
- # from . import db
9
- # from .base.bbb import Seed
10
- # from .base import interface
11
- # from .base import config
12
- # from .base import utils
13
- # from .base import log as logger
14
- #
15
- # from .db.scheduler import *
16
- # from .db.storer import *
17
- # from .db.base import *
18
-
19
-
File without changes