ex-cd 1.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ex_cd/__init__.py ADDED
@@ -0,0 +1,24 @@
1
+ from .config import build_parser, read_config
2
+ from .history import get_latest_url, put_history_placeholder
3
+ from .output import initialize_logging
4
+ from .download import download_gallery_history
5
+
6
+
7
def main():
    """Command-line entry point: download a gallery together with its history.

    Parses the command line, sets up logging, loads the configuration,
    resolves the latest URL for the requested gallery and tags its parent
    galleries before downloading.  A failing download is retried
    ``config['retry']`` times; after that one last attempt is made whose
    exception, if any, propagates to the caller.
    """
    args = build_parser().parse_args()
    logger = initialize_logging(args.loglevel)
    config = read_config(args, logger)
    logger.info(f"Parsed config: {args}")

    url, gallery_dir = get_latest_url(args.url, config, logger)
    logger.info(f"Downloading: {url} -> {gallery_dir}")
    put_history_placeholder(url, gallery_dir, config, logger)

    for _attempt in range(config['retry']):
        try:
            download_gallery_history(url, gallery_dir, config, logger)
        except Exception as err:
            logger.error(f"download_gallery_history failed, retry: {err}")
        else:
            return  # Successfully downloaded
    # Final attempt, may raise an exception
    return download_gallery_history(url, gallery_dir, config, logger)
ex_cd/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ import sys
2
+ import ex_cd
3
+
4
# Executed for `python -m ex_cd`: run the CLI and hand its result to sys.exit.
if __name__ == "__main__":
    exit_status = ex_cd.main()
    sys.exit(exit_status)
ex_cd/collect.py ADDED
@@ -0,0 +1,69 @@
1
+ import os
2
+ import json
3
+ import hashlib
4
+ from .common import META_FOLDER, _get_gallery_metadata_filenames
5
+ from .meta import _download_gallery_meta
6
+
7
+
8
def _iter_metadata(gallery_dir, logger):
    """Yield ``(name, metadata)`` for every readable metadata json file of a gallery.

    ``name`` is the metadata file name without its ``.json`` suffix and
    ``metadata`` is the parsed json content.  Files that cannot be opened or
    parsed are reported with a warning and skipped.
    """
    metadata_root = os.path.join(gallery_dir, META_FOLDER)
    for filename in _get_gallery_metadata_filenames(gallery_dir):
        stem, suffix = os.path.splitext(filename)
        if suffix != '.json':
            continue
        try:
            with open(os.path.join(metadata_root, filename), encoding='utf8') as handle:
                yield stem, json.load(handle)
        except Exception as err:
            logger.warning(f"Cannot load exist json file {filename}: {err}")
20
+
21
+
22
def _iter_imgfile_metadata(gallery_dir, logger):
    """Yield ``(image_path, metadata)`` for metadata entries whose image file exists.

    The image path is the metadata name (without ``.json``) joined onto
    `gallery_dir`; entries without a regular file at that path are skipped.
    """
    for name, metadata in _iter_metadata(gallery_dir, logger):
        image_path = os.path.join(gallery_dir, name)
        if os.path.isfile(image_path):
            yield image_path, metadata
28
+
29
+
30
+ def _check_img(imgfile, meta, logger):
31
+ if 'image_token' not in meta:
32
+ return False
33
+ image_token = meta['image_token']
34
+ sha1_token = None
35
+ if not os.path.isfile(imgfile):
36
+ return False
37
+ try:
38
+ with open(imgfile, mode="rb") as fp:
39
+ sha1_token = hashlib.sha1(fp.read()).hexdigest()
40
+ except Exception as e:
41
+ logger.error(f"Invalid {imgfile}: cannot compute token, {e}")
42
+ return False
43
+ return image_token == sha1_token[0:10]
44
+
45
+
46
def _collect_gallery_history(gallery_dir, config, logger):
    """Map ``image_token`` to image path for every verified image of a gallery.

    Only images that exist and pass `_check_img` are included.  `config` is
    accepted for signature symmetry and is not used here.
    """
    return {
        metadata['image_token']: image_path
        for image_path, metadata in _iter_imgfile_metadata(gallery_dir, logger)
        if _check_img(image_path, metadata, logger)
    }
52
+
53
+
54
def _load_gallery_history(url, gallery_dir, config, logger, history):
    """Move images listed in `history` into this gallery where they are still missing.

    The gallery metadata is downloaded first.  Then, for every metadata entry
    whose ``image_token`` appears in `history` and whose own image file does
    not pass `_check_img`, the file recorded in `history` is moved to the
    expected image path.

    Args:
        url: URL of the gallery.
        gallery_dir: directory of the gallery.
        config: configuration dict, forwarded to the metadata download.
        logger: logger instance.
        history: dict mapping image_token -> path of an existing image file.
            It is modified in place: every entry that has been moved is removed.
    """
    _download_gallery_meta(url, gallery_dir, config, logger)
    for img, meta in _iter_metadata(gallery_dir, logger):
        if 'image_token' not in meta:
            continue
        image_token = meta['image_token']
        if image_token not in history:
            continue
        imgfile = os.path.join(gallery_dir, img)
        if _check_img(imgfile, meta, logger):
            continue  # a valid copy is already in place
        # os.replace overwrites an existing (invalid) destination in one step,
        # so there is no window in which the destination has been deleted but
        # the replacement has not arrived yet.
        os.replace(history[image_token], imgfile)
        del history[image_token]
ex_cd/common.py ADDED
@@ -0,0 +1,87 @@
1
+ import os
2
+ from tempfile import TemporaryDirectory
3
+ import json
4
+ import random
5
+ import re
6
+ import ex_cd.gallery_dl_exec as gallery_dl
7
+
8
# Scheme and host of either supported site; the dot is escaped so that only a
# literal "." between "hentai" and "org" matches.
replace_site_re = re.compile(r"https://e[-x]hentai\.org")


def replace_site(url, config):
    """Substitute the site part of `url` with ``config["replace-site"]``.

    Args:
        url: URL that may contain ``https://e-hentai.org`` or ``https://exhentai.org``.
        config: configuration dict; ``"replace-site"`` holds the replacement
            or a falsy value to disable the substitution.

    Returns:
        The rewritten URL, or `url` unchanged when no replacement is configured.
    """
    site = config["replace-site"]
    return replace_site_re.sub(site, url) if site else url
14
+
15
+
16
+ META_FOLDER = '.metadata'
17
+
18
+ metadata_args = ["--write-metadata", "--postprocessor-option", f"directory={META_FOLDER}"]
19
+
20
+
21
def _download_gallery_metadata_and_extract_gallery_dir(url, config, logger):
    '''Download one metadata json file of the gallery and return the gallery directory.

    gallery-dl is started with ``--no-download --no-skip --range 1`` and an
    ``--exec-after`` command that echoes ``{_directory}`` into a file inside a
    temporary directory; that file is read back to obtain the directory.

    Raises:
        ValueError: gallery-dl returned a non-zero code or the file was not written.
    '''
    with TemporaryDirectory() as tmp_dir:
        out_file = os.path.join(tmp_dir, 'temp.txt')
        exec_prefix = config["gallery-dl-exec"]
        extra_meta_args = config["gallery-dl-meta-args"]
        command = [
            *exec_prefix, "--no-download", "--no-skip", "--range", "1",
            "--exec-after", "echo {_directory} > %s" % out_file,
            *metadata_args, *extra_meta_args, replace_site(url, config)
        ]
        logger.debug(f"Exec: {command}")
        status = gallery_dl.main(*command)
        if status != 0 or not os.path.isfile(out_file):
            raise ValueError("Cannot get gallery by gallery-dl")
        with open(out_file, encoding='utf8') as fp:
            gallery_dir = os.path.join(fp.read().strip())
        # Windows clean-up: drop a leading and a trailing double quote, then a
        # leading backslashes-?-backslashes prefix.
        for pattern in (r'^"', r'"$', r'^\\+\?\\+'):
            gallery_dir = re.sub(pattern, "", gallery_dir)
        gallery_dir = os.path.join(gallery_dir)
        return gallery_dir
45
+
46
+
47
def _get_gallery_metadata_filenames(gallery_dir):
    """List the names of the ``.json`` files in the gallery's metadata folder.

    The metadata folder is created when it does not exist yet, so the result
    is an empty list for a fresh gallery.
    """
    metadata_root = os.path.join(gallery_dir, META_FOLDER)
    os.makedirs(metadata_root, exist_ok=True)
    return [
        entry for entry in os.listdir(metadata_root)
        if os.path.splitext(entry)[1] == '.json'
    ]
55
+
56
+
57
def _get_gallery_metadata_files_path(gallery_dir):
    """Return the paths of all metadata json files of the gallery."""
    metadata_root = os.path.join(gallery_dir, META_FOLDER)
    paths = []
    for filename in _get_gallery_metadata_filenames(gallery_dir):
        paths.append(os.path.join(metadata_root, filename))
    return paths
60
+
61
+
62
def _try_get_gallery_one_metadata_from_dir(gallery_dir, logger):
    '''Load one randomly picked metadata json file of the gallery.

    Returns the parsed content, or None when `gallery_dir` is None, the
    gallery has no metadata files, or the picked file cannot be loaded (the
    failure is logged as an error).
    '''
    if gallery_dir is None:
        return None
    candidates = _get_gallery_metadata_files_path(gallery_dir)
    if len(candidates) == 0:
        return None
    picked = candidates[random.randint(0, len(candidates) - 1)]
    try:
        with open(picked, encoding='utf8') as handle:
            return json.load(handle)
    except Exception as err:
        logger.error(f"Cannot load exist json file {picked}: {err}")
    return None
73
+
74
+
75
def get_gallery_one_metadata(url, gallery_dir, config, logger):
    '''Return one metadata dict of the gallery, downloading it when none is stored.

    `gallery_dir` may be None; it is then taken from the directory reported
    by the download.

    Raises:
        ValueError: the downloaded directory differs from `gallery_dir`, or no
            metadata can be loaded even after the download.
    '''
    meta = _try_get_gallery_one_metadata_from_dir(gallery_dir, logger)
    if meta:
        return meta
    reported_dir = _download_gallery_metadata_and_extract_gallery_dir(url, config, logger)
    if gallery_dir is None:
        gallery_dir = reported_dir
    expected, actual = os.path.abspath(gallery_dir), os.path.abspath(reported_dir)
    if expected != actual:
        raise ValueError(f"gallery_dir not match: {gallery_dir} != {reported_dir}")
    meta = _try_get_gallery_one_metadata_from_dir(gallery_dir, logger)
    if not meta:
        raise ValueError(f"Cannot get metadata: {url, gallery_dir}")
    return meta
ex_cd/config.py ADDED
@@ -0,0 +1,92 @@
1
+ import argparse
2
+ import logging
3
+ import json
4
+ import os
5
+ import sys
6
+
7
+
8
def build_parser():
    """Create the ArgumentParser of the downloader command line.

    Options: ``-c/--config`` (path or json string), ``-q/--quiet`` and
    ``-v/--verbose`` (both store a logging level in ``loglevel``, which
    defaults to ``logging.INFO``), plus the positional gallery ``url``.
    """
    parser = argparse.ArgumentParser(usage="%(prog)s [OPTION]... URL...")

    general_group = parser.add_argument_group("General Options")
    general_group.add_argument(
        "-c", "--config",
        dest="config",
        type=str,
        default=None,
        help="Path to config json file or a json string",
    )

    output_group = parser.add_argument_group("Output Options")
    # -q is registered first and carries the shared default for `loglevel`.
    output_group.add_argument(
        "-q", "--quiet",
        dest="loglevel",
        action="store_const",
        const=logging.ERROR,
        default=logging.INFO,
        help="Activate quiet mode",
    )
    output_group.add_argument(
        "-v", "--verbose",
        dest="loglevel",
        action="store_const",
        const=logging.DEBUG,
        help="Print various debugging information",
    )

    parser.add_argument(
        "url",
        type=str,
        help="URL of the gallery to download",
    )
    return parser
38
+
39
+
40
config = {
    # Replace https://e-hentai.org or https://exhentai.org by this before start download
    "replace-site": None,
    # Root for the gallery
    "gallery-root": None,
    # Regular expression to extract path of the file from URL
    "path-re": "^https://e[-x]hentai.org/g/([0-9]+)/[0-9a-z]+/*$",
    # if specified "gallery-root" and valid "path-re", get_gallery_dir_by_re will be used to get path

    # Retry times on failure
    "retry": 3,

    # Maximum depth of the gallery history
    "depth": 8,

    # Executable gallery-dl commandline program
    "gallery-dl-exec": [sys.executable, "-m", "gallery_dl"],
    # Args for running gallery-dl commandline program
    "gallery-dl-args": [],
    # Args for running gallery-dl commandline program for meta extraction
    "gallery-dl-meta-args": [],
}


def _apply_config_override(override, source, logger):
    """Copy the known keys of `override` into the module-level `config`."""
    if not isinstance(override, dict):
        logger.warning("Ignore config %s: expect a json object" % source)
        return
    for k in config:
        if k in override:
            config[k] = override[k]


def read_config(args, logger):
    """Merge the user's configuration into the module-level `config` and return it.

    Two sources are applied in order, the later one winning per key:

    1. the json file named by the ``EXCD_CONFIG_FILE`` environment variable;
    2. ``args.config``, which is a path to a json file when such a file
       exists and is otherwise parsed as a json string.

    Only keys already present in `config` are taken over.  A source that
    cannot be read or parsed is reported with a warning and skipped.

    Args:
        args: parsed command line; only ``args.config`` is read.
        logger: logger used for warnings and the final config dump.

    Returns:
        The module-level `config` dict (updated in place).
    """
    env_file = os.environ.get("EXCD_CONFIG_FILE")
    if env_file is not None:
        try:
            with open(env_file, encoding='utf8') as f:
                override = json.load(f)
        except Exception as e:
            logger.warning("Cannot read EXCD_CONFIG_FILE %s: %s" % (env_file, e))
        else:
            _apply_config_override(override, env_file, logger)

    if args.config is not None:
        # `loaded` stays False on failure so that nothing is applied from a
        # source that could not be read (previously this hit an unbound or
        # stale `override`).
        loaded = False
        override = None
        if os.path.isfile(args.config):
            try:
                with open(args.config, encoding='utf8') as f:
                    override = json.load(f)
                loaded = True
            except Exception as e:
                logger.warning("Cannot read config %s: %s" % (args.config, e))
        else:
            try:
                override = json.loads(args.config)
                loaded = True
            except Exception as e:
                logger.warning("Cannot parse config %s: %s" % (args.config, e))
        if loaded:
            _apply_config_override(override, args.config, logger)

    logger.info("Config: %s" % config)
    return config
ex_cd/deprecate.py ADDED
@@ -0,0 +1,44 @@
1
+ import os
2
+ import json
3
+ from .common import META_FOLDER, _get_gallery_metadata_filenames
4
+ from .validate import _validate_gallery
5
+ from .meta import _download_gallery_meta
6
+
7
+
8
+ DEPRECAT_COMPLETED_FILE = 'DeprecateCompleted'
9
+
10
+
11
def _map_image_tokens(gallery_dir):
    """Map each ``image_token`` found in the gallery's metadata files to its image path."""
    token_to_path = {}
    for metafile in _get_gallery_metadata_filenames(gallery_dir):
        imgfile = metafile[0:-5]  # metadata file name minus the trailing ".json"
        metapath = os.path.join(gallery_dir, META_FOLDER, metafile)
        with open(metapath, 'r', encoding='utf8') as fp:
            meta = json.load(fp)
        token_to_path[meta['image_token']] = os.path.join(gallery_dir, imgfile)
    return token_to_path


def _deprecate_gallery_history(parent_url, parent_gallery_dir, child_url, child_gallery_dir, config, logger):
    """Move deprecated images from parent gallery to child gallery (reverse of _load_gallery_history)

    Every image of the parent whose ``image_token`` also occurs in the child's
    metadata is moved to the path the child expects.  When finished, a marker
    file is written into the parent's metadata folder; if that marker already
    exists the function returns immediately.

    Raises:
        RuntimeError: the parent gallery does not validate.
    """
    ok_file = os.path.join(parent_gallery_dir, META_FOLDER, DEPRECAT_COMPLETED_FILE)
    if os.path.isfile(ok_file):
        return
    if not _validate_gallery(parent_url, parent_gallery_dir, config, logger):
        raise RuntimeError(f"Cannot deprecate from a invalid gallery {parent_gallery_dir}")
    _download_gallery_meta(child_url, child_gallery_dir, config, logger)
    # move from parent_gallery_dir
    src = _map_image_tokens(parent_gallery_dir)
    # move to child_gallery_dir
    dst = _map_image_tokens(child_gallery_dir)
    # move them
    for src_image_token, src_imgpath in src.items():
        if src_image_token not in dst:
            continue
        # A previous, interrupted run may already have moved this image; skip
        # it instead of failing so that the operation can be resumed.
        if not os.path.isfile(src_imgpath):
            continue
        os.replace(src_imgpath, dst[src_image_token])
    with open(ok_file, "w", encoding='utf8'):
        return  # record that this gallery has been deprecated
ex_cd/download.py ADDED
@@ -0,0 +1,74 @@
1
+ import os
2
+ import ex_cd.gallery_dl_exec as gallery_dl
3
+ from .validate import _validate_gallery, DOWNLOAD_RESUME_FILE, VALIDATE_COMPLETED_FILE
4
+ from .collect import _load_gallery_history
5
+ from .common import metadata_args, META_FOLDER, replace_site
6
+ from .history import _get_gallery_parent_url, _get_gallery_dir
7
+ from .deprecate import _deprecate_gallery_history
8
+ from .collect import _collect_gallery_history
9
+
10
+
11
def _gather_gallery_history(url, gallery_dir, config, logger):
    """Collect the verified images of this gallery and of all its ancestors.

    A gallery that already carries the validate-completed marker contributes
    nothing itself.  On a token collision the entry of the ancestor wins.

    Returns:
        dict mapping image_token -> image path.
    """
    collected = {}
    completed_marker = os.path.join(gallery_dir, META_FOLDER, VALIDATE_COMPLETED_FILE)
    if not os.path.isfile(completed_marker):  # if not complete
        collected = _collect_gallery_history(gallery_dir, config, logger)  # collect existing history
    parent_url = _get_gallery_parent_url(url, gallery_dir, config, logger)
    if parent_url == '':  # if no parent
        return collected  # just return it
    parent_dir = _get_gallery_dir(parent_url, config, logger)
    ancestors = _gather_gallery_history(parent_url, parent_dir, config, logger)
    merged = dict(collected)
    merged.update(ancestors)
    return merged
21
+
22
+
23
def download_gallery_history(url, gallery_dir, config, logger, history=None, depth=0):
    """Download all the history of the gallery

    Ancestors are downloaded first (recursively, oldest first), then their
    images are moved into this gallery where the child still uses them, and
    finally the rest of this gallery is downloaded.

    Args:
        url: URL of the gallery.
        gallery_dir: directory of the gallery.
        config: configuration dict; ``config["depth"]`` limits the recursion.
        logger: logger instance.
        history: dict image_token -> path of already downloaded images that
            may be reused; defaults to an empty dict.
        depth: current recursion depth.
    """
    # A fresh dict per call: a `{}` default would be shared between calls.
    if history is None:
        history = {}
    parent_url = _get_gallery_parent_url(url, gallery_dir, config, logger)
    if parent_url == '':  # if no parent
        return _download_gallery(url, gallery_dir, config, logger, history)  # just download it
    # if has parent
    parent_gallery_dir = _get_gallery_dir(parent_url, config, logger)
    if depth >= config["depth"]:
        # depth limit reached: do not download the ancestors, only reuse what
        # is already on disk for them
        return _download_gallery(url, gallery_dir, config, logger,
                                 {**history, **_gather_gallery_history(parent_url, parent_gallery_dir, config, logger)})
    ok_file = os.path.join(gallery_dir, META_FOLDER, VALIDATE_COMPLETED_FILE)
    if not os.path.isfile(ok_file):  # if not complete
        history = {**history, **_collect_gallery_history(gallery_dir, config, logger)}  # collect existing history
    # in this process, the VALIDATE_COMPLETED_FILE will be placed from old history to new
    # if VALIDATE_COMPLETED_FILE is placed here, there are two conditions:
    # 1. all the old galleries are downloaded
    # 2. this gallery was downloaded by download_gallery_latest and there are old galleries not downloaded
    # in both conditions, this gallery should not be used as history
    download_gallery_history(parent_url, parent_gallery_dir, config, logger, history, depth+1)  # download parent
    _deprecate_gallery_history(parent_url, parent_gallery_dir, url, gallery_dir,
                               config, logger)  # deprecate from parent
    return _download_gallery(url, gallery_dir, config, logger, history)  # download the rest
45
+
46
+
47
def _download_gallery(url, gallery_dir, config, logger, history=None):
    """download by gallery_dl and validate

    Returns as soon as the gallery validates: before doing anything, after
    reusing images from `history`, or after running gallery-dl.

    Args:
        url: URL of the gallery.
        gallery_dir: directory of the gallery.
        config: configuration dict.
        logger: logger instance.
        history: dict image_token -> path of reusable images; defaults to an
            empty dict.  It is handed to `_load_gallery_history`.

    Raises:
        RuntimeError: the gallery still does not validate after gallery-dl ran.
    """
    # A fresh dict per call: a `{}` default would be shared between calls.
    if history is None:
        history = {}
    if _validate_gallery(url, gallery_dir, config, logger):  # validate the gallery
        return  # exit
    _load_gallery_history(url, gallery_dir, config, logger, history)  # load existing history
    if _validate_gallery(url, gallery_dir, config, logger):  # validate the gallery
        return  # record that this gallery has been downloaded
    resume_url = url
    resume_file = os.path.join(gallery_dir, META_FOLDER, DOWNLOAD_RESUME_FILE)
    try:
        with open(resume_file, "r", encoding="utf8") as fp:
            # Strip the line ending; an empty resume file falls back to `url`.
            resume_url = fp.readline().strip() or url
    except (OSError, UnicodeDecodeError):
        pass  # best effort: without a readable resume file start from `url`
    gallery_dl_exec = config["gallery-dl-exec"]
    # NOTE(review): the full download uses "gallery-dl-meta-args", while the
    # config also defines "gallery-dl-args" -- confirm which one is intended.
    gallery_dl_meta_args = config["gallery-dl-meta-args"]
    args = [
        *gallery_dl_exec,
        *metadata_args, *gallery_dl_meta_args, replace_site(resume_url, config)
    ]
    logger.debug(f"Exec: {args}")
    returncode = gallery_dl.main(*args)
    if _validate_gallery(url, gallery_dir, config, logger):  # validate the gallery
        return  # record that this gallery has been downloaded
    elif returncode != 0:
        raise RuntimeError(f"Download failed: {url} -> {gallery_dir}")
    else:
        raise RuntimeError(f"Download not valid: {url} -> {gallery_dir}")
@@ -0,0 +1,21 @@
1
+ import subprocess
2
+
3
+
4
def main(*args):
    """Run `args` as a child process, wait for it and return its exit code."""
    with subprocess.Popen(args=args) as child:
        return child.wait()
6
+
7
+
8
# Manual debugging entry: change to the directory two levels above the script
# path and run gallery-dl in metadata-only mode against a fixed gallery URL.
if __name__ == "__main__":
    import sys
    import os
    script_parent = os.path.dirname(os.path.dirname(sys.argv[0]))
    os.chdir(script_parent)
    debug_command = [
        sys.executable, "-m", "gallery_dl",
        "--no-download",
        "--write-metadata",
        "--postprocessor-option", 'directory=metadata',
        '-v',
        '--sleep-request', '1',
        '-c', '.vscode/gallery-dl.config.json',
        'https://exhentai.org/g/2752577/8ffd3778cb/',
    ]
    main(*debug_command)
ex_cd/history.py ADDED
@@ -0,0 +1,76 @@
1
+ import os
2
+ import re
3
+ from .common import get_gallery_one_metadata, _download_gallery_metadata_and_extract_gallery_dir
4
+ from .common import META_FOLDER
5
+
6
+ CHILD_NAME = 'child.url'
7
+
8
+
9
+ def _get_gallery_dir_by_re(url, path_re, root):
10
+ dirname = re.findall(path_re, url)[0]
11
+ return os.path.join(root, dirname)
12
+
13
+
14
def _get_gallery_dir_by_gdl(url, config, logger):
    """Ask gallery-dl for the gallery directory of `url` (downloads one metadata file)."""
    gallery_dir = _download_gallery_metadata_and_extract_gallery_dir(url, config, logger)
    return gallery_dir
16
+
17
+
18
def _get_gallery_dir(url, config, logger):
    """Return the directory of the gallery at `url`.

    When both ``config['gallery-root']`` and ``config['path-re']`` are set the
    directory is derived from the URL by regular expression.  If they are not
    set, or the derivation fails (a warning is logged), gallery-dl is asked
    instead.
    """
    try:
        if config['gallery-root'] and config['path-re']:
            return _get_gallery_dir_by_re(url, config['path-re'], config['gallery-root'])
    except Exception as e:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning("Should parse by path-re and gallery-root, but error: %s" % e)
    return _get_gallery_dir_by_gdl(url, config, logger)
25
+
26
+
27
# Captures the numeric gallery id of a gallery URL; the dot of the host is
# escaped so that only a literal "." matches.
url2gid_re = re.compile(r"^https://e[-x]hentai\.org/g/([0-9]+)/[0-9a-z]+/*$")


def _isparent(url, gallery_dir, child_url, child_gallery_dir, config, logger):
    """Tell whether the gallery at `url` is the recorded parent of `child_url`.

    Compares the ``gid`` of this gallery's metadata with the gallery id in
    the ``parent`` URL of the child's metadata.

    Returns:
        True when they are equal; False when they differ or when the child's
        metadata has no ``parent`` URL of the expected form.
    """
    metadata = get_gallery_one_metadata(url, gallery_dir, config, logger)
    child_metadata = get_gallery_one_metadata(child_url, child_gallery_dir, config, logger)
    # A missing or empty parent means "not the parent" rather than an
    # IndexError/KeyError from the lookup.
    match = url2gid_re.match(child_metadata.get('parent') or '')
    if match is None:
        return False
    return str(metadata['gid']) == match.group(1)
34
+
35
+
36
def get_latest_url(url, config, logger):
    '''Find url of the latest child of `url`

    Follows the child URL stored in each gallery's metadata folder until a
    gallery without such a file is reached.

    Returns:
        tuple ``(latest_url, gallery_dir)``.

    Raises:
        ValueError: a stored child does not name this gallery as its parent.
    '''
    gallery_dir = _get_gallery_dir(url, config, logger)
    child_path = os.path.join(gallery_dir, META_FOLDER, CHILD_NAME)
    if not os.path.isfile(child_path):
        return url, gallery_dir
    with open(child_path, encoding='utf8') as fp:
        child_url = fp.read().strip()
    # Stays None unless the directory can be derived from the URL.
    child_gallery_dir = None
    try:
        if config['gallery-root'] and config['path-re']:
            child_gallery_dir = _get_gallery_dir_by_re(child_url, config['path-re'], config['gallery-root'])
    except Exception as e:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning("Should parse by path-re and gallery-root, but error: %s" % e)
    if not _isparent(url, gallery_dir, child_url, child_gallery_dir, config, logger):  # cross-check
        raise ValueError(f"{url} is not the parent of {child_url}")
    return get_latest_url(child_url, config, logger)
53
+
54
+
55
def _get_gallery_parent_url(url, gallery_dir, config, logger):
    """Return the ``parent`` field of the gallery's metadata.

    Raises:
        ValueError: the metadata has no ``parent`` key.
    """
    gallery_meta = get_gallery_one_metadata(url, gallery_dir, config, logger)
    if 'parent' in gallery_meta:
        return gallery_meta['parent']
    raise ValueError(f"No 'parent' in {url}")
60
+
61
+
62
def put_history_placeholder(url, gallery_dir, config, logger):
    '''Record `url` as the child of its parent gallery, then repeat for every ancestor.

    The child URL is written into a file in the parent's metadata folder
    unless that file already holds exactly this URL.  Stops at the gallery
    whose parent URL is empty.
    '''
    parent_url = _get_gallery_parent_url(url, gallery_dir, config, logger)
    if parent_url == '':
        return
    parent_dir = _get_gallery_dir(parent_url, config, logger)
    parent_meta_dir = os.path.join(parent_dir, META_FOLDER)
    child_file = os.path.join(parent_dir, META_FOLDER, CHILD_NAME)
    os.makedirs(parent_meta_dir, exist_ok=True)

    already_recorded = False
    if os.path.isfile(child_file):
        with open(child_file, 'r', encoding='utf8') as fp:
            already_recorded = (url == fp.read().strip())
    if not already_recorded:
        with open(child_file, 'w', encoding='utf8') as fp:
            fp.write(url)
    return put_history_placeholder(parent_url, parent_dir, config, logger)
ex_cd/meta.py ADDED
@@ -0,0 +1,123 @@
1
+ import os
2
+ import re
3
+ import json
4
+ import ex_cd.gallery_dl_exec as gallery_dl
5
+ from .common import META_FOLDER, metadata_args, _get_gallery_metadata_filenames, get_gallery_one_metadata, replace_site
6
+
7
+
8
+ META_DOWNLOAD_RESUME_FILE = 'MetaDownloadResume'
9
+
10
+
11
def _download_gallery_meta(url, gallery_dir, config, logger):
    """download gallery metadata by gallery_dl

    Does nothing when the metadata already validates.  Otherwise gallery-dl
    is run in ``--no-download`` mode, starting from the URL stored in the
    meta resume file when there is one, and the metadata is validated again.

    Raises:
        RuntimeError: the metadata still does not validate after gallery-dl ran.
    """
    if _valid_gallery_meta(url, gallery_dir, config, logger):  # validate the gallery
        return  # exit
    resume_url = url
    resume_file = os.path.join(gallery_dir, META_FOLDER, META_DOWNLOAD_RESUME_FILE)
    try:
        with open(resume_file, "r", encoding="utf8") as fp:
            # Strip the line ending; an empty resume file falls back to `url`.
            resume_url = fp.readline().strip() or url
    except (OSError, UnicodeDecodeError):
        pass  # best effort: without a readable resume file start from `url`
    gallery_dl_exec = config["gallery-dl-exec"]
    gallery_dl_meta_args = config["gallery-dl-meta-args"]
    args = [
        *gallery_dl_exec, "--no-download", "--no-skip",
        *metadata_args, *gallery_dl_meta_args, replace_site(resume_url, config)
    ]
    logger.debug(f"Exec: {args}")
    returncode = gallery_dl.main(*args)
    if _valid_gallery_meta(url, gallery_dir, config, logger):  # validate the gallery
        return  # record that this gallery has been downloaded
    elif returncode != 0:
        raise RuntimeError(f"Download gallery meta failed: {url} -> {gallery_dir}")
    else:
        raise RuntimeError(f"Download gallery meta invalid: {url} -> {gallery_dir}")
36
+
37
+
38
+ url2gid_re = re.compile(r"^https://e[-x]hentai.org/g/([0-9]+)/[0-9a-z]+/*$")
39
+
40
+
41
+ def _url2gid_by_re(url):
42
+ return re.findall(url2gid_re, url)[0]
43
+
44
+
45
+ url2site_re = re.compile(r"(^https://e[-x]hentai.org)/g/[0-9]+/[0-9a-z]+/*$")
46
+
47
+
48
+ def _url2site_by_re(url):
49
+ return re.findall(url2site_re, url)[0]
50
+
51
+
52
def _get_image_tokens(url, gallery_dir, config, logger):
    """Collect, per page number, the image token and metadata file name of a gallery.

    Metadata files whose ``gid`` differs from the gallery id in `url` are
    ignored.  When several metadata files claim the same page number, all of
    them are deleted, the meta-validate marker is removed and a ValueError is
    raised so that the metadata download is started again.

    Returns:
        tuple ``(site, gid, image_tokens, metafilenames)``; both lists have
        ``filecount`` entries and hold None for pages without metadata.

    Raises:
        ValueError: ``filecount`` is missing, or conflicting metadata files
            were found (and deleted).
    """
    # check if has enough metadata json files
    gallery_meta = get_gallery_one_metadata(url, gallery_dir, config, logger)
    if 'filecount' not in gallery_meta:
        raise ValueError("'filecount' not in metadata")
    page_count = int(gallery_meta['filecount'])
    metafilenames = [None] * page_count
    image_tokens = [None] * page_count
    conflicts = [[] for _ in range(page_count)]
    site = _url2site_by_re(url)
    gid = _url2gid_by_re(url)

    for metafilename in _get_gallery_metadata_filenames(gallery_dir):
        metafile = os.path.join(gallery_dir, META_FOLDER, metafilename)
        try:
            with open(metafile, "r", encoding="utf8") as fp:
                page_meta = json.load(fp)
            if str(page_meta["gid"]) != gid:
                continue
            slot = page_meta["num"] - 1
            # sometimes there are deprecated metafiles
            if metafilenames[slot] is not None:
                # second claim on this page: drop both files
                conflicts[slot].append(metafilenames[slot])
                conflicts[slot].append(metafilename)
                image_tokens[slot] = metafilenames[slot] = None
            elif len(conflicts[slot]) > 0:
                # page already known to be conflicting: drop this file too
                conflicts[slot].append(metafilename)
            else:
                image_tokens[slot] = page_meta["image_token"]
                metafilenames[slot] = metafilename
        except Exception as err:
            logger.error(f"Invalid metadata {metafile}: {err}")

    # delete deprecated metafiles
    n = 0
    for conflicting_names in conflicts:
        for name in conflicting_names:
            metafile = os.path.join(gallery_dir, META_FOLDER, name)
            # NOTE(review): the flattened source does not show whether the
            # counter sits inside this check; only removed files are counted here.
            if os.path.exists(metafile):
                os.remove(metafile)
                n += 1
    if n > 0:
        completefile = os.path.join(gallery_dir, META_FOLDER, META_VALIDATE_COMPLETED_FILE)
        if os.path.exists(completefile):
            os.remove(completefile)
        raise ValueError(f"There are {n} deperated metafiles! just deleted! should restart meta doanload!")
    return site, gid, image_tokens, metafilenames
94
+
95
+
96
+ META_VALIDATE_COMPLETED_FILE = 'MetaValidateCompleted'
97
+
98
+
99
def _valid_gallery_meta(url, gallery_dir, config, logger):
    """validate the gallery metadata

    The metadata is valid when a metadata file with an image token exists for
    every page.  A successful validation is recorded with a marker file, and
    later calls return True as soon as that marker exists.

    When pages are missing, the URL of the last page before the first gap
    (or `url` when the first page is missing) is written to the meta resume
    file.

    Returns:
        True when the metadata is complete, False when pages are missing or
        the metadata cannot be inspected.
    """
    ok_file = os.path.join(gallery_dir, META_FOLDER, META_VALIDATE_COMPLETED_FILE)
    if os.path.isfile(ok_file):  # if valid
        return True  # exit
    # check if has enough metadata json files
    resume_url = url
    try:
        site, gid, image_tokens, _ = _get_image_tokens(url, gallery_dir, config, logger)
        for i, image_token in enumerate(image_tokens):
            if image_token:
                resume_url = f"{site}/s/{image_token}/{gid}-{i+1}"
            else:
                break
        if None in image_tokens:
            resume_file = os.path.join(gallery_dir, META_FOLDER, META_DOWNLOAD_RESUME_FILE)
            with open(resume_file, "w", encoding='utf8') as fp:
                fp.write(resume_url)
            logger.error(f"Invalid {gallery_dir}: no enough metadata files, should resume from {resume_url}")
            return False
    except Exception as e:
        logger.error(f"Invalid {gallery_dir}: {e}")
        # A gallery that could not be inspected must not be marked as
        # validated; previously this path fell through to the marker below.
        return False

    with open(ok_file, "w", encoding='utf8'):
        return True  # record that this gallery has been validated
ex_cd/output.py ADDED
@@ -0,0 +1,33 @@
1
+ import logging
2
+
3
+
4
class Logger(logging.Logger):
    """Logger whose records take over every key of ``extra`` without restriction"""

    def makeRecord(self, name, level, fn, lno, msg, args, exc_info,
                   func=None, extra=None, sinfo=None,
                   factory=logging._logRecordFactory):
        """Create a log record and copy all items of `extra` onto it.

        Existing record attributes are overwritten by same-named keys of `extra`.
        """
        record = factory(name, level, fn, lno, msg, args, exc_info, func, sinfo)
        if extra:
            vars(record).update(extra)
        return record
14
+
15
+
16
def initialize_logging(loglevel):
    """Configure process-wide logging and return the ``ex-cd`` logger.

    Level names are switched to lowercase, the custom `Logger` class is
    registered, and a stream handler filtering at `loglevel` is attached to
    the root logger (whose own level is set to NOTSET).
    """
    # convert levelnames to lowercase
    standard_levels = (logging.DEBUG, logging.INFO, logging.WARNING,
                       logging.ERROR, logging.CRITICAL)
    for level in standard_levels:
        logging.addLevelName(level, logging.getLevelName(level).lower())

    # register custom Logging class
    logging.Logger.manager.setLoggerClass(Logger)

    # setup basic logging to stderr
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(loglevel)
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.NOTSET)
    root_logger.addHandler(stream_handler)

    return logging.getLogger("ex-cd")
@@ -0,0 +1,29 @@
1
+ import json
2
+ from .config import build_parser
3
+ from .delete import delete_gallery_history
4
+ from .latest_meta import get_latest_gallery_metadata
5
+ from ..history import put_history_placeholder
6
+ from ..config import read_config
7
+ from ..output import initialize_logging
8
+
9
+
10
def main():
    """Entry point of the tools command line.

    ``delete`` removes the whole history of a gallery.  ``latest-meta``
    fetches the metadata of the latest version, tags the parent galleries and
    writes the metadata as json to ``args.output`` or, without it, to stdout.
    """
    args = build_parser().parse_args()
    logger = initialize_logging(args.loglevel)
    config = read_config(args, logger)
    logger.info(f"Parsed config: {args}")

    command = args.command
    if command == "delete":
        delete_gallery_history(args.url, config, logger)
        return
    if command != "latest-meta":
        return

    latest_url, gallery_dir, metadata = get_latest_gallery_metadata(args.url, config, logger)
    put_history_placeholder(latest_url, gallery_dir, config, logger)
    rendered = json.dumps(metadata, ensure_ascii=False, indent=2)
    if not args.output:
        print(rendered)
        return
    with open(args.output, "w", encoding="utf8") as fp:
        fp.write(rendered)
    logger.info(f"Metadata saved to {args.output}")
@@ -0,0 +1,5 @@
1
+ import sys
2
+ import ex_cd.tools
3
+
4
# Executed for `python -m ex_cd.tools`: run the tools CLI and hand its result to sys.exit.
if __name__ == "__main__":
    exit_status = ex_cd.tools.main()
    sys.exit(exit_status)
ex_cd/tools/config.py ADDED
@@ -0,0 +1,48 @@
1
+ import argparse
2
+ import logging
3
+
4
+
5
def build_parser():
    """Create the ArgumentParser of the tools command line.

    Common options (``-c``, ``-q``, ``-v``) come before a mandatory
    subcommand: ``delete URL`` or ``latest-meta [-o FILE] URL``.
    """
    parser = argparse.ArgumentParser(
        prog="ex_cd.tools",
        description="ex-cd tools for gallery management",
    )

    # Common options
    parser.add_argument(
        "-c", "--config",
        dest="config",
        type=str,
        default=None,
        help="Path to config json file or a json string",
    )
    # -q is registered first and carries the shared default for `loglevel`.
    parser.add_argument(
        "-q", "--quiet",
        dest="loglevel",
        action="store_const",
        const=logging.ERROR,
        default=logging.INFO,
        help="Activate quiet mode",
    )
    parser.add_argument(
        "-v", "--verbose",
        dest="loglevel",
        action="store_const",
        const=logging.DEBUG,
        help="Print various debugging information",
    )

    # Subcommands
    commands = parser.add_subparsers(dest="command", required=True, help="Available commands")

    # delete subcommand
    delete_cmd = commands.add_parser("delete", help="Delete all history of a gallery")
    delete_cmd.add_argument("url", type=str, help="URL of the gallery to delete")

    # latest-meta subcommand
    latest_meta_cmd = commands.add_parser(
        "latest-meta", help="Get metadata of the latest version of a gallery")
    latest_meta_cmd.add_argument("url", type=str, help="URL of the gallery")
    latest_meta_cmd.add_argument(
        "-o", "--output",
        dest="output",
        type=str,
        default=None,
        help="Output file path for metadata JSON (default: print to stdout)",
    )

    return parser
ex_cd/tools/delete.py ADDED
@@ -0,0 +1,119 @@
1
+ import os
2
+ import shutil
3
+ from ..common import META_FOLDER
4
+ from ..meta import META_VALIDATE_COMPLETED_FILE, META_DOWNLOAD_RESUME_FILE
5
+ from ..validate import VALIDATE_COMPLETED_FILE, DOWNLOAD_RESUME_FILE
6
+ from ..deprecate import DEPRECAT_COMPLETED_FILE
7
+ from ..history import get_latest_url, put_history_placeholder, _get_gallery_dir, _get_gallery_parent_url
8
+
9
+
10
def _get_all_history_dirs(url, gallery_dir, config, logger):
    """List ``(url, gallery_dir)`` for the gallery and each ancestor, newest first.

    The walk ends at an empty parent URL or at the first gallery whose parent
    URL cannot be determined.
    """
    chain = [(url, gallery_dir)]
    try:
        next_url = _get_gallery_parent_url(url, gallery_dir, config, logger)
    except Exception as err:
        logger.debug(f"No parent for {url}: {err}")
        return chain
    while next_url:
        next_dir = _get_gallery_dir(next_url, config, logger)
        chain.append((next_url, next_dir))
        try:
            next_url = _get_gallery_parent_url(next_url, next_dir, config, logger)
        except Exception:
            return chain
    return chain
26
+
27
+
28
def _delete_gallery_content(gallery_dir, logger):
    """Remove a gallery directory step by step, so that an interrupted run can be repeated.

    Order: download markers, everything outside the metadata folder, meta
    markers, metadata json files, remaining metadata files, the metadata
    folder, and finally the gallery folder itself.
    """
    meta_folder = os.path.join(gallery_dir, META_FOLDER)

    # Step 1: Delete download completion markers
    download_markers = (VALIDATE_COMPLETED_FILE, DOWNLOAD_RESUME_FILE, DEPRECAT_COMPLETED_FILE)
    for name in download_markers:
        marker_path = os.path.join(meta_folder, name)
        if not os.path.isfile(marker_path):
            continue
        os.remove(marker_path)
        logger.debug(f"Deleted marker: {marker_path}")

    # Step 2: Delete all files outside .metadata (images)
    if os.path.isdir(gallery_dir):
        for entry in os.listdir(gallery_dir):
            if entry == META_FOLDER:
                continue
            item_path = os.path.join(gallery_dir, entry)
            if os.path.isfile(item_path):
                os.remove(item_path)
                logger.debug(f"Deleted file: {item_path}")
            elif os.path.isdir(item_path):
                shutil.rmtree(item_path)
                logger.debug(f"Deleted dir: {item_path}")

    # Step 3: Delete meta completion markers
    meta_markers = (META_VALIDATE_COMPLETED_FILE, META_DOWNLOAD_RESUME_FILE)
    for name in meta_markers:
        marker_path = os.path.join(meta_folder, name)
        if not os.path.isfile(marker_path):
            continue
        os.remove(marker_path)
        logger.debug(f"Deleted meta marker: {marker_path}")

    # Step 4: Delete JSON files in .metadata
    if os.path.isdir(meta_folder):
        json_names = [entry for entry in os.listdir(meta_folder) if entry.endswith('.json')]
        for entry in json_names:
            item_path = os.path.join(meta_folder, entry)
            os.remove(item_path)
            logger.debug(f"Deleted json: {item_path}")

    # Step 5: Delete remaining files in .metadata (like child.url)
    if os.path.isdir(meta_folder):
        for entry in os.listdir(meta_folder):
            item_path = os.path.join(meta_folder, entry)
            if not os.path.isfile(item_path):
                continue
            os.remove(item_path)
            logger.debug(f"Deleted remaining: {item_path}")

    # Step 6: Delete the .metadata folder
    if os.path.isdir(meta_folder):
        try:
            os.rmdir(meta_folder)
        except OSError:
            # not empty (or otherwise not removable by rmdir): remove the tree
            shutil.rmtree(meta_folder)
        logger.debug(f"Deleted metadata folder: {meta_folder}")

    # Step 7: Delete the gallery folder itself
    if os.path.isdir(gallery_dir):
        try:
            os.rmdir(gallery_dir)
        except OSError:
            shutil.rmtree(gallery_dir)
        logger.debug(f"Deleted gallery folder: {gallery_dir}")
90
+
91
+
92
def delete_gallery_history(url, config, logger):
    """Delete every gallery directory in the history chain of a gallery.

    Args:
        url: URL of the gallery (can be any version in the history chain)
        config: Configuration dict
        logger: Logger instance
    """
    # Step 1: Get the latest URL and create placeholder chain
    latest_url, latest_dir = get_latest_url(url, config, logger)
    logger.info(f"Latest gallery: {latest_url} -> {latest_dir}")
    put_history_placeholder(latest_url, latest_dir, config, logger)

    # Step 2: Collect all gallery directories (newest to oldest)
    chain = _get_all_history_dirs(latest_url, latest_dir, config, logger)
    logger.info(f"Found {len(chain)} galleries to delete:")
    for position, (_chain_url, chain_dir) in enumerate(chain, start=1):
        logger.info(f"  [{position}] {chain_dir}")

    # Step 3: Delete from newest to oldest
    for _chain_url, chain_dir in chain:
        if not os.path.isdir(chain_dir):
            logger.debug(f"Already deleted: {chain_dir}")
            continue
        logger.info(f"Deleting: {chain_dir}")
        _delete_gallery_content(chain_dir, logger)

    logger.info("Delete completed")
@@ -0,0 +1,24 @@
1
+ from ..common import get_gallery_one_metadata
2
+ from ..history import get_latest_url
3
+
4
+
5
def get_latest_gallery_metadata(url, config, logger):
    """Fetch the metadata of the newest version of a gallery.

    Args:
        url: URL of the gallery (can be any version in the history chain)
        config: Configuration dict
        logger: Logger instance

    Returns:
        tuple: (latest_url, gallery_dir, metadata)
    """
    # Follow the history chain to the newest version of the gallery.
    latest_url, gallery_dir = get_latest_url(url, config, logger)
    logger.info(f"Latest gallery: {latest_url} -> {gallery_dir}")

    # Load one metadata record for that version and report what was found.
    metadata = get_gallery_one_metadata(latest_url, gallery_dir, config, logger)
    gid = metadata.get('gid')
    title = metadata.get('title')
    logger.info(f"Got metadata for gid={gid}, title={title}")

    return latest_url, gallery_dir, metadata
ex_cd/validate.py ADDED
@@ -0,0 +1,80 @@
1
+ import os
2
+ import re
3
+ import hashlib
4
+ from .common import META_FOLDER
5
+ from .meta import _valid_gallery_meta, _get_image_tokens
6
+
7
+
8
# Name of the empty marker file written into the metadata folder once a
# gallery has passed validation; its presence short-circuits later runs.
VALIDATE_COMPLETED_FILE = 'ValidateCompleted'
# Name of the file in the metadata folder holding the image-page URL from
# which an incomplete download should resume.
DOWNLOAD_RESUME_FILE = 'DownloadResume'


def _read_resume_start(resume_file, gid, logger):
    """Return the 1-based page index recorded in the resume file, or None."""
    try:
        with open(resume_file, "r", encoding="utf8") as fp:
            exist_resume_url = fp.readline()
    except FileNotFoundError:
        return None  # nothing recorded yet: validate from the first image
    except (OSError, ValueError) as e:
        # Reading the resume point is best-effort; an unreadable or
        # undecodable file only means every image gets re-checked.
        logger.debug(f"Ignore unreadable resume file {resume_file}: {e}")
        return None
    gid_pattern = re.escape(str(gid))
    matched = re.search(
        fr"^https://e[-x]hentai\.org/s/[0-9a-z]+/{gid_pattern}-([0-9]+)$",
        exist_resume_url,
    )
    return int(matched.group(1)) if matched else None


def _remove_invalid_image(imgfile, logger):
    """Delete an invalid image; log instead of raising if it cannot be removed."""
    try:
        os.remove(imgfile)
    except OSError as e:
        logger.error(f"Cannot delete {imgfile}: {e}")


def _validate_gallery(url, gallery_dir, config, logger):
    """Validate that a gallery directory holds every image with matching content.

    A gallery is valid when the metadata check passes, every metadata file
    has a same-named image in ``gallery_dir``, and the first 10 hex digits of
    each image's SHA-1 equal its image token. On success an empty
    ``VALIDATE_COMPLETED_FILE`` marker is written so later calls return
    immediately. On failure the offending image (if present) is deleted and
    the page URL to resume from is written to ``DOWNLOAD_RESUME_FILE``.

    Args:
        url: URL of the gallery
        gallery_dir: Directory holding the gallery images and metadata folder
        config: Configuration dict
        logger: Logger instance

    Returns:
        bool: True if the gallery is (or was already marked) valid, else False
    """
    meta_folder = os.path.join(gallery_dir, META_FOLDER)
    ok_file = os.path.join(meta_folder, VALIDATE_COMPLETED_FILE)
    if os.path.isfile(ok_file):  # already validated on an earlier run
        return True

    # check if has enough metadata json files
    if not _valid_gallery_meta(url, gallery_dir, config, logger):
        return False
    site, gid, image_tokens, metafiles = _get_image_tokens(url, gallery_dir, config, logger)

    # Everything in the gallery folder except the metadata folder counts as
    # an image; a set keeps the per-image membership test O(1).
    images = {name for name in os.listdir(gallery_dir) if name != META_FOLDER}

    resume_file = os.path.join(meta_folder, DOWNLOAD_RESUME_FILE)
    start_from = _read_resume_start(resume_file, gid, logger)

    resume_url = None  # set to the first page that fails validation
    for page, (image_token, metafile) in enumerate(zip(image_tokens, metafiles), start=1):
        img = metafile[0:-5]  # drop the trailing 5 chars (presumably ".json")
        page_url = f"{site}/s/{image_token}/{gid}-{page}"
        if img not in images:
            logger.error(f"Invalid {gallery_dir}: no image {img} for {metafile}")
            resume_url = page_url
            break
        # Pages before the recorded resume point only need to exist.
        if start_from is not None and page < start_from:
            logger.debug(f"Skip validate {img}: start from {start_from}, current {page}")
            continue
        # compare image_token with the SHA-1 of the file content
        imgfile = os.path.join(gallery_dir, img)
        try:
            with open(imgfile, mode="rb") as fp:
                sha1 = hashlib.sha1(fp.read()).hexdigest()
        except Exception as e:
            logger.error(f"Invalid {imgfile}: cannot compare token, {e}, delete the image")
            _remove_invalid_image(imgfile, logger)
            resume_url = page_url
            break
        # The file is closed here, so deleting it also works on platforms
        # that refuse to remove an open file.
        if image_token != sha1[0:10]:
            logger.error(f"Invalid {imgfile}: image token not match, {image_token} != {sha1}, delete the image")
            _remove_invalid_image(imgfile, logger)
            resume_url = page_url
            break

    if resume_url is None:
        # record that this gallery has been validated
        with open(ok_file, "w", encoding='utf8'):
            pass
        return True

    with open(resume_file, "w", encoding='utf8') as fp:
        fp.write(resume_url)
    logger.error(f"Invalid {gallery_dir}: no enough images, should resume from {resume_url}")
    return False
@@ -0,0 +1,99 @@
1
+ Metadata-Version: 2.4
2
+ Name: ex_cd
3
+ Version: 1.17.0
4
+ Summary: 高效下载E站gallery的所有历史数据
5
+ Author-email: yindaheng98 <yindaheng98@163.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/yindaheng98/ex-cd
8
+ Project-URL: Repository, https://github.com/yindaheng98/ex-cd
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.7
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: gallery-dl
16
+ Dynamic: license-file
17
+
18
+ # ex-cd
19
+
20
+ 高效下载E站gallery的所有历史数据
21
+
22
+ * 尽量避免集中数据库,能放进文件夹的数据尽量放进文件夹
23
+ * 尽量减少请求操作,能只用读文件的尽量只读文件
24
+ * 尽量减少文件读写操作,能只用读文件列表的尽量只读文件列表
25
+
26
+ ## Usage
27
+
28
+ ```sh
29
+ python -m ex_cd -c .vscode/config.json https://exhentai.org/g/2635845/ecbc9d9681/
30
+ ```
31
+
32
+ ```sh
33
+ python -m ex_cd -c <a json string> https://exhentai.org/g/2635845/ecbc9d9681/
34
+ ```
35
+
36
+ See the example config file: `.vscode/config.json`
37
+
38
+ You can also set the `EXCD_CONFIG_FILE` environment variable to point to a config file; any setting passed via `-c` overrides the corresponding setting in that file:
39
+
40
+ ```sh
41
+ export EXCD_CONFIG_FILE=".vscode/config.json"
42
+ python -m ex_cd -c <a json string> https://exhentai.org/g/2635845/ecbc9d9681/
43
+ ```
44
+
45
+ See the example command line: `.vscode/launch.json`
46
+
47
+ ## How does it work?
48
+
49
+ ### URL更新
50
+
51
+ ```mermaid
52
+ flowchart TD
53
+
54
+ UrlCheck1[输入URL] --> UrlCheck2(从URL中提取目标文件夹路径\ngallery-dl --dump-json '%s' --range 0\n< gallery_path >)
55
+ UrlCheck2 --> UrlCheck3(检查是否是过时内容\n< gallery_path >/metadata/child.url是否存在)
56
+ UrlCheck3 --> UrlCheck4{child.url存在 ?}
57
+ UrlCheck4 -->|是| UrlCheck5(按照child.url更新URL为最新) --> UrlCheck1
58
+ UrlCheck4 -->|否| MetaCheck1[结束\n返回最新URL] --> OldPlacehold[后台执行\n过时元数据占位]
59
+ ```
60
+
61
+ ### 过时元数据占位
62
+
63
+ ```mermaid
64
+ flowchart TD
65
+
66
+ UrlCheck1[输入URL] --> UrlCheck2(从URL中提取目标文件夹路径\ngallery-dl --dump-json '%s' --range 0\n< gallery_path >) --> MetaCheck1(检查元数据文件存在性\n< gallery_path >/metadata/*.json 文件存在)
67
+ MetaCheck1 --> MetaCheck2{元数据文件存在 ?}
68
+ MetaCheck2 -->|是| MetaCheck3(检查parent存在性\n元数据文件中存在parent字段) --> MetaCheck4{parent字段存在 ?} -->|是| UrlCheck3(按照parent字段更新URL为过时URL) --> UrlCheck1
69
+ UrlCheck3 --> OldPlacehold1(从URL中提取目标文件夹路径) --> OldPlacehold2[在目标文件夹路径下放置child.url]
70
+ MetaCheck2 -->|否| MetaCheck5(下载一个元数据\ngallery-dl -v '%s' --no-download --range 0)
71
+ MetaCheck4 -->|否| MetaCheck5 --> MetaCheck1
72
+ ```
73
+
74
+ ### 元数据下载
75
+
76
+ ```mermaid
77
+ flowchart TD
78
+
79
+ UrlCheck1[输入URL] --> URL更新 --> UrlCheck2(从URL中提取目标文件夹路径\ngallery-dl --dump-json '%s' --range 0\n< gallery_path >) --> MetaCheck1(检查元数据文件存在性\n< gallery_path >/metadata/*.json 文件存在)
80
+ MetaCheck1 --> MetaCheck2{元数据文件存在 ?}
81
+ MetaCheck2 -->|是| MetaCheck4(检查元数据完整性\n< gallery_path >/metadata/*.json 每个文件都可json解析\n其中 'filecount' 值和 < gallery_path >/metadata/*.json 文件数相等)
82
+ MetaCheck4 --> MetaCheck5{元数据文件完整 ?}
83
+ MetaCheck5 -->|否| MetaCheck3
84
+ MetaCheck2 -->|否| MetaCheck3(下载元数据 gallery-dl -v '%s' --no-download) --> MetaCheck1
85
+ MetaCheck5 -->|是| MetaCheck6[结束]
86
+ MetaCheck3 --> MetaCheck6
87
+ ```
88
+
89
+ ### 图片下载
90
+
91
+ !!!!!!!!! TODO: 确定是最新之后,元数据下载和图片下载同时进行 !!!!!!!!!
92
+
93
+ ```mermaid
94
+ flowchart TD
95
+ UrlCheck1[输入URL] --> UrlCheck2[URL更新] --> ImgCheck1(检查图片文件存在性: \n< gallery_path >/metadata/*.json 对应的每一个图片文件都存在) --> ImgCheck2{图片文件均存在 ?} -->|是| ImgCheck3(检查图片文件内容: \n< gallery_path >/metadata/*.json 对应的图片文件的SHA1值都与< image_token >字段值相符) --> ImgCheck4{图片文件内容均符合image_token ?} -->|是| ImgCheck5[结束]
96
+ ImgCheck2 -->|否| Download(调用gallery-dl下载)
97
+ ImgCheck4 -->|否| Download
98
+ Download --> ImgCheck5
99
+ ```
@@ -0,0 +1,22 @@
1
+ ex_cd/__init__.py,sha256=v5oliHtuQqbpMTgJM2tpDB6_pRHaiCvU-bKEfwjFd3U,989
2
+ ex_cd/__main__.py,sha256=RFTiY-4W5PRgmFW-vTX36xXpg0zkat1kPOgnuicj8dk,78
3
+ ex_cd/collect.py,sha256=ZMVGiPhi_T05MqSb-aT4RIcjGgsjPD6RknHiQmH3YSI,2375
4
+ ex_cd/common.py,sha256=q2VJAfPyQKkJi_LM-vhmk1MlLbCsCDKY7nVMLR1bD6k,3544
5
+ ex_cd/config.py,sha256=UpbE0WIN81U_llVSFZBuBzssWiQpR46WXMpkTGmHRKU,2903
6
+ ex_cd/deprecate.py,sha256=96PWa88jaa4e6tkb8cLDHZfcfbyFpDBr6YKNIOAEvBU,2023
7
+ ex_cd/download.py,sha256=CVTT6dIdCnflry4BlB-1f-uFRRe41C1AXqY6H4th_7g,4061
8
+ ex_cd/gallery_dl_exec.py,sha256=czIO6ea_9spFdt0i5Vrb_9DKxOh5k36qA-WFh8wLdRw,517
9
+ ex_cd/history.py,sha256=JjJzSRDjuUiOVlP1cqbhrlXuIf2QyAZFvye936w8ChU,3270
10
+ ex_cd/meta.py,sha256=mQKEWBAgsmhiojBPfkwE9xL6sUlW9ach4mbmpX0p1YY,5062
11
+ ex_cd/output.py,sha256=eOoWIsYEypxDcZncf-jrm-9HODRi_CnVWbaJNHOLrB4,1052
12
+ ex_cd/validate.py,sha256=Y2ApFzrSUaQMryV1gRuiBXMECKO5dvxPRrC3O60Rh6Q,3046
13
+ ex_cd/tools/__init__.py,sha256=ARvxJXlhJ0kgV4vhCGNgFpV32s-Xp6sO87Piwr4qjrs,1053
14
+ ex_cd/tools/__main__.py,sha256=yhxacDeHmwg_y9Xn_U3G4rTOlqIKfQ6xREEPdWXMY5k,91
15
+ ex_cd/tools/config.py,sha256=uwr2ngbfkvl6Dcc1GuhXJrSjo3YSXWX9V5sW5uaXklA,1552
16
+ ex_cd/tools/delete.py,sha256=xisoPW7Hcv4SEiHVh8SyI93GIFFlwOHjq5BoPillw14,4692
17
+ ex_cd/tools/latest_meta.py,sha256=-NdmUuCSLrdeS0_tnlPaR0dFCBCXoo76DMbpGMALfqI,833
18
+ ex_cd-1.17.0.dist-info/licenses/LICENSE,sha256=-OPEaaNL-VWb8CSDMMCDftVBnZkSlWIN9zn9BnqKld8,1067
19
+ ex_cd-1.17.0.dist-info/METADATA,sha256=U9Q9dou5fd4t5R40kajcynq6cPnC1pD4Cmd-t7SAuxs,4245
20
+ ex_cd-1.17.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
+ ex_cd-1.17.0.dist-info/top_level.txt,sha256=MIjfJbCr3rAXXTqGS9pAN1nALoq1hNP4VFivF78A_z0,6
22
+ ex_cd-1.17.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Howard Yin
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ ex_cd