copyparty 1.14.4__py3-none-any.whl → 1.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
copyparty/__main__.py CHANGED
@@ -986,9 +986,10 @@ def add_upload(ap):
986
986
  ap2.add_argument("--reg-cap", metavar="N", type=int, default=38400, help="max number of uploads to keep in memory when running without \033[33m-e2d\033[0m; roughly 1 MiB RAM per 600")
987
987
  ap2.add_argument("--no-fpool", action="store_true", help="disable file-handle pooling -- instead, repeatedly close and reopen files during upload (bad idea to enable this on windows and/or cow filesystems)")
988
988
  ap2.add_argument("--use-fpool", action="store_true", help="force file-handle pooling, even when it might be dangerous (multiprocessing, filesystems lacking sparse-files support, ...)")
989
- ap2.add_argument("--hardlink", action="store_true", help="prefer hardlinks instead of symlinks when possible (within same filesystem) (volflag=hardlink)")
990
- ap2.add_argument("--never-symlink", action="store_true", help="do not fallback to symlinks when a hardlink cannot be made (volflag=neversymlink)")
991
- ap2.add_argument("--no-dedup", action="store_true", help="disable symlink/hardlink creation; copy file contents instead (volflag=copydupes)")
989
+ ap2.add_argument("--dedup", action="store_true", help="enable symlink-based upload deduplication (volflag=dedup)")
990
+ ap2.add_argument("--safe-dedup", metavar="N", type=int, default=50, help="how careful to be when deduplicating files; [\033[32m1\033[0m] = just verify the filesize, [\033[32m50\033[0m] = verify file contents have not been altered (volflag=safededup)")
991
+ ap2.add_argument("--hardlink", action="store_true", help="enable hardlink-based dedup; will fallback on symlinks when that is impossible (across filesystems) (volflag=hardlink)")
992
+ ap2.add_argument("--hardlink-only", action="store_true", help="do not fallback to symlinks when a hardlink cannot be made (volflag=hardlinkonly)")
992
993
  ap2.add_argument("--no-dupe", action="store_true", help="reject duplicate files during upload; only matches within the same volume (volflag=nodupe)")
993
994
  ap2.add_argument("--no-snap", action="store_true", help="disable snapshots -- forget unfinished uploads on shutdown; don't create .hist/up2k.snap files -- abandoned/interrupted uploads must be cleaned up manually")
994
995
  ap2.add_argument("--snap-wri", metavar="SEC", type=int, default=300, help="write upload state to ./hist/up2k.snap every \033[33mSEC\033[0m seconds; allows resuming incomplete uploads after a server crash")
@@ -1279,6 +1280,7 @@ def add_logging(ap):
1279
1280
  ap2.add_argument("--ansi", action="store_true", help="force colors; overrides environment-variable NO_COLOR")
1280
1281
  ap2.add_argument("--no-logflush", action="store_true", help="don't flush the logfile after each write; tiny bit faster")
1281
1282
  ap2.add_argument("--no-voldump", action="store_true", help="do not list volumes and permissions on startup")
1283
+ ap2.add_argument("--log-utc", action="store_true", help="do not use local timezone; assume the TZ env-var is UTC (tiny bit faster)")
1282
1284
  ap2.add_argument("--log-tdec", metavar="N", type=int, default=3, help="timestamp resolution / number of timestamp decimals")
1283
1285
  ap2.add_argument("--log-badpwd", metavar="N", type=int, default=1, help="log failed login attempt passwords: 0=terse, 1=plaintext, 2=hashed")
1284
1286
  ap2.add_argument("--log-conn", action="store_true", help="debug: print tcp-server msgs")
@@ -1337,7 +1339,7 @@ def add_transcoding(ap):
1337
1339
  def add_db_general(ap, hcores):
1338
1340
  noidx = APPLESAN_TXT if MACOS else ""
1339
1341
  ap2 = ap.add_argument_group('general db options')
1340
- ap2.add_argument("-e2d", action="store_true", help="enable up2k database, making files searchable + enables upload deduplication")
1342
+ ap2.add_argument("-e2d", action="store_true", help="enable up2k database; this enables file search, upload-undo, improves deduplication")
1341
1343
  ap2.add_argument("-e2ds", action="store_true", help="scan writable folders for new files on startup; sets \033[33m-e2d\033[0m")
1342
1344
  ap2.add_argument("-e2dsa", action="store_true", help="scans all folders on startup; sets \033[33m-e2ds\033[0m")
1343
1345
  ap2.add_argument("-e2v", action="store_true", help="verify file integrity; rehash all files and compare with db")
@@ -1350,7 +1352,7 @@ def add_db_general(ap, hcores):
1350
1352
  ap2.add_argument("--re-dhash", action="store_true", help="force a cache rebuild on startup; enable this once if it gets out of sync (should never be necessary)")
1351
1353
  ap2.add_argument("--no-forget", action="store_true", help="never forget indexed files, even when deleted from disk -- makes it impossible to ever upload the same file twice -- only useful for offloading uploads to a cloud service or something (volflag=noforget)")
1352
1354
  ap2.add_argument("--dbd", metavar="PROFILE", default="wal", help="database durability profile; sets the tradeoff between robustness and speed, see \033[33m--help-dbd\033[0m (volflag=dbd)")
1353
- ap2.add_argument("--xlink", action="store_true", help="on upload: check all volumes for dupes, not just the target volume (volflag=xlink)")
1355
+ ap2.add_argument("--xlink", action="store_true", help="on upload: check all volumes for dupes, not just the target volume (probably buggy, not recommended) (volflag=xlink)")
1354
1356
  ap2.add_argument("--hash-mt", metavar="CORES", type=int, default=hcores, help="num cpu cores to use for file hashing; set 0 or 1 for single-core hashing")
1355
1357
  ap2.add_argument("--re-maxage", metavar="SEC", type=int, default=0, help="rescan filesystem for changes every \033[33mSEC\033[0m seconds; 0=off (volflag=scan)")
1356
1358
  ap2.add_argument("--db-act", metavar="SEC", type=float, default=10.0, help="defer any scheduled volume reindexing until \033[33mSEC\033[0m seconds after last db write (uploads, renames, ...)")
@@ -1613,6 +1615,7 @@ def main(argv = None, rsrc = None) :
1613
1615
  ("--hdr-au-usr", "--idp-h-usr"),
1614
1616
  ("--idp-h-sep", "--idp-gsep"),
1615
1617
  ("--th-no-crop", "--th-crop=n"),
1618
+ ("--never-symlink", "--hardlink-only"),
1616
1619
  ]
1617
1620
  for dk, nk in deprecated:
1618
1621
  idx = -1
@@ -1637,7 +1640,7 @@ def main(argv = None, rsrc = None) :
1637
1640
  argv.extend(["--qr"])
1638
1641
  if ANYWIN or not os.geteuid():
1639
1642
  # win10 allows symlinks if admin; can be unexpected
1640
- argv.extend(["-p80,443,3923", "--ign-ebind", "--no-dedup"])
1643
+ argv.extend(["-p80,443,3923", "--ign-ebind"])
1641
1644
  except:
1642
1645
  pass
1643
1646
 
copyparty/__version__.py CHANGED
@@ -1,8 +1,8 @@
1
1
  # coding: utf-8
2
2
 
3
- VERSION = (1, 14, 4)
4
- CODENAME = "one step forward"
5
- BUILD_DT = (2024, 9, 2)
3
+ VERSION = (1, 15, 0)
4
+ CODENAME = "fill the drives"
5
+ BUILD_DT = (2024, 9, 8)
6
6
 
7
7
  S_VERSION = ".".join(map(str, VERSION))
8
8
  S_BUILD_DT = "{0:04d}-{1:02d}-{2:02d}".format(*BUILD_DT)
copyparty/authsrv.py CHANGED
@@ -1884,6 +1884,11 @@ class AuthSrv(object):
1884
1884
  if len(zs) == 3: # fc5 => ffcc55
1885
1885
  vol.flags["tcolor"] = "".join([x * 2 for x in zs])
1886
1886
 
1887
+ if vol.flags.get("neversymlink"):
1888
+ vol.flags["hardlinkonly"] = True # was renamed
1889
+ if vol.flags.get("hardlinkonly"):
1890
+ vol.flags["hardlink"] = True
1891
+
1887
1892
  for k1, k2 in IMPLICATIONS:
1888
1893
  if k1 in vol.flags:
1889
1894
  vol.flags[k2] = True
@@ -1988,9 +1993,6 @@ class AuthSrv(object):
1988
1993
  for x in drop:
1989
1994
  vol.flags.pop(x)
1990
1995
 
1991
- if vol.flags.get("neversymlink") and not vol.flags.get("hardlink"):
1992
- vol.flags["copydupes"] = True
1993
-
1994
1996
  # verify tags mentioned by -mt[mp] are used by -mte
1995
1997
  local_mtp = {}
1996
1998
  local_only_mtp = {}
@@ -2069,6 +2071,8 @@ class AuthSrv(object):
2069
2071
 
2070
2072
  have_e2d = False
2071
2073
  have_e2t = False
2074
+ have_dedup = False
2075
+ unsafe_dedup = []
2072
2076
  t = "volumes and permissions:\n"
2073
2077
  for zv in vfs.all_vols.values():
2074
2078
  if not self.warn_anonwrite or verbosity < 5:
@@ -2101,6 +2105,11 @@ class AuthSrv(object):
2101
2105
  if "e2t" in zv.flags:
2102
2106
  have_e2t = True
2103
2107
 
2108
+ if "dedup" in zv.flags:
2109
+ have_dedup = True
2110
+ if "e2d" not in zv.flags and "hardlink" not in zv.flags:
2111
+ unsafe_dedup.append("/" + zv.vpath)
2112
+
2104
2113
  t += "\n"
2105
2114
 
2106
2115
  if self.warn_anonwrite and verbosity > 4:
@@ -2113,10 +2122,17 @@ class AuthSrv(object):
2113
2122
  self.log("\n\033[{}\033[0m\n".format(t))
2114
2123
 
2115
2124
  if not have_e2t:
2116
- t = "hint: argument -e2ts enables multimedia indexing (artist/title/...)"
2125
+ t = "hint: enable multimedia indexing (artist/title/...) with argument -e2ts"
2117
2126
  self.log(t, 6)
2118
2127
  else:
2119
- t = "hint: argument -e2dsa enables searching, upload-undo, and better deduplication"
2128
+ t = "hint: enable searching and upload-undo with argument -e2dsa"
2129
+ self.log(t, 6)
2130
+
2131
+ if unsafe_dedup:
2132
+ t = "WARNING: symlink-based deduplication is enabled for some volumes, but without indexing. Please enable -e2dsa and/or --hardlink to avoid problems when moving/renaming files. Affected volumes: %s"
2133
+ self.log(t % (", ".join(unsafe_dedup)), 3)
2134
+ elif not have_dedup:
2135
+ t = "hint: enable upload deduplication with --dedup (but see readme for consequences)"
2120
2136
  self.log(t, 6)
2121
2137
 
2122
2138
  zv, _ = vfs.get("/", "*", False, False)
copyparty/cfg.py CHANGED
@@ -12,8 +12,7 @@ def vf_bmap() :
12
12
  "dav_auth": "davauth",
13
13
  "dav_rt": "davrt",
14
14
  "ed": "dots",
15
- "never_symlink": "neversymlink",
16
- "no_dedup": "copydupes",
15
+ "hardlink_only": "hardlinkonly",
17
16
  "no_dupe": "nodupe",
18
17
  "no_forget": "noforget",
19
18
  "no_pipe": "nopipe",
@@ -23,6 +22,7 @@ def vf_bmap() :
23
22
  "no_athumb": "dathumb",
24
23
  }
25
24
  for k in (
25
+ "dedup",
26
26
  "dotsrch",
27
27
  "e2d",
28
28
  "e2ds",
@@ -58,6 +58,7 @@ def vf_vmap() :
58
58
  "no_hash": "nohash",
59
59
  "no_idx": "noidx",
60
60
  "re_maxage": "scan",
61
+ "safe_dedup": "safededup",
61
62
  "th_convt": "convt",
62
63
  "th_size": "thsize",
63
64
  "th_crop": "crop",
@@ -129,10 +130,11 @@ permdescs = {
129
130
 
130
131
  flagcats = {
131
132
  "uploads, general": {
133
+ "dedup": "enable symlink-based file deduplication",
134
+ "hardlink": "enable hardlink-based file deduplication,\nwith fallback on symlinks when that is impossible",
135
+ "hardlinkonly": "dedup with hardlink only, never symlink;\nmake a full copy if hardlink is impossible",
136
+ "safededup": "verify on-disk data before using it for dedup",
132
137
  "nodupe": "rejects existing files (instead of symlinking them)",
133
- "hardlink": "does dedup with hardlinks instead of symlinks",
134
- "neversymlink": "disables symlink fallback; full copy instead",
135
- "copydupes": "disables dedup, always saves full copies of dupes",
136
138
  "sparse": "force use of sparse files, mainly for s3-backed storage",
137
139
  "daw": "enable full WebDAV write support (dangerous);\nPUT-operations will now \033[1;31mOVERWRITE\033[0;35m existing files",
138
140
  "nosub": "forces all uploads into the top folder of the vfs",
@@ -159,7 +161,7 @@ flagcats = {
159
161
  "lifetime=3600": "uploads are deleted after 1 hour",
160
162
  },
161
163
  "database, general": {
162
- "e2d": "enable database; makes files searchable + enables upload dedup",
164
+ "e2d": "enable database; makes files searchable + enables upload-undo",
163
165
  "e2ds": "scan writable folders for new files on startup; also sets -e2d",
164
166
  "e2dsa": "scans all folders for new files on startup; also sets -e2d",
165
167
  "e2t": "enable multimedia indexing; makes it possible to search for tags",
@@ -177,7 +179,7 @@ flagcats = {
177
179
  "noforget": "don't forget files when deleted from disk",
178
180
  "fat32": "avoid excessive reindexing on android sdcardfs",
179
181
  "dbd=[acid|swal|wal|yolo]": "database speed-durability tradeoff",
180
- "xlink": "cross-volume dupe detection / linking",
182
+ "xlink": "cross-volume dupe detection / linking (dangerous)",
181
183
  "xdev": "do not descend into other filesystems",
182
184
  "xvol": "do not follow symlinks leaving the volume root",
183
185
  "dotsrch": "show dotfiles in search results",
copyparty/svchub.py CHANGED
@@ -3,7 +3,6 @@ from __future__ import print_function, unicode_literals
3
3
 
4
4
  import argparse
5
5
  import base64
6
- import calendar
7
6
  import errno
8
7
  import gzip
9
8
  import logging
@@ -16,7 +15,7 @@ import string
16
15
  import sys
17
16
  import threading
18
17
  import time
19
- from datetime import datetime, timedelta
18
+ from datetime import datetime
20
19
 
21
20
  # from inspect import currentframe
22
21
  # print(currentframe().f_lineno)
@@ -98,6 +97,7 @@ class SvcHub(object):
98
97
  self.argv = argv
99
98
  self.E = args.E
100
99
  self.no_ansi = args.no_ansi
100
+ self.tz = UTC if args.log_utc else None
101
101
  self.logf = None
102
102
  self.logf_base_fn = ""
103
103
  self.is_dut = False # running in unittest; always False
@@ -112,7 +112,8 @@ class SvcHub(object):
112
112
  self.httpsrv_up = 0
113
113
 
114
114
  self.log_mutex = threading.Lock()
115
- self.next_day = 0
115
+ self.cday = 0
116
+ self.cmon = 0
116
117
  self.tstack = 0.0
117
118
 
118
119
  self.iphash = HMaccas(os.path.join(self.E.cfg, "iphash"), 8)
@@ -785,7 +786,7 @@ class SvcHub(object):
785
786
  self.args.nc = min(self.args.nc, soft // 2)
786
787
 
787
788
  def _logname(self) :
788
- dt = datetime.now(UTC)
789
+ dt = datetime.now(self.tz)
789
790
  fn = str(self.args.lo)
790
791
  for fs in "YmdHMS":
791
792
  fs = "%" + fs
@@ -1058,12 +1059,12 @@ class SvcHub(object):
1058
1059
  return
1059
1060
 
1060
1061
  with self.log_mutex:
1061
- zd = datetime.now(UTC)
1062
+ dt = datetime.now(self.tz)
1062
1063
  ts = self.log_dfmt % (
1063
- zd.year,
1064
- zd.month * 100 + zd.day,
1065
- (zd.hour * 100 + zd.minute) * 100 + zd.second,
1066
- zd.microsecond // self.log_div,
1064
+ dt.year,
1065
+ dt.month * 100 + dt.day,
1066
+ (dt.hour * 100 + dt.minute) * 100 + dt.second,
1067
+ dt.microsecond // self.log_div,
1067
1068
  )
1068
1069
 
1069
1070
  if c and not self.args.no_ansi:
@@ -1084,41 +1085,26 @@ class SvcHub(object):
1084
1085
  if not self.args.no_logflush:
1085
1086
  self.logf.flush()
1086
1087
 
1087
- now = time.time()
1088
- if int(now) >= self.next_day:
1089
- self._set_next_day()
1088
+ if dt.day != self.cday or dt.month != self.cmon:
1089
+ self._set_next_day(dt)
1090
1090
 
1091
- def _set_next_day(self) :
1092
- if self.next_day and self.logf and self.logf_base_fn != self._logname():
1091
+ def _set_next_day(self, dt ) :
1092
+ if self.cday and self.logf and self.logf_base_fn != self._logname():
1093
1093
  self.logf.close()
1094
1094
  self._setup_logfile("")
1095
1095
 
1096
- dt = datetime.now(UTC)
1097
-
1098
- # unix timestamp of next 00:00:00 (leap-seconds safe)
1099
- day_now = dt.day
1100
- while dt.day == day_now:
1101
- dt += timedelta(hours=12)
1102
-
1103
- dt = dt.replace(hour=0, minute=0, second=0)
1104
- try:
1105
- tt = dt.utctimetuple()
1106
- except:
1107
- # still makes me hella uncomfortable
1108
- tt = dt.timetuple()
1109
-
1110
- self.next_day = calendar.timegm(tt)
1096
+ self.cday = dt.day
1097
+ self.cmon = dt.month
1111
1098
 
1112
1099
  def _log_enabled(self, src , msg , c = 0) :
1113
1100
  """handles logging from all components"""
1114
1101
  with self.log_mutex:
1115
- now = time.time()
1116
- if int(now) >= self.next_day:
1117
- dt = datetime.fromtimestamp(now, UTC)
1102
+ dt = datetime.now(self.tz)
1103
+ if dt.day != self.cday or dt.month != self.cmon:
1118
1104
  zs = "{}\n" if self.no_ansi else "\033[36m{}\033[0m\n"
1119
1105
  zs = zs.format(dt.strftime("%Y-%m-%d"))
1120
1106
  print(zs, end="")
1121
- self._set_next_day()
1107
+ self._set_next_day(dt)
1122
1108
  if self.logf:
1123
1109
  self.logf.write(zs)
1124
1110
 
@@ -1137,12 +1123,11 @@ class SvcHub(object):
1137
1123
  else:
1138
1124
  msg = "%s%s\033[0m" % (c, msg)
1139
1125
 
1140
- zd = datetime.fromtimestamp(now, UTC)
1141
1126
  ts = self.log_efmt % (
1142
- zd.hour,
1143
- zd.minute,
1144
- zd.second,
1145
- zd.microsecond // self.log_div,
1127
+ dt.hour,
1128
+ dt.minute,
1129
+ dt.second,
1130
+ dt.microsecond // self.log_div,
1146
1131
  )
1147
1132
  msg = fmt % (ts, src, msg)
1148
1133
  try:
copyparty/up2k.py CHANGED
@@ -1459,7 +1459,7 @@ class Up2k(object):
1459
1459
  self.log("file: {}".format(abspath))
1460
1460
 
1461
1461
  try:
1462
- hashes = self._hashlist_from_file(
1462
+ hashes, _ = self._hashlist_from_file(
1463
1463
  abspath, "a{}, ".format(self.pp.n)
1464
1464
  )
1465
1465
  except Exception as ex:
@@ -1653,6 +1653,7 @@ class Up2k(object):
1653
1653
  qex = " where " + qex
1654
1654
 
1655
1655
  rewark = []
1656
+ f404 = []
1656
1657
 
1657
1658
  with self.mutex:
1658
1659
  b_left = 0
@@ -1669,7 +1670,8 @@ class Up2k(object):
1669
1670
  if self.stop:
1670
1671
  return -1
1671
1672
 
1672
- w, drd, dfn = zb[:-1].decode("utf-8").split("\x00")
1673
+ zs = zb[:-1].decode("utf-8").replace("\x00\x02", "\n")
1674
+ w, drd, dfn = zs.split("\x00\x01")
1673
1675
  with self.mutex:
1674
1676
  q = "select mt, sz from up where rd=? and fn=? and +w=?"
1675
1677
  try:
@@ -1695,9 +1697,14 @@ class Up2k(object):
1695
1697
  pf = "v{}, {:.0f}+".format(n_left, b_left / 1024 / 1024)
1696
1698
  self.pp.msg = pf + abspath
1697
1699
 
1698
- # throws on broken symlinks (always did)
1699
- stl = bos.lstat(abspath)
1700
- st = bos.stat(abspath) if stat.S_ISLNK(stl.st_mode) else stl
1700
+ try:
1701
+ stl = bos.lstat(abspath)
1702
+ st = bos.stat(abspath) if stat.S_ISLNK(stl.st_mode) else stl
1703
+ except Exception as ex:
1704
+ self.log("missing file: %s" % (abspath,), 3)
1705
+ f404.append((drd, dfn, w))
1706
+ continue
1707
+
1701
1708
  mt2 = int(stl.st_mtime)
1702
1709
  sz2 = st.st_size
1703
1710
 
@@ -1708,7 +1715,7 @@ class Up2k(object):
1708
1715
  self.log("file: {}".format(abspath))
1709
1716
 
1710
1717
  try:
1711
- hashes = self._hashlist_from_file(abspath, pf)
1718
+ hashes, _ = self._hashlist_from_file(abspath, pf)
1712
1719
  except Exception as ex:
1713
1720
  self.log("hash: {} @ [{}]".format(repr(ex), abspath))
1714
1721
  continue
@@ -1734,12 +1741,15 @@ class Up2k(object):
1734
1741
  t = t.format(abspath, w, sz, mt, w2, sz2, mt2)
1735
1742
  self.log(t, 1)
1736
1743
 
1737
- if e2vp and rewark:
1744
+ if e2vp and (rewark or f404):
1738
1745
  self.hub.retcode = 1
1739
1746
  Daemon(self.hub.sigterm)
1740
- raise Exception("{} files have incorrect hashes".format(len(rewark)))
1747
+ t = "in volume /%s: %s files missing, %s files have incorrect hashes"
1748
+ t = t % (vol.vpath, len(f404), len(rewark))
1749
+ self.log(t, 1)
1750
+ raise Exception(t)
1741
1751
 
1742
- if not e2vu or not rewark:
1752
+ if not e2vu or (not rewark and not f404):
1743
1753
  return 0
1744
1754
 
1745
1755
  with self.mutex:
@@ -1747,9 +1757,13 @@ class Up2k(object):
1747
1757
  q = "update up set w = ?, sz = ?, mt = ? where rd = ? and fn = ? limit 1"
1748
1758
  cur.execute(q, (w, sz, int(mt), rd, fn))
1749
1759
 
1760
+ for _, _, w in f404:
1761
+ q = "delete from up where w = ? limit 1"
1762
+ cur.execute(q, (w,))
1763
+
1750
1764
  cur.connection.commit()
1751
1765
 
1752
- return len(rewark)
1766
+ return len(rewark) + len(f404)
1753
1767
 
1754
1768
  def _build_tags_index(self, vol ) :
1755
1769
  ptop = vol.realpath
@@ -1964,7 +1978,8 @@ class Up2k(object):
1964
1978
  if c2.execute(q, (row[0][:16],)).fetchone():
1965
1979
  continue
1966
1980
 
1967
- gf.write(("%s\n" % ("\x00".join(row),)).encode("utf-8"))
1981
+ zs = "\x00\x01".join(row).replace("\n", "\x00\x02")
1982
+ gf.write((zs + "\n").encode("utf-8"))
1968
1983
  n += 1
1969
1984
 
1970
1985
  c2.close()
@@ -2663,10 +2678,13 @@ class Up2k(object):
2663
2678
  jcur = self.cur.get(ptop)
2664
2679
  reg = self.registry[ptop]
2665
2680
  vfs = self.asrv.vfs.all_vols[cj["vtop"]]
2666
- n4g = vfs.flags.get("noforget")
2681
+ n4g = bool(vfs.flags.get("noforget"))
2667
2682
  rand = vfs.flags.get("rand") or cj.get("rand")
2668
2683
  lost = []
2669
2684
 
2685
+ safe_dedup = vfs.flags.get("safededup") or 50
2686
+ data_ok = safe_dedup < 10 or n4g
2687
+
2670
2688
  vols = [(ptop, jcur)] if jcur else []
2671
2689
  if vfs.flags.get("xlink"):
2672
2690
  vols += [(k, v) for k, v in self.cur.items() if k != ptop]
@@ -2674,7 +2692,7 @@ class Up2k(object):
2674
2692
  # force upload time rather than last-modified
2675
2693
  cj["lmod"] = int(time.time())
2676
2694
 
2677
- alts = []
2695
+ alts = []
2678
2696
  for ptop, cur in vols:
2679
2697
  allv = self.asrv.vfs.all_vols
2680
2698
  cvfs = next((v for v in allv.values() if v.realpath == ptop), vfs)
@@ -2704,13 +2722,12 @@ class Up2k(object):
2704
2722
  wark, st.st_size, dsize, st.st_mtime, dtime, dp_abs
2705
2723
  )
2706
2724
  self.log(t)
2707
- raise Exception("desync")
2725
+ raise Exception()
2708
2726
  except Exception as ex:
2709
2727
  if n4g:
2710
2728
  st = os.stat_result((0, -1, -1, 0, 0, 0, 0, 0, 0, 0))
2711
2729
  else:
2712
- if str(ex) != "desync":
2713
- lost.append((cur, dp_dir, dp_fn))
2730
+ lost.append((cur, dp_dir, dp_fn))
2714
2731
  continue
2715
2732
 
2716
2733
  j = {
@@ -2733,18 +2750,42 @@ class Up2k(object):
2733
2750
  if k in cj:
2734
2751
  j[k] = cj[k]
2735
2752
 
2753
+ # offset of 1st diff in vpaths
2754
+ zig = (
2755
+ n + 1
2756
+ for n, (c1, c2) in enumerate(
2757
+ zip(dp_dir + "\r", cj["prel"] + "\n")
2758
+ )
2759
+ if c1 != c2
2760
+ )
2736
2761
  score = (
2737
- (3 if st.st_dev == dev else 0)
2738
- + (2 if dp_dir == cj["prel"] else 0)
2762
+ (6969 if st.st_dev == dev else 0)
2763
+ + (3210 if dp_dir == cj["prel"] else next(zig))
2739
2764
  + (1 if dp_fn == cj["name"] else 0)
2740
2765
  )
2741
- alts.append((score, -len(alts), j))
2742
-
2743
- if alts:
2744
- best = sorted(alts, reverse=True)[0]
2745
- job = best[2]
2746
- else:
2747
- job = None
2766
+ alts.append((score, -len(alts), j, cur, dp_dir, dp_fn))
2767
+
2768
+ job = None
2769
+ inc_ap = djoin(cj["ptop"], cj["prel"], cj["name"])
2770
+ for dupe in sorted(alts, reverse=True):
2771
+ rj = dupe[2]
2772
+ orig_ap = djoin(rj["ptop"], rj["prel"], rj["name"])
2773
+ if data_ok or inc_ap == orig_ap:
2774
+ data_ok = True
2775
+ job = rj
2776
+ break
2777
+ else:
2778
+ self.log("asserting contents of %s" % (orig_ap,))
2779
+ dhashes, st = self._hashlist_from_file(orig_ap)
2780
+ dwark = up2k_wark_from_hashlist(self.salt, st.st_size, dhashes)
2781
+ if wark != dwark:
2782
+ t = "will not dedup (fs index desync): fs=%s, db=%s, file: %s"
2783
+ self.log(t % (dwark, wark, orig_ap))
2784
+ lost.append(dupe[3:])
2785
+ continue
2786
+ data_ok = True
2787
+ job = rj
2788
+ break
2748
2789
 
2749
2790
  if job and wark in reg:
2750
2791
  # self.log("pop " + wark + " " + job["name"] + " handle_json db", 4)
@@ -2753,7 +2794,7 @@ class Up2k(object):
2753
2794
  if lost:
2754
2795
  c2 = None
2755
2796
  for cur, dp_dir, dp_fn in lost:
2756
- t = "forgetting deleted file: /{}"
2797
+ t = "forgetting desynced db entry: /{}"
2757
2798
  self.log(t.format(vjoin(vjoin(vfs.vpath, dp_dir), dp_fn)))
2758
2799
  self.db_rm(cur, dp_dir, dp_fn, cj["size"])
2759
2800
  if c2 and c2 != cur:
@@ -2788,7 +2829,13 @@ class Up2k(object):
2788
2829
  del reg[wark]
2789
2830
  break
2790
2831
 
2791
- if st and not self.args.nw and not n4g and st.st_size != rj["size"]:
2832
+ inc_ap = djoin(cj["ptop"], cj["prel"], cj["name"])
2833
+ orig_ap = djoin(rj["ptop"], rj["prel"], rj["name"])
2834
+
2835
+ if self.args.nw or n4g or not st:
2836
+ pass
2837
+
2838
+ elif st.st_size != rj["size"]:
2792
2839
  t = "will not dedup (fs index desync): {}, size fs={} db={}, mtime fs={} db={}, file: {}"
2793
2840
  t = t.format(
2794
2841
  wark, st.st_size, rj["size"], st.st_mtime, rj["lmod"], path
@@ -2796,6 +2843,15 @@ class Up2k(object):
2796
2843
  self.log(t)
2797
2844
  del reg[wark]
2798
2845
 
2846
+ elif inc_ap != orig_ap and not data_ok:
2847
+ self.log("asserting contents of %s" % (orig_ap,))
2848
+ dhashes, _ = self._hashlist_from_file(orig_ap)
2849
+ dwark = up2k_wark_from_hashlist(self.salt, st.st_size, dhashes)
2850
+ if wark != dwark:
2851
+ t = "will not dedup (fs index desync): fs=%s, idx=%s, file: %s"
2852
+ self.log(t % (dwark, wark, orig_ap))
2853
+ del reg[wark]
2854
+
2799
2855
  if job or wark in reg:
2800
2856
  job = job or reg[wark]
2801
2857
  if (
@@ -3067,17 +3123,25 @@ class Up2k(object):
3067
3123
  verbose = True,
3068
3124
  rm = False,
3069
3125
  lmod = 0,
3126
+ fsrc = None,
3070
3127
  ) :
3128
+ if src == dst or (fsrc and fsrc == dst):
3129
+ t = "symlinking a file to itself?? orig(%s) fsrc(%s) link(%s)"
3130
+ raise Exception(t % (src, fsrc, dst))
3131
+
3071
3132
  if verbose:
3072
- self.log("linking dupe:\n {0}\n {1}".format(src, dst))
3133
+ t = "linking dupe:\n point-to: {0}\n link-loc: {1}"
3134
+ if fsrc:
3135
+ t += "\n data-src: {2}"
3136
+ self.log(t.format(src, dst, fsrc))
3073
3137
 
3074
3138
  if self.args.nw:
3075
3139
  return
3076
3140
 
3077
3141
  linked = False
3078
3142
  try:
3079
- if "copydupes" in flags:
3080
- raise Exception("disabled in config")
3143
+ if not flags.get("dedup"):
3144
+ raise Exception("dedup is disabled in config")
3081
3145
 
3082
3146
  lsrc = src
3083
3147
  ldst = dst
@@ -3114,7 +3178,7 @@ class Up2k(object):
3114
3178
  linked = True
3115
3179
  except Exception as ex:
3116
3180
  self.log("cannot hardlink: " + repr(ex))
3117
- if "neversymlink" in flags:
3181
+ if "hardlinkonly" in flags:
3118
3182
  raise Exception("symlink-fallback disabled in cfg")
3119
3183
 
3120
3184
  if not linked:
@@ -3133,7 +3197,15 @@ class Up2k(object):
3133
3197
  linked = True
3134
3198
  except Exception as ex:
3135
3199
  self.log("cannot link; creating copy: " + repr(ex))
3136
- shutil.copy2(fsenc(src), fsenc(dst))
3200
+ if bos.path.isfile(src):
3201
+ csrc = src
3202
+ elif fsrc and bos.path.isfile(fsrc):
3203
+ csrc = fsrc
3204
+ else:
3205
+ t = "BUG: no valid sources to link from! orig(%s) fsrc(%s) link(%s)"
3206
+ self.log(t, 1)
3207
+ raise Exception(t % (src, fsrc, dst))
3208
+ shutil.copy2(fsenc(csrc), fsenc(dst))
3137
3209
 
3138
3210
  if lmod and (not linked or SYMTIME):
3139
3211
  times = (int(time.time()), int(lmod))
@@ -3695,8 +3767,11 @@ class Up2k(object):
3695
3767
  cur = None
3696
3768
  try:
3697
3769
  ptop = dbv.realpath
3770
+ xlink = bool(dbv.flags.get("xlink"))
3698
3771
  cur, wark, _, _, _, _ = self._find_from_vpath(ptop, volpath)
3699
- self._forget_file(ptop, volpath, cur, wark, True, st.st_size)
3772
+ self._forget_file(
3773
+ ptop, volpath, cur, wark, True, st.st_size, xlink
3774
+ )
3700
3775
  finally:
3701
3776
  if cur:
3702
3777
  cur.connection.commit()
@@ -3920,13 +3995,15 @@ class Up2k(object):
3920
3995
  if c2 and c2 != c1:
3921
3996
  self._copy_tags(c1, c2, w)
3922
3997
 
3998
+ xlink = bool(svn.flags.get("xlink"))
3999
+
3923
4000
  with self.reg_mutex:
3924
4001
  has_dupes = self._forget_file(
3925
- svn.realpath, srem, c1, w, is_xvol, fsize_ or fsize
4002
+ svn.realpath, srem, c1, w, is_xvol, fsize_ or fsize, xlink
3926
4003
  )
3927
4004
 
3928
4005
  if not is_xvol:
3929
- has_dupes = self._relink(w, svn.realpath, srem, dabs)
4006
+ has_dupes = self._relink(w, svn.realpath, srem, dabs, c1, xlink)
3930
4007
 
3931
4008
  curs.add(c1)
3932
4009
 
@@ -4069,6 +4146,7 @@ class Up2k(object):
4069
4146
  wark ,
4070
4147
  drop_tags ,
4071
4148
  sz ,
4149
+ xlink ,
4072
4150
  ) :
4073
4151
  """
4074
4152
  mutex(main,reg) me
@@ -4080,7 +4158,7 @@ class Up2k(object):
4080
4158
  if wark and cur:
4081
4159
  self.log("found {} in db".format(wark))
4082
4160
  if drop_tags:
4083
- if self._relink(wark, ptop, vrem, ""):
4161
+ if self._relink(wark, ptop, vrem, "", cur, xlink):
4084
4162
  has_dupes = True
4085
4163
  drop_tags = False
4086
4164
 
@@ -4112,7 +4190,15 @@ class Up2k(object):
4112
4190
 
4113
4191
  return has_dupes
4114
4192
 
4115
- def _relink(self, wark , sptop , srem , dabs ) :
4193
+ def _relink(
4194
+ self,
4195
+ wark ,
4196
+ sptop ,
4197
+ srem ,
4198
+ dabs ,
4199
+ vcur ,
4200
+ xlink ,
4201
+ ) :
4116
4202
  """
4117
4203
  update symlinks from file at svn/srem to dabs (rename),
4118
4204
  or to first remaining full if no dabs (delete)
@@ -4128,6 +4214,8 @@ class Up2k(object):
4128
4214
  argv = (wark[:16], wark)
4129
4215
 
4130
4216
  for ptop, cur in self.cur.items():
4217
+ if not xlink and cur and cur != vcur:
4218
+ continue
4131
4219
  for rd, fn in cur.execute(q, argv):
4132
4220
  if rd.startswith("//") or fn.startswith("//"):
4133
4221
  rd, fn = s3dec(rd, fn)
@@ -4214,7 +4302,13 @@ class Up2k(object):
4214
4302
  except:
4215
4303
  pass
4216
4304
 
4217
- self._symlink(dabs, alink, flags, False, lmod=lmod or 0)
4305
+ # this creates a link pointing from dabs to alink; alink may
4306
+ # not exist yet, which becomes problematic if the symlinking
4307
+ # fails and it has to fall back on hardlinking/copying files
4308
+ # (for example a volume with symlinked dupes but no --dedup);
4309
+ # fsrc=sabs is then a source that currently resolves to copy
4310
+
4311
+ self._symlink(dabs, alink, flags, False, lmod=lmod or 0, fsrc=sabs)
4218
4312
 
4219
4313
  return len(full) + len(links)
4220
4314
 
@@ -4243,8 +4337,11 @@ class Up2k(object):
4243
4337
 
4244
4338
  return wark
4245
4339
 
4246
- def _hashlist_from_file(self, path , prefix = "") :
4247
- fsz = bos.path.getsize(path)
4340
+ def _hashlist_from_file(
4341
+ self, path , prefix = ""
4342
+ ) :
4343
+ st = bos.stat(path)
4344
+ fsz = st.st_size
4248
4345
  csz = up2k_chunksize(fsz)
4249
4346
  ret = []
4250
4347
  suffix = " MB, {}".format(path)
@@ -4257,7 +4354,7 @@ class Up2k(object):
4257
4354
  while fsz > 0:
4258
4355
  # same as `hash_at` except for `imutex` / bufsz
4259
4356
  if self.stop:
4260
- return []
4357
+ return [], st
4261
4358
 
4262
4359
  if self.pp:
4263
4360
  mb = fsz // (1024 * 1024)
@@ -4278,7 +4375,7 @@ class Up2k(object):
4278
4375
  digest = base64.urlsafe_b64encode(digest)
4279
4376
  ret.append(digest.decode("utf-8"))
4280
4377
 
4281
- return ret
4378
+ return ret, st
4282
4379
 
4283
4380
  def _new_upload(self, job , vfs , depth ) :
4284
4381
  pdir = djoin(job["ptop"], job["prel"])
@@ -4579,7 +4676,7 @@ class Up2k(object):
4579
4676
  self.salt, inf.st_size, int(inf.st_mtime), rd, fn
4580
4677
  )
4581
4678
  else:
4582
- hashes = self._hashlist_from_file(abspath)
4679
+ hashes, _ = self._hashlist_from_file(abspath)
4583
4680
  if not hashes:
4584
4681
  return False
4585
4682
 
copyparty/util.py CHANGED
@@ -239,6 +239,8 @@ IMPLICATIONS = [
239
239
  ["e2vu", "e2v"],
240
240
  ["e2vp", "e2v"],
241
241
  ["e2v", "e2d"],
242
+ ["hardlink_only", "hardlink"],
243
+ ["hardlink", "dedup"],
242
244
  ["tftpvv", "tftpv"],
243
245
  ["smbw", "smb"],
244
246
  ["smb1", "smb"],
copyparty/web/a/u2c.py CHANGED
@@ -1,8 +1,8 @@
1
1
  #!/usr/bin/env python3
2
2
  from __future__ import print_function, unicode_literals
3
3
 
4
- S_VERSION = "1.23"
5
- S_BUILD_DT = "2024-08-22"
4
+ S_VERSION = "1.24"
5
+ S_BUILD_DT = "2024-09-05"
6
6
 
7
7
  """
8
8
  u2c.py: upload to copyparty
@@ -41,19 +41,25 @@ except:
41
41
 
42
42
  try:
43
43
  import requests
44
+
45
+ req_ses = requests.Session()
44
46
  except ImportError as ex:
45
- if EXE:
47
+ if "-" in sys.argv or "-h" in sys.argv:
48
+ m = ""
49
+ elif EXE:
46
50
  raise
47
51
  elif sys.version_info > (2, 7):
48
- m = "\nERROR: need 'requests'; please run this command:\n {0} -m pip install --user requests\n"
52
+ m = "\nERROR: need 'requests'{0}; please run this command:\n {1} -m pip install --user requests\n"
49
53
  else:
50
54
  m = "requests/2.18.4 urllib3/1.23 chardet/3.0.4 certifi/2020.4.5.1 idna/2.7"
51
55
  m = [" https://pypi.org/project/" + x + "/#files" for x in m.split()]
52
- m = "\n ERROR: need these:\n" + "\n".join(m) + "\n"
56
+ m = "\n ERROR: need these{0}:\n" + "\n".join(m) + "\n"
53
57
  m += "\n for f in *.whl; do unzip $f; done; rm -r *.dist-info\n"
54
58
 
55
- print(m.format(sys.executable), "\nspecifically,", ex)
56
- sys.exit(1)
59
+ if m:
60
+ t = " when not running with '-h' or url '-'"
61
+ print(m.format(t, sys.executable), "\nspecifically,", ex)
62
+ sys.exit(1)
57
63
 
58
64
 
59
65
  # from copyparty/__init__.py
@@ -76,7 +82,22 @@ else:
76
82
  VT100 = platform.system() != "Windows"
77
83
 
78
84
 
79
- req_ses = requests.Session()
85
+ try:
86
+ UTC = datetime.timezone.utc
87
+ except:
88
+ TD_ZERO = datetime.timedelta(0)
89
+
90
+ class _UTC(datetime.tzinfo):
91
+ def utcoffset(self, dt):
92
+ return TD_ZERO
93
+
94
+ def tzname(self, dt):
95
+ return "UTC"
96
+
97
+ def dst(self, dt):
98
+ return TD_ZERO
99
+
100
+ UTC = _UTC()
80
101
 
81
102
 
82
103
  class Daemon(threading.Thread):
@@ -271,6 +292,12 @@ class MTHash(object):
271
292
  _print = print
272
293
 
273
294
 
295
+ def safe_print(*a, **ka):
296
+ ka["end"] = ""
297
+ zs = " ".join([unicode(x) for x in a])
298
+ _print(zs + "\n", **ka)
299
+
300
+
274
301
  def eprint(*a, **ka):
275
302
  ka["file"] = sys.stderr
276
303
  ka["end"] = ""
@@ -284,18 +311,17 @@ def eprint(*a, **ka):
284
311
 
285
312
  def flushing_print(*a, **ka):
286
313
  try:
287
- _print(*a, **ka)
314
+ safe_print(*a, **ka)
288
315
  except:
289
316
  v = " ".join(str(x) for x in a)
290
317
  v = v.encode("ascii", "replace").decode("ascii")
291
- _print(v, **ka)
318
+ safe_print(v, **ka)
292
319
 
293
320
  if "flush" not in ka:
294
321
  sys.stdout.flush()
295
322
 
296
323
 
297
- if not VT100:
298
- print = flushing_print
324
+ print = safe_print if VT100 else flushing_print
299
325
 
300
326
 
301
327
  def termsize():
@@ -770,8 +796,6 @@ class Ctl(object):
770
796
  self.up_c = 0
771
797
  self.up_b = 0
772
798
  self.up_br = 0
773
- self.hasher_busy = 1
774
- self.handshaker_busy = 0
775
799
  self.uploader_busy = 0
776
800
  self.serialized = False
777
801
 
@@ -781,6 +805,9 @@ class Ctl(object):
781
805
  self.eta = "99:99:99"
782
806
 
783
807
  self.mutex = threading.Lock()
808
+ self.exit_cond = threading.Condition()
809
+ self.uploader_alive = ar.j
810
+ self.handshaker_alive = ar.j
784
811
  self.q_handshake = Queue() # type: Queue[File]
785
812
  self.q_upload = Queue() # type: Queue[FileSlice]
786
813
 
@@ -851,27 +878,21 @@ class Ctl(object):
851
878
  Daemon(self.handshaker)
852
879
  Daemon(self.uploader)
853
880
 
854
- idles = 0
855
- while idles < 3:
856
- time.sleep(0.07)
881
+ while True:
882
+ with self.exit_cond:
883
+ self.exit_cond.wait(0.07)
857
884
  with self.mutex:
858
- if (
859
- self.q_handshake.empty()
860
- and self.q_upload.empty()
861
- and not self.hasher_busy
862
- and not self.handshaker_busy
863
- and not self.uploader_busy
864
- ):
865
- idles += 1
866
- else:
867
- idles = 0
885
+ if not self.handshaker_alive and not self.uploader_alive:
886
+ break
887
+ st_hash = self.st_hash[:]
888
+ st_up = self.st_up[:]
868
889
 
869
890
  if VT100 and not self.ar.ns:
870
891
  maxlen = ss.w - len(str(self.nfiles)) - 14
871
892
  txt = "\033[s\033[{0}H".format(ss.g)
872
893
  for y, k, st, f in [
873
- [0, "hash", self.st_hash, self.hash_f],
874
- [1, "send", self.st_up, self.up_f],
894
+ [0, "hash", st_hash, self.hash_f],
895
+ [1, "send", st_up, self.up_f],
875
896
  ]:
876
897
  txt += "\033[{0}H{1}:".format(ss.g + y, k)
877
898
  file, arg = st
@@ -1027,11 +1048,42 @@ class Ctl(object):
1027
1048
  self.hash_f += 1
1028
1049
  self.hash_c += len(file.cids)
1029
1050
  self.hash_b += file.size
1051
+ if self.ar.wlist:
1052
+ self.up_f = self.hash_f
1053
+ self.up_c = self.hash_c
1054
+ self.up_b = self.hash_b
1055
+
1056
+ if self.ar.wlist:
1057
+ zsl = [self.ar.wsalt, str(file.size)] + [x[0] for x in file.kchunks]
1058
+ zb = hashlib.sha512("\n".join(zsl).encode("utf-8")).digest()[:33]
1059
+ wark = base64.urlsafe_b64encode(zb).decode("utf-8")
1060
+ vp = file.rel.decode("utf-8")
1061
+ if self.ar.jw:
1062
+ print("%s %s" % (wark, vp))
1063
+ else:
1064
+ zd = datetime.datetime.fromtimestamp(file.lmod, UTC)
1065
+ dt = "%04d-%02d-%02d %02d:%02d:%02d" % (
1066
+ zd.year,
1067
+ zd.month,
1068
+ zd.day,
1069
+ zd.hour,
1070
+ zd.minute,
1071
+ zd.second,
1072
+ )
1073
+ print("%s %12d %s %s" % (dt, file.size, wark, vp))
1074
+ continue
1030
1075
 
1031
1076
  self.q_handshake.put(file)
1032
1077
 
1033
- self.hasher_busy = 0
1034
1078
  self.st_hash = [None, "(finished)"]
1079
+ self._check_if_done()
1080
+
1081
+ def _check_if_done(self):
1082
+ with self.mutex:
1083
+ if self.nfiles - self.up_f:
1084
+ return
1085
+ for _ in range(self.ar.j):
1086
+ self.q_handshake.put(None)
1035
1087
 
1036
1088
  def handshaker(self):
1037
1089
  search = self.ar.s
@@ -1039,8 +1091,10 @@ class Ctl(object):
1039
1091
  while True:
1040
1092
  file = self.q_handshake.get()
1041
1093
  if not file:
1094
+ with self.mutex:
1095
+ self.handshaker_alive -= 1
1042
1096
  self.q_upload.put(None)
1043
- break
1097
+ return
1044
1098
 
1045
1099
  upath = file.abs.decode("utf-8", "replace")
1046
1100
  if not VT100:
@@ -1052,9 +1106,6 @@ class Ctl(object):
1052
1106
  self.errs += 1
1053
1107
  continue
1054
1108
 
1055
- with self.mutex:
1056
- self.handshaker_busy += 1
1057
-
1058
1109
  while time.time() < file.cd:
1059
1110
  time.sleep(0.1)
1060
1111
 
@@ -1062,17 +1113,17 @@ class Ctl(object):
1062
1113
  if search:
1063
1114
  if hs:
1064
1115
  for hit in hs:
1065
- t = "found: {0}\n {1}{2}\n"
1066
- print(t.format(upath, burl, hit["rp"]), end="")
1116
+ t = "found: {0}\n {1}{2}"
1117
+ print(t.format(upath, burl, hit["rp"]))
1067
1118
  else:
1068
- print("NOT found: {0}\n".format(upath), end="")
1119
+ print("NOT found: {0}".format(upath))
1069
1120
 
1070
1121
  with self.mutex:
1071
1122
  self.up_f += 1
1072
1123
  self.up_c += len(file.cids)
1073
1124
  self.up_b += file.size
1074
- self.handshaker_busy -= 1
1075
1125
 
1126
+ self._check_if_done()
1076
1127
  continue
1077
1128
 
1078
1129
  if file.recheck:
@@ -1104,7 +1155,6 @@ class Ctl(object):
1104
1155
  file.up_b -= sz
1105
1156
 
1106
1157
  file.ucids = hs
1107
- self.handshaker_busy -= 1
1108
1158
 
1109
1159
  if not hs:
1110
1160
  self.at_hash += file.t_hash
@@ -1130,6 +1180,9 @@ class Ctl(object):
1130
1180
  kw = "uploaded" if file.up_b else " found"
1131
1181
  print("{0} {1}".format(kw, upath))
1132
1182
 
1183
+ self._check_if_done()
1184
+ continue
1185
+
1133
1186
  chunksz = up2k_chunksize(file.size)
1134
1187
  njoin = (self.ar.sz * 1024 * 1024) // chunksz
1135
1188
  cs = hs[:]
@@ -1149,8 +1202,16 @@ class Ctl(object):
1149
1202
  while True:
1150
1203
  fsl = self.q_upload.get()
1151
1204
  if not fsl:
1152
- self.st_up = [None, "(finished)"]
1153
- break
1205
+ done = False
1206
+ with self.mutex:
1207
+ self.uploader_alive -= 1
1208
+ if not self.uploader_alive:
1209
+ done = not self.handshaker_alive
1210
+ self.st_up = [None, "(finished)"]
1211
+ if done:
1212
+ with self.exit_cond:
1213
+ self.exit_cond.notify_all()
1214
+ return
1154
1215
 
1155
1216
  file = fsl.file
1156
1217
  cids = fsl.cids
@@ -1252,6 +1313,10 @@ source file/folder selection uses rsync syntax, meaning that:
1252
1313
  ap.add_argument("--dr", action="store_true", help="delete remote files which don't exist locally (implies --ow)")
1253
1314
  ap.add_argument("--drd", action="store_true", help="delete remote files during upload instead of afterwards; reduces peak disk space usage, but will reupload instead of detecting renames")
1254
1315
 
1316
+ ap = app.add_argument_group("file-ID calculator; enable with url '-' to list warks (file identifiers) instead of upload/search")
1317
+ ap.add_argument("--wsalt", type=unicode, metavar="S", default="hunter2", help="salt to use when creating warks; must match server config")
1318
+ ap.add_argument("--jw", action="store_true", help="just identifier+filepath, not mtime/size too")
1319
+
1255
1320
  ap = app.add_argument_group("performance tweaks")
1256
1321
  ap.add_argument("-j", type=int, metavar="CONNS", default=2, help="parallel connections")
1257
1322
  ap.add_argument("-J", type=int, metavar="CORES", default=hcores, help="num cpu-cores to use for hashing; set 0 or 1 for single-core hashing")
@@ -1285,7 +1350,9 @@ source file/folder selection uses rsync syntax, meaning that:
1285
1350
 
1286
1351
  ar.x = "|".join(ar.x or [])
1287
1352
 
1288
- for k in "dl dr drd".split():
1353
+ setattr(ar, "wlist", ar.url == "-")
1354
+
1355
+ for k in "dl dr drd wlist".split():
1289
1356
  errs = []
1290
1357
  if ar.safe and getattr(ar, k):
1291
1358
  errs.append(k)
Binary file
Binary file
copyparty/web/up2k.js.gz CHANGED
Binary file
copyparty/web/util.js.gz CHANGED
Binary file
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: copyparty
3
- Version: 1.14.4
3
+ Version: 1.15.0
4
4
  Summary: Portable file server with accelerated resumable uploads, deduplication, WebDAV, FTP, zeroconf, media indexer, video thumbnails, audio transcoding, and write-only folders
5
5
  Author-email: ed <copyparty@ocv.me>
6
6
  License: MIT
@@ -119,7 +119,8 @@ turn almost any device into a file server with resumable uploads/downloads using
119
119
  * [smb server](#smb-server) - unsafe, slow, not recommended for wan
120
120
  * [browser ux](#browser-ux) - tweaking the ui
121
121
  * [opengraph](#opengraph) - discord and social-media embeds
122
- * [file indexing](#file-indexing) - enables dedup and music search ++
122
+ * [file deduplication](#file-deduplication) - enable symlink-based upload deduplication
123
+ * [file indexing](#file-indexing) - enable music search, upload-undo, and better dedup
123
124
  * [exclude-patterns](#exclude-patterns) - to save some time
124
125
  * [filesystem guards](#filesystem-guards) - avoid traversing into other filesystems
125
126
  * [periodic rescan](#periodic-rescan) - filesystem monitoring
@@ -1209,9 +1210,41 @@ NOTE: because discord (and maybe others) strip query args such as `?raw` in open
1209
1210
  if you want to entirely replace the copyparty response with your own jinja2 template, give the template filepath to `--og-tpl` or volflag `og_tpl` (all members of `HttpCli` are available through the `this` object)
1210
1211
 
1211
1212
 
1213
+ ## file deduplication
1214
+
1215
+ enable symlink-based upload deduplication globally with `--dedup` or per-volume with volflag `dedup`
1216
+
1217
+ when someone tries to upload a file that already exists on the server, the upload will be politely declined and a symlink is created instead, pointing to the nearest copy on disk, thus reducing disk space usage
1218
+
1219
+ **warning:** when enabling dedup, you should also:
1220
+ * enable indexing with `-e2dsa` or volflag `e2dsa` (see [file indexing](#file-indexing) section below); strongly recommended
1221
+ * ...and/or `--hardlink-only` to use hardlink-based deduplication instead of symlinks; see explanation below
1222
+
1223
+ it will not be safe to rename/delete files if you only enable dedup and none of the above; if you enable indexing then it is not *necessary* to also do hardlinks (but you may still want to)
1224
+
1225
+ by default, deduplication is done based on symlinks (symbolic links); these are tiny files which are pointers to the nearest full copy of the file
1226
+
1227
+ you can choose to use hardlinks instead of symlinks, either globally with `--hardlink-only` or per-volume with volflag `hardlinkonly`
1228
+
1229
+ advantages of using hardlinks:
1230
+ * hardlinks are more compatible with other software; they behave entirely like regular files
1231
+ * you can safely move and rename files using other file managers
1232
+ * symlinks need to be managed by copyparty to ensure the destinations remain correct
1233
+
1234
+ advantages of using symlinks (default):
1235
+ * each symlink can have its own last-modified timestamp, but a single timestamp is shared by all hardlinks
1236
+ * symlinks make it more obvious to other software that the file is not a regular file, so this can be less dangerous
1237
+ * hardlinks look like regular files, so other software may assume they are safe to edit without affecting the other copies
1238
+
1239
+ **warning:** if you edit the contents of a deduplicated file, then you will also edit all other copies of that file! This is especially surprising with hardlinks, because they look like regular files, but that same file exists in multiple locations
1240
+
1241
+ global-option `--xlink` / volflag `xlink` additionally enables deduplication across volumes, but this is probably buggy and not recommended
1242
+
1243
+
1244
+
1212
1245
  ## file indexing
1213
1246
 
1214
- enables dedup and music search ++
1247
+ enable music search, upload-undo, and better dedup
1215
1248
 
1216
1249
  file indexing relies on two database tables, the up2k filetree (`-e2d`) and the metadata tags (`-e2t`), stored in `.hist/up2k.db`. Configuration can be done through arguments, volflags, or a mix of both.
1217
1250
 
@@ -1225,7 +1258,6 @@ through arguments:
1225
1258
  * `-e2v` verfies file integrity at startup, comparing hashes from the db
1226
1259
  * `-e2vu` patches the database with the new hashes from the filesystem
1227
1260
  * `-e2vp` panics and kills copyparty instead
1228
- * `--xlink` enables deduplication across volumes
1229
1261
 
1230
1262
  the same arguments can be set as volflags, in addition to `d2d`, `d2ds`, `d2t`, `d2ts`, `d2v` for disabling:
1231
1263
  * `-v ~/music::r:c,e2ds,e2tsr` does a full reindex of everything on startup
@@ -1238,7 +1270,6 @@ note:
1238
1270
  * upload-times can be displayed in the file listing by enabling the `.up_at` metadata key, either globally with `-e2d -mte +.up_at` or per-volume with volflags `e2d,mte=+.up_at` (will have a ~17% performance impact on directory listings)
1239
1271
  * `e2tsr` is probably always overkill, since `e2ds`/`e2dsa` would pick up any file modifications and `e2ts` would then reindex those, unless there is a new copyparty version with new parsers and the release note says otherwise
1240
1272
  * the rescan button in the admin panel has no effect unless the volume has `-e2ds` or higher
1241
- * deduplication is possible on windows if you run copyparty as administrator (not saying you should!)
1242
1273
 
1243
1274
  ### exclude-patterns
1244
1275
 
@@ -1978,6 +2009,8 @@ below are some tweaks roughly ordered by usefulness:
1978
2009
  * `-q` disables logging and can help a bunch, even when combined with `-lo` to redirect logs to file
1979
2010
  * `--hist` pointing to a fast location (ssd) will make directory listings and searches faster when `-e2d` or `-e2t` is set
1980
2011
  * and also makes thumbnails load faster, regardless of e2d/e2t
2012
+ * `--dedup` enables deduplication and thus avoids writing to the HDD if someone uploads a dupe
2013
+ * `--safe-dedup 1` makes deduplication much faster during upload by skipping verification of file contents; safe if there is no other software editing/moving the files in the volumes
1981
2014
  * `--no-hash .` when indexing a network-disk if you don't care about the actual filehashes and only want the names/tags searchable
1982
2015
  * if your volumes are on a network-disk such as NFS / SMB / s3, specifying larger values for `--iobuf` and/or `--s-rd-sz` and/or `--s-wr-sz` may help; try setting all of them to `524288` or `1048576` or `4194304`
1983
2016
  * `--no-htp --hash-mt=0 --mtag-mt=1 --th-mt=1` minimizes the number of threads; can help in some eccentric environments (like the vscode debugger)
@@ -2032,7 +2065,7 @@ safety profiles:
2032
2065
  * `--hardlink` creates hardlinks instead of symlinks when deduplicating uploads, which is less maintenance
2033
2066
  * however note if you edit one file it will also affect the other copies
2034
2067
  * `--vague-403` returns a "404 not found" instead of "401 unauthorized" which is a common enterprise meme
2035
- * `--nih` removes the server hostname from directory listings
2068
+ * `-nih` removes the server hostname from directory listings
2036
2069
 
2037
2070
  * option `-sss` is a shortcut for the above plus:
2038
2071
  * `--no-dav` disables webdav support
@@ -1,13 +1,13 @@
1
1
  copyparty/__init__.py,sha256=fUINM1abqDGzCCH_JcXdOnLdKOV-SrTI2Xo2QgQW2P4,1703
2
- copyparty/__main__.py,sha256=0fQcGqHkcYXRqtkh9as-xyBfV_5R6z2fQf41WFtjRe8,108093
3
- copyparty/__version__.py,sha256=nvd8B5TJuiwdP2pBNNqiQlQQi41vsmExvBjCabqN22Q,257
4
- copyparty/authsrv.py,sha256=jWXTjZLT8cGymfa9wBwGJqyBIi80aXcvzAMgI74G8iA,95750
2
+ copyparty/__main__.py,sha256=snMG7GjXZ54Zssycrb5jiZSiyo04-Q84XtLHILOnmVw,108558
3
+ copyparty/__version__.py,sha256=P2ttoiozX2VCs4PraPoVx64v13JfCOKOL5vl61l3aTE,256
4
+ copyparty/authsrv.py,sha256=7_gMjcUSm7igR4NjZv34TegBKizmBvWjeRChT5MfF_s,96535
5
5
  copyparty/broker_mp.py,sha256=YFe1S6Zziht8Qc__dCLj_ff8z0DDny9lqk_Mi5ajsJk,3868
6
6
  copyparty/broker_mpw.py,sha256=4ZI7bJYOwUibeAJVv9_FPGNmHrr9eOtkj_Kz0JEppTU,3197
7
7
  copyparty/broker_thr.py,sha256=eKr--HJGig5zqvNGwH9UoBG9Nvi9mT2axrRmJwknd0s,1759
8
8
  copyparty/broker_util.py,sha256=w0E-GhoOgq8ow7mEWi3GOyqraux6VG9yk1tif1yo0jc,1474
9
9
  copyparty/cert.py,sha256=kRFkMwBUCV_Vo7BYweD-yJ7Hpp5BCpaXneyBWxlu1PM,7759
10
- copyparty/cfg.py,sha256=i8-bjWgbguQooxiA172RcptqR_SEOwDHJ4cqldrZ8oQ,9792
10
+ copyparty/cfg.py,sha256=6cj2xJnBa9vRubM5U_mkA87zG2Ug11vnyk2hYz0XfxI,9965
11
11
  copyparty/dxml.py,sha256=lZpg-kn-kQsXRtNY1n6fRaS-b7uXzMCyv8ovKnhZcZc,1548
12
12
  copyparty/fsutil.py,sha256=hnEHgySI43-XJJKbI8n6t1A6oVHzR_nYdsBcAwtreBk,4610
13
13
  copyparty/ftpd.py,sha256=1vD-KTy07xfEEEk1dx37pUYModpNO2gIhVXvFUr205M,17497
@@ -24,15 +24,15 @@ copyparty/smbd.py,sha256=8zkC9BjVtGiKXMLajbdakxoKeFzACdM75SW0_SvqXJA,14490
24
24
  copyparty/ssdp.py,sha256=8iyF5sqIjATJLWcAtnJa8eadHosOn0CP4ywltzJ7bVY,7023
25
25
  copyparty/star.py,sha256=tV5BbX6AiQ7N4UU8DYtSTckNYeoeey4DBqq4LjfymbY,3818
26
26
  copyparty/sutil.py,sha256=JTMrQwcWH85hXB_cKG206eDZ967WZDGaP00AWvl_gB0,3214
27
- copyparty/svchub.py,sha256=v0f8KU65dj2MXOjrd-7kckgiFZSu8kpnTUMyLfn9NIM,38429
27
+ copyparty/svchub.py,sha256=gxvBZ3LTeBCtJnrGTSRl908z6zKj0SY5vJOmFMbMXYk,38017
28
28
  copyparty/szip.py,sha256=tor4yjdHhEL4Ox-Xg7-cuUFrMO0IwQD29aRX5Cp8MYs,8605
29
29
  copyparty/tcpsrv.py,sha256=jM_Za64O8LEMfMrU4irJluIJZrU494e2b759r_KhaUQ,19881
30
30
  copyparty/tftpd.py,sha256=jZbf2JpeJmkuQWJErmAPG-dKhtYNvIUHbkAgodSXw9Y,13582
31
31
  copyparty/th_cli.py,sha256=o6FMkerYvAXS455z3DUossVztu_nzFlYSQhs6qN6Jt8,4636
32
32
  copyparty/th_srv.py,sha256=27IftjIXUQzRRiUytt-CgXkybEoP3HHHoXaDAvxEmLo,29217
33
33
  copyparty/u2idx.py,sha256=t4mzjj2GDrkjIHt0RM68y1EgT5qOBoz6mkYgjMbqA38,13526
34
- copyparty/up2k.py,sha256=K2FIxvL4RQ9Er8pH7gQnIU-OHdOJJ5P60oW-SX-silQ,155373
35
- copyparty/util.py,sha256=aWKONxHJh73PCvq9aKKJW9LcU3EqiZynpQ-b1MvK_50,88604
34
+ copyparty/up2k.py,sha256=kDc6GwMGSyvF8hgx2digIIy2LX8RKGj1diRSFrBD0EY,159274
35
+ copyparty/util.py,sha256=qkwrCRqDI7iCiO3X2RQ1LdGVblnkIQe1YTuJPlF27u4,88666
36
36
  copyparty/bos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
37
  copyparty/bos/bos.py,sha256=Wb7eWsXJgR5AFlBR9ZOyKrLTwy-Kct9RrGiOu4Jo37Y,1622
38
38
  copyparty/bos/path.py,sha256=yEjCq2ki9CvxA5sCT8pS0keEXwugs0ZeUyUhdBziOCI,777
@@ -55,9 +55,9 @@ copyparty/stolen/ifaddr/_posix.py,sha256=-67NdfGrCktfQPakT2fLbjl2U00QMvyBGkSvrUu
55
55
  copyparty/stolen/ifaddr/_shared.py,sha256=uNC4SdEIgdSLKvuUzsf1aM-H1Xrc_9mpLoOT43YukGs,6206
56
56
  copyparty/stolen/ifaddr/_win32.py,sha256=EE-QyoBgeB7lYQ6z62VjXNaRozaYfCkaJBHGNA8QtZM,4026
57
57
  copyparty/web/baguettebox.js.gz,sha256=4dS8-r4si84ca71l98672ahnRI86Aq95MU-bc5knykk,7962
58
- copyparty/web/browser.css.gz,sha256=PoW_IIwFigZaMo3atpPU0o05Jj5Flbsm1bhW_KfcX-U,11491
58
+ copyparty/web/browser.css.gz,sha256=-y8OT0wvGrZnZvFxas0_Nob4pf9_1zNABX2nUaAj6Hs,11579
59
59
  copyparty/web/browser.html,sha256=vvfWiu_aOFRar8u5lridMRKQSPF4R0YkA41zrsh82Qs,4878
60
- copyparty/web/browser.js.gz,sha256=vbgNK0skbOyQHqUeYbLxi0qkdAXzWBTGxhgFaKBhxx8,80863
60
+ copyparty/web/browser.js.gz,sha256=CzvjCOUR_faGx5PLA4rR7XDNdp-N-ZCHJ3rJLUim9kw,84721
61
61
  copyparty/web/browser2.html,sha256=NRUZ08GH-e2YcGXcoz0UjYg6JIVF42u4IMX4HHwWTmg,1587
62
62
  copyparty/web/cf.html,sha256=lJThtNFNAQT1ClCHHlivAkDGE0LutedwopXD62Z8Nys,589
63
63
  copyparty/web/dbg-audio.js.gz,sha256=Ma-KZtK8LnmiwNvNKFKXMPYl_Nn_3U7GsJ6-DRWC2HE,688
@@ -80,12 +80,12 @@ copyparty/web/splash.js.gz,sha256=pxEHaRDpxTnW6WdRWpKlRux8jtI7B5RImRjUVs9gdQQ,25
80
80
  copyparty/web/svcs.html,sha256=v0C3cOFWXYlvp3GEifz1Qj0W3MD8JANT3WTON05GZ9o,11797
81
81
  copyparty/web/svcs.js.gz,sha256=k81ZvZ3I-f4fMHKrNGGOgOlvXnCBz0mVjD-8mieoWCA,520
82
82
  copyparty/web/ui.css.gz,sha256=ae1JosPYS8d2F9e_b95bTwa7qYwk8Ur_UhoVpRYEp0Y,2658
83
- copyparty/web/up2k.js.gz,sha256=M9n-VLZP7olT-kL5FfT_X_FW1FrQV-bB3_xPhhFic3U,22943
84
- copyparty/web/util.js.gz,sha256=dPuhXEBJ_T-d2tYUUufGTUul4FYIbuh6GQmtK7iBkEo,14682
83
+ copyparty/web/up2k.js.gz,sha256=t6mUaIYN8lrEJoWO_-2MZ9nIuEplimQgCqdWSsHnuGA,22811
84
+ copyparty/web/util.js.gz,sha256=1umlPsqbe_uuM_xX1QwIp9KC8KQAOFUh3aq6HxozIms,14682
85
85
  copyparty/web/w.hash.js.gz,sha256=7wP9EZQNXQxwZnCCFUVsi_-6TM9PLZJeZ9krutXRRj8,1060
86
86
  copyparty/web/a/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
87
87
  copyparty/web/a/partyfuse.py,sha256=MuRkaSuYsdfWfBFMOkbPwDXqSvNTw3sd7QhhlKCDZ8I,32311
88
- copyparty/web/a/u2c.py,sha256=WG9njRxY9g9xjO93-fas9Wo-AM8vtrWrUTwpJ5Afmvk,42482
88
+ copyparty/web/a/u2c.py,sha256=jeSwm0sD-uxXQbKX9srI36-RJQHL3t6EEupD-lv5xdo,44960
89
89
  copyparty/web/a/webdav-cfg.bat,sha256=Y4NoGZlksAIg4cBMb7KdJrpKC6Nx97onaTl6yMjaimk,1449
90
90
  copyparty/web/dd/2.png,sha256=gJ14XFPzaw95L6z92fSq9eMPikSQyu-03P1lgiGe0_I,258
91
91
  copyparty/web/dd/3.png,sha256=4lho8Koz5tV7jJ4ODo6GMTScZfkqsT05yp48EDFIlyg,252
@@ -105,9 +105,9 @@ copyparty/web/deps/prismd.css.gz,sha256=ObUlksQVr-OuYlTz-I4B23TeBg2QDVVGRnWBz8cV
105
105
  copyparty/web/deps/scp.woff2,sha256=w99BDU5i8MukkMEL-iW0YO9H4vFFZSPWxbkH70ytaAg,8612
106
106
  copyparty/web/deps/sha512.ac.js.gz,sha256=lFZaCLumgWxrvEuDr4bqdKHsqjX82AbVAb7_F45Yk88,7033
107
107
  copyparty/web/deps/sha512.hw.js.gz,sha256=vqoXeracj-99Z5MfY3jK2N4WiSzYQdfjy0RnUlQDhSU,8110
108
- copyparty-1.14.4.dist-info/LICENSE,sha256=gOr4h33pCsBEg9uIy9AYmb7qlocL4V9t2uPJS5wllr0,1072
109
- copyparty-1.14.4.dist-info/METADATA,sha256=9NRNTHBaC3QV4ruHXtp6kuOcakkBtme8iY17_lRmaPI,131776
110
- copyparty-1.14.4.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
111
- copyparty-1.14.4.dist-info/entry_points.txt,sha256=4zw6a3rqASywQomiYLObjjlxybaI65LYYOTJwgKz7b0,128
112
- copyparty-1.14.4.dist-info/top_level.txt,sha256=LnYUPsDyk-8kFgM6YJLG4h820DQekn81cObKSu9g-sI,10
113
- copyparty-1.14.4.dist-info/RECORD,,
108
+ copyparty-1.15.0.dist-info/LICENSE,sha256=gOr4h33pCsBEg9uIy9AYmb7qlocL4V9t2uPJS5wllr0,1072
109
+ copyparty-1.15.0.dist-info/METADATA,sha256=DL4nbPuSPVsK8XTadsayPyqsiCkgDbOhhbMYKearcqg,134165
110
+ copyparty-1.15.0.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
111
+ copyparty-1.15.0.dist-info/entry_points.txt,sha256=4zw6a3rqASywQomiYLObjjlxybaI65LYYOTJwgKz7b0,128
112
+ copyparty-1.15.0.dist-info/top_level.txt,sha256=LnYUPsDyk-8kFgM6YJLG4h820DQekn81cObKSu9g-sI,10
113
+ copyparty-1.15.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (74.0.0)
2
+ Generator: setuptools (74.1.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5