PyPI - promnesia - Versions diffs - 1.2.20240810__py3-none-any.whl → 1.4.20250909__py3-none-any.whl - Mend

promnesia 1.2.20240810__py3-none-any.whl → 1.4.20250909__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79)

promnesia/__init__.py +18 -4
promnesia/__main__.py +104 -78
promnesia/cannon.py +108 -107
promnesia/common.py +107 -88
promnesia/compare.py +33 -30
promnesia/compat.py +10 -10
promnesia/config.py +37 -34
promnesia/database/common.py +4 -3
promnesia/database/dump.py +13 -13
promnesia/database/load.py +7 -7
promnesia/extract.py +19 -17
promnesia/logging.py +27 -15
promnesia/misc/install_server.py +32 -27
promnesia/server.py +106 -79
promnesia/sources/auto.py +104 -77
promnesia/sources/auto_logseq.py +6 -5
promnesia/sources/auto_obsidian.py +2 -2
promnesia/sources/browser.py +20 -10
promnesia/sources/browser_legacy.py +65 -50
promnesia/sources/demo.py +7 -8
promnesia/sources/fbmessenger.py +3 -3
promnesia/sources/filetypes.py +22 -16
promnesia/sources/github.py +9 -8
promnesia/sources/guess.py +6 -2
promnesia/sources/hackernews.py +7 -9
promnesia/sources/hpi.py +5 -3
promnesia/sources/html.py +11 -7
promnesia/sources/hypothesis.py +3 -2
promnesia/sources/instapaper.py +3 -2
promnesia/sources/markdown.py +22 -12
promnesia/sources/org.py +36 -17
promnesia/sources/plaintext.py +41 -39
promnesia/sources/pocket.py +5 -3
promnesia/sources/reddit.py +24 -26
promnesia/sources/roamresearch.py +5 -2
promnesia/sources/rss.py +6 -8
promnesia/sources/shellcmd.py +21 -11
promnesia/sources/signal.py +27 -26
promnesia/sources/smscalls.py +2 -3
promnesia/sources/stackexchange.py +5 -4
promnesia/sources/takeout.py +37 -34
promnesia/sources/takeout_legacy.py +29 -19
promnesia/sources/telegram.py +18 -12
promnesia/sources/telegram_legacy.py +22 -11
promnesia/sources/twitter.py +7 -6
promnesia/sources/vcs.py +11 -6
promnesia/sources/viber.py +11 -10
promnesia/sources/website.py +8 -7
promnesia/sources/zulip.py +3 -2
promnesia/sqlite.py +13 -7
promnesia/tests/common.py +10 -5
promnesia/tests/server_helper.py +13 -10
promnesia/tests/sources/test_auto.py +2 -3
promnesia/tests/sources/test_filetypes.py +11 -8
promnesia/tests/sources/test_hypothesis.py +10 -6
promnesia/tests/sources/test_org.py +9 -5
promnesia/tests/sources/test_plaintext.py +9 -8
promnesia/tests/sources/test_shellcmd.py +13 -13
promnesia/tests/sources/test_takeout.py +3 -5
promnesia/tests/test_cannon.py +256 -239
promnesia/tests/test_cli.py +12 -8
promnesia/tests/test_compare.py +17 -13
promnesia/tests/test_config.py +7 -8
promnesia/tests/test_db_dump.py +15 -15
promnesia/tests/test_extract.py +17 -10
promnesia/tests/test_indexer.py +24 -18
promnesia/tests/test_server.py +12 -13
promnesia/tests/test_traverse.py +0 -2
promnesia/tests/utils.py +3 -7
promnesia-1.4.20250909.dist-info/METADATA +66 -0
promnesia-1.4.20250909.dist-info/RECORD +80 -0
{promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/WHEEL +1 -2
promnesia/kjson.py +0 -121
promnesia/sources/__init__.pyi +0 -0
promnesia-1.2.20240810.dist-info/METADATA +0 -54
promnesia-1.2.20240810.dist-info/RECORD +0 -83
promnesia-1.2.20240810.dist-info/top_level.txt +0 -1
{promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/entry_points.txt +0 -0
{promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info/licenses}/LICENSE +0 -0

promnesia/common.py CHANGED

@@ -1,51 +1,55 @@
 from __future__ import annotations
-from contextlib import contextmanager
-from datetime import datetime, date
-from functools import lru_cache
-from glob import glob
 import itertools
 import logging
 import os
-from pathlib import Path
+import re
 import shutil
-from subprocess import run, PIPE, Popen
+import tempfile
+import warnings
+from collections.abc import Callable, Iterable, Sequence
+from contextlib import contextmanager
+from copy import copy
+from datetime import date, datetime, timezone
+from functools import lru_cache
+from glob import glob
+from pathlib import Path
+from subprocess import PIPE, Popen, run
 from timeit import default_timer as timer
 from types import ModuleType
-from typing import NamedTuple, Iterable, TypeVar, Callable, List, Optional, Union, TypeVar
-import warnings
+from typing import TYPE_CHECKING, NamedTuple, TypeAlias, TypeVar
+from zoneinfo import ZoneInfo
+import platformdirs
 from more_itertools import intersperse
-import pytz
 from .cannon import canonify
-from .compat import removeprefix
 _is_windows = os.name == 'nt'
 T = TypeVar('T')
-Res = Union[T, Exception]
+Res: TypeAlias = T | Exception
-PathIsh = Union[str, Path]
+PathIsh = str | Path
 Url = str
 SourceName = str
-DatetimeIsh = Union[datetime, date]
+DatetimeIsh = datetime | date
 Context = str
 Second = int
 # TODO hmm. arguably, source and context are almost same things...
 class Loc(NamedTuple):
     title: str
-    href: Optional[str]=None
+    href: str | None = None
     @classmethod
-    def make(cls, title: str, href: Optional[str]=None) -> 'Loc':
+    def make(cls, title: str, href: str | None = None) -> Loc:
         return cls(title=title, href=href)
     @classmethod
-    def file(cls, path: PathIsh, line: Optional[int]=None, relative_to: Optional[Path]=None) -> 'Loc':
+    def file(cls, path: PathIsh, line: int | None = None, relative_to: Path | None = None) -> Loc:
         lstr = '' if line is None else f':{line}'
         # todo loc should be url encoded? dunno.
         # or use line=? eh. I don't know. Just ask in issues.
@@ -53,11 +57,11 @@ class Loc(NamedTuple):
         # todo: handler has to be overridable by config. This is needed for docker, but also for a "as a service" install, where the sources would be available on some remote webserver
         # maybe it should be treated as a format string, so that {line} may be a part of the result or not.
         # for local usage, editor:///file:line works, but if the txt file is only available through http, it breaks.
-        #if get_config().MIME_HANDLER:
+        # if get_config().MIME_HANDLER:
         #   handler = get_config().MIME_HANDLER
-        #if True:
+        # if True:
         #    handler =  'editor:///home/koom/promnesia/docker/'
-        #else:
+        # else:
         handler = _detect_mime_handler()
         rel = Path(path)
@@ -65,13 +69,10 @@ class Loc(NamedTuple):
             try:
                 # making it relative is a bit nicer for display
                 rel = rel.relative_to(relative_to)
-            except Exception as e:
-                pass # todo log/warn?
+            except Exception:
+                pass  # todo log/warn?
         loc = f'{rel}{lstr}'
-        return cls.make(
-            title=loc,
-            href=f'{handler}{path}{lstr}'
-        )
+        return cls.make(title=loc, href=f'{handler}{path}{lstr}')
     # TODO need some uniform way of string conversion
     # but generally, it will be
@@ -87,19 +88,21 @@ def warn_once(message: str) -> None:
 def _warn_no_xdg_mime() -> None:
-    warn_once("No xdg-mime on your OS! If you're on OSX, perhaps you can help me! https://github.com/karlicoss/open-in-editor/issues/1")
+    warn_once(
+        "No xdg-mime on your OS! If you're on OSX, perhaps you can help me! https://github.com/karlicoss/open-in-editor/issues/1"
+    )
 @lru_cache(1)
 def _detect_mime_handler() -> str:
     def exists(what: str) -> bool:
         try:
-            r = run(f'xdg-mime query default x-scheme-handler/{what}'.split(), stdout=PIPE)
+            r = run(f'xdg-mime query default x-scheme-handler/{what}'.split(), stdout=PIPE, check=False)
         except (FileNotFoundError, NotADirectoryError):  # ugh seems that osx might throw NotADirectory for some reason
             _warn_no_xdg_mime()
             return False
         if r.returncode > 0:
-            warnings.warn('xdg-mime failed') # hopefully rest is in stderr
+            warnings.warn('xdg-mime failed')  # hopefully rest is in stderr
             return False
         # todo not sure if should check=True or something
         handler = r.stdout.decode('utf8').strip()
@@ -108,11 +111,13 @@ def _detect_mime_handler() -> str:
     # 1. detect legacy 'emacs:' handler (so it doesn't break for existing users)
     result = None
     if exists('emacs'):
-        warnings.warn('''
+        warnings.warn(
+            '''
         'emacs:' handler is deprecated!
         Please use newer version at https://github.com/karlicoss/open-in-editor
         And remove the old one (most likely, rm ~/.local/share/applications/mimemacs.desktop && update-desktop-database ~/.local/share/applications).
-'''.rstrip())
+'''.rstrip()
+        )
         result = 'emacs:'
     # 2. now try to use newer editor:// thing
@@ -120,10 +125,12 @@ def _detect_mime_handler() -> str:
     # TODO would be nice to collect warnings and display at the end
     if not exists('editor'):
-        warnings.warn('''
+        warnings.warn(
+            '''
         You might want to install https://github.com/karlicoss/open-in-editor
         So you can jump to your text files straight from the browser
-'''.rstrip())
+'''.rstrip()
+        )
     else:
         result = 'editor://'
@@ -139,39 +146,41 @@ class Visit(NamedTuple):
     # TODO back to DatetimeIsh, but somehow make compatible to dbcache?
     dt: datetime
     locator: Loc
-    context: Optional[Context] = None
-    duration: Optional[Second] = None
+    context: Context | None = None
+    duration: Second | None = None
     # TODO shit. I need to insert it in chrome db....
     # TODO gonna be hard to fill retroactively.
     # spent: Optional[Second] = None
-    debug: Optional[str] = None
+    debug: str | None = None
-Result = Union[Visit, Exception]
+Result = Visit | Exception
 Results = Iterable[Result]
 Extractor = Callable[[], Results]
 Extraction = Result  # TODO deprecate!
 class DbVisit(NamedTuple):
     norm_url: Url
     orig_url: Url
     dt: datetime
     locator: Loc
-    src: Optional[SourceName] = None
-    context: Optional[Context] = None
-    duration: Optional[Second] = None
+    src: SourceName | None = None
+    context: Context | None = None
+    duration: Second | None = None
     @staticmethod
-    def make(p: Visit, src: SourceName) -> Res['DbVisit']:
+    def make(p: Visit, src: SourceName) -> Res[DbVisit]:
         try:
             # hmm, mypy gets a bit confused here.. presumably because datetime is always datetime (but date is not datetime)
             if isinstance(p.dt, datetime):
                 dt = p.dt
             elif isinstance(p.dt, date):
                 # TODO that won't be with timezone..
-                dt = datetime.combine(p.dt, datetime.min.time()) # meh..
+                dt = datetime.combine(p.dt, datetime.min.time())  # meh..
             else:
-                raise AssertionError(f'unexpected date: {p.dt}, {type(p.dt)}')
+                raise TypeError(f'unexpected date: {p.dt}, {type(p.dt)}')  # noqa: TRY301
         except Exception as e:
             return e
@@ -196,35 +205,37 @@ Filter = Callable[[Url], bool]
 from .logging import LazyLogger
 logger = LazyLogger('promnesia', level='DEBUG')
 def get_logger() -> logging.Logger:
     # deprecate? no need since logger is lazy already
     return logger
-import tempfile
 # kinda singleton
 @lru_cache(1)
 def get_tmpdir() -> tempfile.TemporaryDirectory[str]:
-    # todo use appdirs?
+    # todo use platformdirs?
     tdir = tempfile.TemporaryDirectory(suffix="promnesia")
     return tdir
 # TODO use mypy literal?
 Syntax = str
 @lru_cache(None)
 def _get_urlextractor(syntax: Syntax):
-    from urlextract import URLExtract # type: ignore
+    from urlextract import URLExtract  # type: ignore[import-untyped]
     u = URLExtract()
     # https://github.com/lipoja/URLExtract/issues/13
-    if syntax in {'org', 'orgmode', 'org-mode'}: # TODO remove hardcoding..
+    if syntax in {'org', 'orgmode', 'org-mode'}:  # TODO remove hardcoding..
         # handle org-mode links properly..
         u._stop_chars_right |= {'[', ']'}
-        u._stop_chars_left  |= {'[', ']'}
+        u._stop_chars_left |= {'[', ']'}
     elif syntax in {'md', 'markdown'}:
         pass
     # u._stop_chars_right |= {','}
@@ -242,19 +253,19 @@ def _sanitize(url: str) -> str:
     return url
-def iter_urls(s: str, *, syntax: Syntax='') -> Iterable[Url]:
+def iter_urls(s: str, *, syntax: Syntax = '') -> Iterable[Url]:
     urlextractor = _get_urlextractor(syntax=syntax)
     # note: it also has get_indices, might be useful
     for u in urlextractor.gen_urls(s):
         yield _sanitize(u)
-def extract_urls(s: str, *, syntax: Syntax='') -> List[Url]:
+def extract_urls(s: str, *, syntax: Syntax = '') -> list[Url]:
     return list(iter_urls(s=s, syntax=syntax))
 def from_epoch(ts: int) -> datetime:
-    return datetime.fromtimestamp(ts, tz=pytz.utc)
+    return datetime.fromtimestamp(ts, tz=timezone.utc)
 def join_tags(tags: Iterable[str]) -> str:
@@ -274,7 +285,7 @@ class PathWithMtime(NamedTuple):
     mtime: float
     @classmethod
-    def make(cls, p: Path) -> 'PathWithMtime':
+    def make(cls, p: Path) -> PathWithMtime:
         return cls(
             path=p,
             mtime=p.stat().st_mtime,
@@ -285,10 +296,7 @@ class PathWithMtime(NamedTuple):
 PreExtractor = Callable[..., Results]
-PreSource = Union[
-    PreExtractor,
-    ModuleType,   # module with 'index' functon defined in it
-]
+PreSource = PreExtractor | ModuleType  # module with 'index' functon defined in it
 # todo not sure about this...
@@ -300,7 +308,7 @@ def _guess_name(thing: PreSource) -> str:
         guess = thing.__module__
     dflt = 'promnesia.sources.'
-    guess = removeprefix(guess, prefix=dflt)
+    guess = guess.removeprefix(dflt)
     if guess == 'config':
         # this happens when we define a lambda in config or something without properly wrapping in Source
         logger.warning(f'Inferred source name "config" for {thing}. This might be misleading TODO')
@@ -320,7 +328,7 @@ def _get_index_function(sourceish: PreSource) -> PreExtractor:
 class Source:
     # TODO make sure it works with empty src?
     # TODO later, make it properly optional?
-    def __init__(self, ff: PreSource, *args, src: SourceName='', name: SourceName='', **kwargs) -> None:
+    def __init__(self, ff: PreSource, *args, src: SourceName = '', name: SourceName = '', **kwargs) -> None:
         # NOTE: in principle, would be nice to make the Source countructor to be as dumb as possible
         # so we could move _get_index_function inside extractor lambda
         # but that way we get nicer error reporting
@@ -354,6 +362,7 @@ class Source:
         # TODO deprecated!
         return self.name
 # TODO deprecated
 Indexer = Source
@@ -362,13 +371,15 @@ Indexer = Source
 # NOTE: used in configs...
 def last(path: PathIsh, *parts: str) -> Path:
     import os.path
-    pp = os.path.join(str(path), *parts)
-    return Path(max(glob(pp, recursive=True)))
+    pp = os.path.join(str(path), *parts)  # noqa: PTH118
+    return Path(max(glob(pp, recursive=True)))  # noqa: PTH207
-from .logging import setup_logger
-from copy import copy
+from .logging import setup_logger  # noqa: F401
+# TODO get rid of this? not sure if still necessary
 def echain(ex: Exception, cause: Exception) -> Exception:
     e = copy(ex)
     e.__cause__ = cause
@@ -382,50 +393,48 @@ def echain(ex: Exception, cause: Exception) -> Exception:
 def slugify(x: str) -> str:
     # https://stackoverflow.com/a/38766141/706389
-    import re
     valid_file_name = re.sub(r'[^\w_.)( -]', '', x)
     return valid_file_name
 # todo cache?
-def appdirs():
+def _platformdirs() -> platformdirs.PlatformDirs:
     under_test = os.environ.get('PYTEST_CURRENT_TEST') is not None
     # todo actually use test name?
     name = 'promnesia-test' if under_test else 'promnesia'
-    import appdirs as ad # type: ignore[import-untyped]
-    return ad.AppDirs(appname=name)
+    return platformdirs.PlatformDirs(appname=name)
 def default_output_dir() -> Path:
     # TODO: on Windows, there are two extra subdirectories (<AppAuthor>\<AppName>)
     # perhaps makes sense to create it here with parents to avoid issues downstream?
-    return Path(appdirs().user_data_dir)
+    return Path(_platformdirs().user_data_dir)
 def default_cache_dir() -> Path:
-    return Path(appdirs().user_cache_dir)
+    return Path(_platformdirs().user_cache_dir)
 # make it lazy, otherwise it might crash on module import (e.g. on Windows)
 # ideally would be nice to fix it properly https://github.com/ahupp/python-magic#windows
 @lru_cache(1)
-def _magic() -> Callable[[PathIsh], Optional[str]]:
+def _magic() -> Callable[[PathIsh], str | None]:
     logger = get_logger()
     try:
-        import magic # type: ignore
+        import magic  # type: ignore[import-not-found]
     except Exception as e:
         logger.exception(e)
-        defensive_msg: Optional[str] = None
+        defensive_msg: str | None = None
         if isinstance(e, ModuleNotFoundError) and e.name == 'magic':
             defensive_msg = "python-magic is not detected. It's recommended for better file type detection (pip3 install --user python-magic). See https://github.com/ahupp/python-magic#installation"
         elif isinstance(e, ImportError):
-            emsg = getattr(e, 'msg', '') # make mypy happy
-            if 'failed to find libmagic' in emsg: # probably the actual library is missing?...
+            emsg = getattr(e, 'msg', '')  # make mypy happy
+            if 'failed to find libmagic' in emsg:  # probably the actual library is missing?...
                 defensive_msg = "couldn't import magic. See https://github.com/ahupp/python-magic#installation"
         if defensive_msg is not None:
             logger.warning(defensive_msg)
             warnings.warn(defensive_msg)
-            return lambda path: None # stub
+            return lambda path: None  # stub  # noqa: ARG005
         else:
             raise e
     else:
@@ -437,11 +446,12 @@ def _magic() -> Callable[[PathIsh], Optional[str]]:
 @lru_cache(1)
 def _mimetypes():
     import mimetypes
     mimetypes.init()
     return mimetypes
-def mime(path: PathIsh) -> Optional[str]:
+def mime(path: PathIsh) -> str | None:
     ps = str(path)
     mimetypes = _mimetypes()
     # first try mimetypes, it's only using the filename without opening the file
@@ -453,7 +463,7 @@ def mime(path: PathIsh) -> Optional[str]:
     return magic(ps)
-def find_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
+def find_args(root: Path, *, follow: bool, ignore: Sequence[str] = ()) -> list[str]:
     prune_dir_args = []
     ignore_file_args = []
     if ignore:
@@ -473,10 +483,10 @@ def find_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
         *prune_dir_args,
         '-type', 'f',
         *ignore_file_args
-    ]
+    ]  # fmt: skip
-def fdfind_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
+def fdfind_args(root: Path, *, follow: bool, ignore: Sequence[str] = ()) -> list[str]:
     from .config import extra_fd_args
     ignore_args = []
@@ -493,10 +503,10 @@ def fdfind_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
         '--type', 'f',
         '.',
         str(root),
-    ]
+    ]  # fmt: skip
-def traverse(root: Path, *, follow: bool=True, ignore: List[str]=[]) -> Iterable[Path]:
+def traverse(root: Path, *, follow: bool = True, ignore: Sequence[str] = ()) -> Iterable[Path]:
     if not root.is_dir():
         yield root
         return
@@ -515,12 +525,14 @@ def traverse(root: Path, *, follow: bool=True, ignore: List[str]=[]) -> Iterable
     cmd = ['find', *find_args(root, follow=follow, ignore=ignore)]
     # try to use fd.. it cooperates well with gitignore etc, also faster than find
-    for x in ('fd', 'fd-find', 'fdfind'): # has different names on different dists..
+    for x in ('fd', 'fd-find', 'fdfind'):  # has different names on different dists..
         if shutil.which(x):
             cmd = [x, *fdfind_args(root, follow=follow, ignore=ignore)]
             break
     else:
-        warnings.warn("'fdfind' is recommended for the best indexing performance. See https://github.com/sharkdp/fd#installation. Falling back to 'find'")
+        warnings.warn(
+            "'fdfind' is recommended for the best indexing performance. See https://github.com/sharkdp/fd#installation. Falling back to 'find'"
+        )
     logger.debug('running: %s', cmd)
     # TODO split by \0?
@@ -537,6 +549,7 @@ def traverse(root: Path, *, follow: bool=True, ignore: List[str]=[]) -> Iterable
 def get_system_zone() -> str:
     try:
         import tzlocal
         return tzlocal.get_localzone_name()
     except Exception as e:
         logger.exception(e)
@@ -545,14 +558,15 @@ def get_system_zone() -> str:
 @lru_cache(1)
-def get_system_tz() -> pytz.BaseTzInfo:
+def get_system_tz() -> ZoneInfo:
     zone = get_system_zone()
     try:
-        return pytz.timezone(zone)
+        return ZoneInfo(zone)
     except Exception as e:
         logger.exception(e)
         logger.error("Unknown time zone %s. Falling back to UTC. Please report this as a bug!", zone)
-        return pytz.utc
+        return ZoneInfo('UTC')
 # used in misc/install_server.py
 def root() -> Path:
@@ -574,7 +588,7 @@ def user_config_file() -> Path:
     if "PROMNESIA_CONFIG" in os.environ:
         return Path(os.environ["PROMNESIA_CONFIG"])
     else:
-        return Path(appdirs().user_config_dir) / 'config.py'
+        return Path(_platformdirs().user_config_dir) / 'config.py'
 def default_config_path() -> Path:
@@ -589,7 +603,7 @@ def default_config_path() -> Path:
 @contextmanager
-def measure(tag: str='', *, logger: logging.Logger, unit: str='ms'):
+def measure(tag: str = '', *, logger: logging.Logger, unit: str = 'ms'):
     before = timer()
     yield lambda: timer() - before
     after = timer()
@@ -605,3 +619,8 @@ def is_sqlite_db(x: Path) -> bool:
         'application/vnd.sqlite3',
         # TODO this mime can also match wal files/journals, not sure
     }
+if not TYPE_CHECKING:
+    # todo deprecate properly --just backwards compat
+    from .compat import removeprefix  # noqa: F401

promnesia/compare.py CHANGED

@@ -1,69 +1,71 @@
-#!/usr/bin/env python3
+from __future__ import annotations
 # TODO perhaps make it external script?
 import argparse
-from pathlib import Path
 import logging
 import sys
-from typing import Dict, List, Any, NamedTuple, Optional, Iterator, Set, Tuple
+from collections.abc import Iterator, Sequence
+from pathlib import Path
+from typing import TypeVar
-from .common import DbVisit, Url, PathWithMtime # TODO ugh. figure out pythonpath
+from .common import DbVisit, PathWithMtime, Url
 from .database.load import row_to_db_visit
 # TODO include latest too?
 # from cconfig import ignore, filtered
 def get_logger():
     return logging.getLogger('promnesia-db-changes')
-# TODO return error depending on severity?
-from typing import TypeVar, Sequence
+# TODO return error depending on severity?
 T = TypeVar('T')
 def eliminate_by(sa: Sequence[T], sb: Sequence[T], key):
-    def make_dict(s: Sequence[T]) -> Dict[str, List[T]]:
-        res: Dict[str, List[T]] = {}
+    def make_dict(s: Sequence[T]) -> dict[str, list[T]]:
+        res: dict[str, list[T]] = {}
         for a in s:
             k = key(a)
-            ll = res.get(k, None)
+            ll = res.get(k)
             if ll is None:
                 ll = []
                 res[k] = ll
             ll.append(a)
         return res
     da = make_dict(sa)
     db = make_dict(sb)
     ka = set(da.keys())
     kb = set(db.keys())
-    onlya: Set[T] = set()
-    common: Set[T] = set()
-    onlyb: Set[T] = set()
+    onlya: set[T] = set()
+    common: set[T] = set()
+    onlyb: set[T] = set()
     for k in ka.union(kb):
         la = da.get(k, [])
         lb = db.get(k, [])
-        common.update(la[:min(len(la), len(lb))])
+        common.update(la[: min(len(la), len(lb))])
         if len(la) > len(lb):
-            onlya.update(la[len(lb):])
+            onlya.update(la[len(lb) :])
         if len(lb) > len(la):
-            onlyb.update(lb[len(la):])
+            onlyb.update(lb[len(la) :])
     return onlya, common, onlyb
-def compare(before: List[DbVisit], after: List[DbVisit], between: str, *, log=True) -> List[DbVisit]:
+def compare(before: list[DbVisit], after: list[DbVisit], between: str, *, log=True) -> list[DbVisit]:
     logger = get_logger()
     logger.info('comparing between: %s', between)
-    errors: List[DbVisit] = []
+    errors: list[DbVisit] = []
-    umap: Dict[Url, List[DbVisit]] = {}
+    umap: dict[Url, list[DbVisit]] = {}
     for a in after:
         url = a.norm_url
-        xx = umap.get(url, []) # TODO canonify here?
+        xx = umap.get(url, [])  # TODO canonify here?
         xx.append(a)
         umap[url] = xx
@@ -71,8 +73,7 @@ def compare(before: List[DbVisit], after: List[DbVisit], between: str, *, log=Tr
         errors.append(b)
         if log:
             logger.error('between %s missing %s', between, b)
-            print('ignoreline "%s", # %s %s' % ('exid', b.norm_url, b.src), file=sys.stderr)
+            print('ignoreline "{}", # {} {}'.format('exid', b.norm_url, b.src), file=sys.stderr)
     # the idea is that we eliminate items simultaneously from both sets
     eliminations = [
@@ -80,7 +81,7 @@ def compare(before: List[DbVisit], after: List[DbVisit], between: str, *, log=Tr
         ('without dt'             , lambda x: x._replace(src='', dt='')),
         ('without context'        , lambda x: x._replace(src='',        context='', locator='')),
         ('without dt and context' , lambda x: x._replace(src='', dt='', context='', locator='')),
-    ]
+    ]  # fmt: skip
     for ename, ekey in eliminations:
         logger.info('eliminating by %s', ename)
         logger.info('before: %d, after: %d', len(before), len(after))
@@ -96,6 +97,7 @@ def compare(before: List[DbVisit], after: List[DbVisit], between: str, *, log=Tr
     return errors
 def setup_parser(p):
     # TODO better name?
     p.add_argument('--intermediate-dir', type=Path)
@@ -108,8 +110,8 @@ def get_files(args):
     if len(args.paths) == 0:
         int_dir = args.intermediate_dir
         assert int_dir.exists()
-        files = list(sorted(int_dir.glob('*.sqlite*')))
-        files = files[-args.last:]
+        files = sorted(int_dir.glob('*.sqlite*'))
+        files = files[-args.last :]
     else:
         files = [Path(p) for p in args.paths]
     return files
@@ -126,7 +128,7 @@ def main():
         sys.exit(1)
-def compare_files(*files: Path, log=True) -> Iterator[Tuple[str, DbVisit]]:
+def compare_files(*files: Path, log=True) -> Iterator[tuple[str, DbVisit]]:
     assert len(files) > 0
     logger = get_logger()
@@ -137,9 +139,10 @@ def compare_files(*files: Path, log=True) -> Iterator[Tuple[str, DbVisit]]:
     for f in files:
         logger.info('processing %r', f)
         name = f.name
-        this_dts = name[0: name.index('.')] # can't use stem due to multiple extensions..
+        this_dts = name[0 : name.index('.')]  # can't use stem due to multiple extensions..
+        from promnesia.server import _get_stuff  # TODO ugh
-        from promnesia.server import _get_stuff # TODO ugh
         engine, table = _get_stuff(PathWithMtime.make(f))
         with engine.connect() as conn:
@@ -153,6 +156,6 @@ def compare_files(*files: Path, log=True) -> Iterator[Tuple[str, DbVisit]]:
         last = vis
         last_dts = this_dts
 if __name__ == '__main__':
     main()

promnesia 1.2.20240810__py3-none-any.whl → 1.4.20250909__py3-none-any.whl

promnesia 1.2.20240810__py3-none-any.whl → 1.4.20250909__py3-none-any.whl