promnesia 1.2.20240810__py3-none-any.whl → 1.3.20241021__py3-none-any.whl
This diff compares two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
- promnesia/__init__.py +14 -3
- promnesia/__main__.py +38 -25
- promnesia/cannon.py +23 -23
- promnesia/common.py +49 -42
- promnesia/compare.py +18 -20
- promnesia/compat.py +10 -10
- promnesia/config.py +20 -22
- promnesia/database/common.py +4 -3
- promnesia/database/dump.py +14 -13
- promnesia/database/load.py +7 -7
- promnesia/extract.py +13 -11
- promnesia/kjson.py +11 -10
- promnesia/logging.py +1 -1
- promnesia/misc/install_server.py +7 -8
- promnesia/server.py +42 -31
- promnesia/sources/auto.py +43 -30
- promnesia/sources/auto_logseq.py +6 -5
- promnesia/sources/auto_obsidian.py +2 -2
- promnesia/sources/browser.py +14 -9
- promnesia/sources/browser_legacy.py +17 -13
- promnesia/sources/demo.py +7 -7
- promnesia/sources/fbmessenger.py +3 -2
- promnesia/sources/filetypes.py +9 -7
- promnesia/sources/github.py +5 -7
- promnesia/sources/guess.py +2 -1
- promnesia/sources/hackernews.py +2 -2
- promnesia/sources/hpi.py +2 -2
- promnesia/sources/html.py +7 -5
- promnesia/sources/hypothesis.py +3 -2
- promnesia/sources/instapaper.py +2 -2
- promnesia/sources/markdown.py +17 -7
- promnesia/sources/org.py +20 -10
- promnesia/sources/plaintext.py +30 -31
- promnesia/sources/pocket.py +3 -2
- promnesia/sources/reddit.py +19 -18
- promnesia/sources/roamresearch.py +2 -1
- promnesia/sources/rss.py +3 -4
- promnesia/sources/shellcmd.py +19 -6
- promnesia/sources/signal.py +14 -13
- promnesia/sources/smscalls.py +2 -2
- promnesia/sources/stackexchange.py +3 -2
- promnesia/sources/takeout.py +23 -13
- promnesia/sources/takeout_legacy.py +15 -11
- promnesia/sources/telegram.py +13 -11
- promnesia/sources/telegram_legacy.py +18 -7
- promnesia/sources/twitter.py +6 -5
- promnesia/sources/vcs.py +5 -3
- promnesia/sources/viber.py +10 -9
- promnesia/sources/website.py +4 -4
- promnesia/sources/zulip.py +3 -2
- promnesia/sqlite.py +7 -4
- promnesia/tests/common.py +8 -5
- promnesia/tests/server_helper.py +11 -8
- promnesia/tests/sources/test_auto.py +2 -3
- promnesia/tests/sources/test_filetypes.py +2 -1
- promnesia/tests/sources/test_hypothesis.py +3 -3
- promnesia/tests/sources/test_org.py +2 -3
- promnesia/tests/sources/test_plaintext.py +0 -1
- promnesia/tests/sources/test_shellcmd.py +3 -4
- promnesia/tests/sources/test_takeout.py +3 -5
- promnesia/tests/test_cannon.py +5 -5
- promnesia/tests/test_cli.py +4 -6
- promnesia/tests/test_compare.py +1 -1
- promnesia/tests/test_config.py +7 -8
- promnesia/tests/test_db_dump.py +11 -12
- promnesia/tests/test_extract.py +10 -6
- promnesia/tests/test_indexer.py +14 -8
- promnesia/tests/test_server.py +2 -3
- promnesia/tests/test_traverse.py +0 -2
- promnesia/tests/utils.py +4 -4
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/METADATA +3 -2
- promnesia-1.3.20241021.dist-info/RECORD +83 -0
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/WHEEL +1 -1
- promnesia-1.2.20240810.dist-info/RECORD +0 -83
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/LICENSE +0 -0
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/entry_points.txt +0 -0
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/top_level.txt +0 -0
promnesia/__init__.py
CHANGED
@@ -1,6 +1,17 @@
-from pathlib import Path
-from .common import PathIsh, Visit, Source, last, Loc, Results, DbVisit, Context, Res
-
 # add deprecation warning so eventually this may converted to a namespace package?
 import warnings
+
+from .common import (  # noqa: F401
+    Context,
+    DbVisit,
+    Loc,
+    PathIsh,
+    Res,
+    Results,
+    Source,
+    Visit,
+    last,
+)
+
+# TODO think again about it -- what are the pros and cons?
 warnings.warn("DEPRECATED! Please import directly from 'promnesia.common', e.g. 'from promnesia.common import Visit, Source, Results'", DeprecationWarning)
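The new module body simply re-exports the public names and emits a DeprecationWarning; per the warning text, downstream configs should import from promnesia.common directly. A minimal illustrative snippet of the two styles (not taken from the package itself):

    # preferred after this release
    from promnesia.common import Source, Visit, Results

    # still works via the re-exports above, but now triggers the DeprecationWarning
    from promnesia import Source, Visit, Results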
promnesia/__main__.py
CHANGED
@@ -5,24 +5,34 @@ import ast
 import importlib
 import inspect
 import os
-
+import shlex
 import shutil
-from subprocess import run, check_call, Popen
 import sys
+from collections.abc import Iterable, Iterator, Sequence
+from pathlib import Path
+from subprocess import Popen, check_call, run
 from tempfile import TemporaryDirectory, gettempdir
-from typing import Callable
-
-
-from . import
-
-
-
-
+from typing import Callable
+
+from . import config, server
+from .common import (
+    DbVisit,
+    Extractor,
+    PathIsh,
+    Res,
+    Source,
+    default_config_path,
+    get_system_tz,
+    get_tmpdir,
+    logger,
+    user_config_file,
+)
 from .database.dump import visits_to_sqlite
 from .extract import extract_visits
+from .misc import install_server


-def iter_all_visits(sources_subset: Iterable[
+def iter_all_visits(sources_subset: Iterable[str | int] = ()) -> Iterator[Res[DbVisit]]:
     cfg = config.get()
     output_dir = cfg.output_dir
     # not sure if belongs here??
@@ -74,7 +84,7 @@ def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Re
         logger.warning("unknown --sources: %s", ", ".join(repr(i) for i in sources_subset))


-def _do_index(dry: bool=False, sources_subset: Iterable[
+def _do_index(*, dry: bool = False, sources_subset: Iterable[str | int] = (), overwrite_db: bool = False) -> Iterable[Exception]:
     # also keep & return errors for further display
     errors: list[Exception] = []
     def it() -> Iterable[Res[DbVisit]]:
@@ -98,9 +108,10 @@ def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=(), ove

 def do_index(
     config_file: Path,
-
-
-
+    *,
+    dry: bool = False,
+    sources_subset: Iterable[str | int] = (),
+    overwrite_db: bool = False,
 ) -> Sequence[Exception]:
     config.load_from(config_file)  # meh.. should be cleaner
     try:
@@ -120,7 +131,8 @@ def demo_sources() -> dict[str, Callable[[], Extractor]]:
     def lazy(name: str) -> Callable[[], Extractor]:
         # helper to avoid failed imports etc, since people might be lacking necessary dependencies
         def inner() -> Extractor:
-
+            # TODO why this import??
+            from . import sources  # noqa: F401
             module = importlib.import_module(f'promnesia.sources.{name}')
             return getattr(module, 'index')
         return inner
@@ -145,7 +157,7 @@ def do_demo(
     config_file: Path | None,
     dry: bool=False,
     name: str='demo',
-    sources_subset: Iterable[
+    sources_subset: Iterable[str | int]=(),
     overwrite_db: bool=False,
 ) -> None:
     with TemporaryDirectory() as tdir:
@@ -219,9 +231,10 @@ def _config_check(cfg: Path) -> Iterable[Exception]:
     logger.info('config: %s', cfg)

     def check(cmd: list[str | Path], **kwargs) -> Iterable[Exception]:
-        logger.debug(
-        res = run(cmd, **kwargs)
+        logger.debug(shlex.join(map(str, cmd)))
+        res = run(cmd, **kwargs)  # noqa: PLW1510
         if res.returncode > 0:
+            # TODO what's up with empty exception??
             yield Exception()

     logger.info('Checking syntax...')
@@ -239,7 +252,7 @@ def _config_check(cfg: Path) -> Iterable[Exception]:
     # todo not sure if should be more defensive than check_call here
     logger.info('Checking type safety...')
     try:
-        import mypy
+        import mypy  # noqa: F401
     except ImportError:
         logger.warning("mypy not found, can't use it to check config!")
     else:
@@ -291,7 +304,7 @@ def cli_doctor_server(args: argparse.Namespace) -> None:
     logger.info('You should see the database path and version above!')


-def _ordinal_or_name(s: str) ->
+def _ordinal_or_name(s: str) -> str | int:
     try:
         s = int(s)  # type: ignore
     except ValueError:
@@ -328,7 +341,7 @@ def main() -> None:

     F = lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, width=120)
     p = argparse.ArgumentParser(formatter_class=F)
-    subp = p.add_subparsers(dest='mode'
+    subp = p.add_subparsers(dest='mode' )
     ep = subp.add_parser('index', help='Create/update the link database', formatter_class=F)
     add_index_args(ep, default_config_path())
     # TODO use some way to override or provide config only via cmdline?
@@ -348,7 +361,7 @@ def main() -> None:
     ap.add_argument('--no-serve', action='store_const', const=None, dest='port', help='Pass to only index without running server')
     ap.add_argument(
         '--as',
-        choices=
+        choices=sorted(demo_sources().keys()),
         default='guess',
         help='Promnesia source to index as (see https://github.com/karlicoss/promnesia/tree/master/src/promnesia/sources for the full list)',
     )
@@ -359,7 +372,7 @@ def main() -> None:
     install_server.setup_parser(isp)

     cp = subp.add_parser('config', help='Config management')
-    cp.set_defaults(func=lambda *
+    cp.set_defaults(func=lambda *_args: cp.print_help())
     scp = cp.add_subparsers()
     ccp = scp.add_parser('check', help='Check config')
     ccp.set_defaults(func=config_check)
@@ -373,7 +386,7 @@ def main() -> None:

     dp = subp.add_parser('doctor', help='Troubleshooting assistant')
     dp.add_argument('--config', type=Path, default=default_config_path(), help='Config path')
-    dp.set_defaults(func=lambda *
+    dp.set_defaults(func=lambda *_args: dp.print_help())
     sdp = dp.add_subparsers()
     sdp.add_parser('config' , help='Check config' ).set_defaults(func=config_check )
     sdp.add_parser('database', help='Inspect database').set_defaults(func=cli_doctor_db)
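The indexing entry points are now keyword-only: do_index takes the config path positionally and everything else (dry, sources_subset, overwrite_db) by keyword, mirroring CLI options such as --sources. A sketch of a programmatic call, assuming a made-up config file location:

    from pathlib import Path

    from promnesia.__main__ import do_index

    # flags must now be passed by keyword; positional use would raise TypeError
    errors = do_index(
        Path('/path/to/promnesia/config.py'),  # hypothetical config location
        dry=False,
        sources_subset=('demo',),              # names or ordinals of sources to index
        overwrite_db=True,
    )
    for e in errors:
        print('indexing error:', e)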
promnesia/cannon.py
CHANGED
@@ -9,16 +9,17 @@ are same content, but you can't tell that by URL equality. Even canonical urls a

 Also some experiments to establish 'URL hierarchy'.
 """
-
+from __future__ import annotations

-from itertools import chain
 import re
 import typing
-from typing import Iterable, NamedTuple, Set, Optional, List, Sequence, Union, Tuple, Dict, Any, Collection
-
 import urllib.parse
-from
+from collections.abc import Collection, Iterable, Sequence

+# TODO eh?? they fixed mobile.twitter.com?
+from itertools import chain
+from typing import Any, NamedTuple, Union
+from urllib.parse import SplitResult, parse_qsl, urlencode, urlsplit, urlunsplit

 # this has some benchmark, but quite a few librarires seem unmaintained, sadly
 # I guess i'll stick to default for now, until it's a critical bottleneck
@@ -108,11 +109,11 @@ default_qkeep = [

 # TODO perhaps, decide if fragment is meaningful (e.g. wiki) or random sequence of letters?
 class Spec(NamedTuple):
-    qkeep  :
-    qremove:
+    qkeep  : Collection[str] | bool | None = None
+    qremove: set[str] | None = None
     fkeep  : bool = False

-    def keep_query(self, q: str) ->
+    def keep_query(self, q: str) -> int | None:  # returns order
         if self.qkeep is True:
             return 1
         qkeep = {
@@ -134,13 +135,13 @@ class Spec(NamedTuple):
         return None

     @classmethod
-    def make(cls, **kwargs) ->
+    def make(cls, **kwargs) -> Spec:
         return cls(**kwargs)

 S = Spec

 # TODO perhaps these can be machine learnt from large set of urls?
-specs:
+specs: dict[str, Spec] = {
     'youtube.com': S(
         # TODO search_query?
         qkeep=[  # note: experimental.. order matters here
@@ -178,7 +179,6 @@ specs: Dict[str, Spec] = {

             'source', 'tsid', 'refsrc', 'pnref', 'rc', '_rdr', 'src', 'hc_location', 'section', 'permPage', 'soft', 'pn_ref', 'action',
             'ti', 'aref', 'event_time_id', 'action_history', 'filter', 'ref_notif_type', 'has_source', 'source_newsfeed_story_type',
-            'ref_notif_type',
         },
     ),
     'physicstravelguide.com': S(fkeep=True),  # TODO instead, pass fkeep marker object for shorter spec?
@@ -218,10 +218,10 @@ Spec2 = Any # TODO

 # TODO this should be a map
 Frag = Any
-Parts = Sequence[
+Parts = Sequence[tuple[str, str]]


-def _yc(domain: str, path: str, qq: Parts, frag: Frag) ->
+def _yc(domain: str, path: str, qq: Parts, frag: Frag) -> tuple[Any, Any, Parts, Frag]:
     if path[:5] == '/from':
         site = dict(qq).get('site')
         if site is not None:
@@ -232,7 +232,7 @@ def _yc(domain: str, path: str, qq: Parts, frag: Frag) -> Tuple[Any, Any, Parts,
     # TODO this should be in-place? for brevity?
     return (domain, path, qq, frag)

-def get_spec2(dom: str) ->
+def get_spec2(dom: str) -> Spec2 | None:
     return {
         'news.ycombinator.com': _yc,
     }.get(dom)
@@ -285,10 +285,10 @@ def transform_split(split: SplitResult):
     REST = r'(?P<rest>.*)'

     Left = Union[str, Sequence[str]]
-    Right =
+    Right = tuple[str, str, str]
     # the idea is that we can unify certain URLs here and map them to the 'canonical' one
     # this is a dict only for grouping but should be a list really.. todo
-    rules:
+    rules: dict[Left, Right] = {
         # TODO m. handling might be quite common
         # f'm.youtube.com/{REST}': ('youtube.com', '{rest}'),
         (
@@ -322,9 +322,9 @@ def transform_split(split: SplitResult):
             continue
         gd = m.groupdict()
         if len(to) == 2:
-            to = to
+            to = (*to, '')

-        (netloc, path, qq) =
+        (netloc, path, qq) = (t.format(**gd) for t in to)
         qparts.extend(parse_qsl(qq, keep_blank_values=True))  # TODO hacky..
         # TODO eh, qparts should really be a map or something...
         break
@@ -361,7 +361,7 @@ def myunsplit(domain: str, path: str, query: str, fragment: str) -> str:
 # ]
 # for re in regexes:

-def handle_archive_org(url: str) ->
+def handle_archive_org(url: str) -> str | None:
     are = r'web.archive.org/web/(?P<timestamp>\d+)/(?P<rest>.*)'
     m = re.fullmatch(are, url)
     if m is None:
@@ -697,8 +697,8 @@ def groups(it, args): # pragma: no cover
     all_pats = get_patterns()

     from collections import Counter
-    c: typing.Counter[
-    unmatched:
+    c: typing.Counter[str | None] = Counter()
+    unmatched: list[str] = []

     def dump():
         print(c)
@@ -756,10 +756,10 @@ def groups(it, args): # pragma: no cover
 def display(it, args) -> None:  # pragma: no cover
     # TODO better name?
     import difflib
-    # pylint: disable=import-error
-    from termcolor import colored as C  # type: ignore
     from sys import stdout

+    from termcolor import colored as C  # type: ignore
+
     for line in it:
         line = line.strip()
         if args.human:
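Spec (aliased as S) drives per-domain canonicalisation: qkeep lists query parameters worth keeping, qremove names the ones to strip, and fkeep says whether the URL fragment survives. A hypothetical entry in the same shape as the youtube.com and physicstravelguide.com specs above (the domain and parameter names are invented for illustration):

    from promnesia.cannon import Spec

    S = Spec

    example_specs: dict[str, Spec] = {
        'forum.example.com': S(
            qkeep=['thread', 'page'],                     # parameters that identify the page
            qremove={'utm_source', 'utm_medium', 'ref'},  # tracking noise to strip
            fkeep=True,                                   # keep '#...' anchors (e.g. a post id)
        ),
    }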
promnesia/common.py
CHANGED
@@ -1,26 +1,28 @@
 from __future__ import annotations

-from contextlib import contextmanager
-from datetime import datetime, date
-from functools import lru_cache
-from glob import glob
 import itertools
 import logging
 import os
-
+import re
 import shutil
-
+import tempfile
+import warnings
+from collections.abc import Iterable, Sequence
+from contextlib import contextmanager
+from copy import copy
+from datetime import date, datetime
+from functools import lru_cache
+from glob import glob
+from pathlib import Path
+from subprocess import PIPE, Popen, run
 from timeit import default_timer as timer
 from types import ModuleType
-from typing import
-import warnings
+from typing import TYPE_CHECKING, Callable, NamedTuple, Optional, TypeVar, Union

-from more_itertools import intersperse
 import pytz
+from more_itertools import intersperse

 from .cannon import canonify
-from .compat import removeprefix
-

 _is_windows = os.name == 'nt'

@@ -38,14 +40,14 @@ Second = int
 # TODO hmm. arguably, source and context are almost same things...
 class Loc(NamedTuple):
     title: str
-    href: Optional[str]=None
+    href: Optional[str] = None  # noqa: UP007  # looks like hypothesis doesn't like in on python <= 3.9

     @classmethod
-    def make(cls, title: str, href:
+    def make(cls, title: str, href: str | None=None) -> Loc:
         return cls(title=title, href=href)

     @classmethod
-    def file(cls, path: PathIsh, line:
+    def file(cls, path: PathIsh, line: int | None=None, relative_to: Path | None=None) -> Loc:
         lstr = '' if line is None else f':{line}'
         # todo loc should be url encoded? dunno.
         # or use line=? eh. I don't know. Just ask in issues.
@@ -94,7 +96,7 @@ def _warn_no_xdg_mime() -> None:
 def _detect_mime_handler() -> str:
     def exists(what: str) -> bool:
         try:
-            r = run(f'xdg-mime query default x-scheme-handler/{what}'.split(), stdout=PIPE)
+            r = run(f'xdg-mime query default x-scheme-handler/{what}'.split(), stdout=PIPE, check=False)
         except (FileNotFoundError, NotADirectoryError):  # ugh seems that osx might throw NotADirectory for some reason
             _warn_no_xdg_mime()
             return False
@@ -139,12 +141,12 @@ class Visit(NamedTuple):
     # TODO back to DatetimeIsh, but somehow make compatible to dbcache?
     dt: datetime
     locator: Loc
-    context:
-    duration:
+    context: Context | None = None
+    duration: Second | None = None
     # TODO shit. I need to insert it in chrome db....
     # TODO gonna be hard to fill retroactively.
     # spent: Optional[Second] = None
-    debug:
+    debug: str | None = None

 Result = Union[Visit, Exception]
 Results = Iterable[Result]
@@ -157,12 +159,12 @@ class DbVisit(NamedTuple):
     orig_url: Url
     dt: datetime
     locator: Loc
-    src: Optional[SourceName] = None
-    context: Optional[Context] = None
-    duration: Optional[Second] = None
+    src: Optional[SourceName] = None  # noqa: UP007  # looks like hypothesis doesn't like in on python <= 3.9
+    context: Optional[Context] = None  # noqa: UP007  # looks like hypothesis doesn't like in on python <= 3.9
+    duration: Optional[Second] = None  # noqa: UP007  # looks like hypothesis doesn't like in on python <= 3.9

     @staticmethod
-    def make(p: Visit, src: SourceName) -> Res[
+    def make(p: Visit, src: SourceName) -> Res[DbVisit]:
         try:
             # hmm, mypy gets a bit confused here.. presumably because datetime is always datetime (but date is not datetime)
             if isinstance(p.dt, datetime):
@@ -171,7 +173,7 @@ class DbVisit(NamedTuple):
                 # TODO that won't be with timezone..
                 dt = datetime.combine(p.dt, datetime.min.time())  # meh..
             else:
-                raise AssertionError(f'unexpected date: {p.dt}, {type(p.dt)}')
+                raise AssertionError(f'unexpected date: {p.dt}, {type(p.dt)}')  # noqa: TRY301
         except Exception as e:
             return e

@@ -196,6 +198,7 @@ Filter = Callable[[Url], bool]


 from .logging import LazyLogger
+
 logger = LazyLogger('promnesia', level='DEBUG')

 def get_logger() -> logging.Logger:
@@ -204,7 +207,6 @@ def get_logger() -> logging.Logger:



-import tempfile
 # kinda singleton
 @lru_cache(1)
 def get_tmpdir() -> tempfile.TemporaryDirectory[str]:
@@ -218,7 +220,7 @@ Syntax = str

 @lru_cache(None)
 def _get_urlextractor(syntax: Syntax):
-    from urlextract import URLExtract
+    from urlextract import URLExtract  # type: ignore
     u = URLExtract()
     # https://github.com/lipoja/URLExtract/issues/13
     if syntax in {'org', 'orgmode', 'org-mode'}:  # TODO remove hardcoding..
@@ -249,7 +251,7 @@ def iter_urls(s: str, *, syntax: Syntax='') -> Iterable[Url]:
         yield _sanitize(u)


-def extract_urls(s: str, *, syntax: Syntax='') ->
+def extract_urls(s: str, *, syntax: Syntax='') -> list[Url]:
     return list(iter_urls(s=s, syntax=syntax))


@@ -274,7 +276,7 @@ class PathWithMtime(NamedTuple):
     mtime: float

     @classmethod
-    def make(cls, p: Path) ->
+    def make(cls, p: Path) -> PathWithMtime:
         return cls(
             path=p,
             mtime=p.stat().st_mtime,
@@ -300,7 +302,7 @@ def _guess_name(thing: PreSource) -> str:
     guess = thing.__module__

     dflt = 'promnesia.sources.'
-    guess = removeprefix(
+    guess = guess.removeprefix(dflt)
     if guess == 'config':
         # this happens when we define a lambda in config or something without properly wrapping in Source
         logger.warning(f'Inferred source name "config" for {thing}. This might be misleading TODO')
@@ -362,13 +364,14 @@ Indexer = Source
 # NOTE: used in configs...
 def last(path: PathIsh, *parts: str) -> Path:
     import os.path
-    pp = os.path.join(str(path), *parts)
-    return Path(max(glob(pp, recursive=True)))
+    pp = os.path.join(str(path), *parts)  # noqa: PTH118
+    return Path(max(glob(pp, recursive=True)))  # noqa: PTH207


-from .logging import setup_logger
+from .logging import setup_logger  # noqa: F401

-
+
+# TODO get rid of this? not sure if still necessary
 def echain(ex: Exception, cause: Exception) -> Exception:
     e = copy(ex)
     e.__cause__ = cause
@@ -382,7 +385,6 @@ def echain(ex: Exception, cause: Exception) -> Exception:

 def slugify(x: str) -> str:
     # https://stackoverflow.com/a/38766141/706389
-    import re
     valid_file_name = re.sub(r'[^\w_.)( -]', '', x)
     return valid_file_name

@@ -392,7 +394,7 @@ def appdirs():
     under_test = os.environ.get('PYTEST_CURRENT_TEST') is not None
     # todo actually use test name?
     name = 'promnesia-test' if under_test else 'promnesia'
-    import appdirs as ad
+    import appdirs as ad  # type: ignore[import-untyped]
     return ad.AppDirs(appname=name)


@@ -409,13 +411,13 @@ def default_cache_dir() -> Path:
 # make it lazy, otherwise it might crash on module import (e.g. on Windows)
 # ideally would be nice to fix it properly https://github.com/ahupp/python-magic#windows
 @lru_cache(1)
-def _magic() -> Callable[[PathIsh],
+def _magic() -> Callable[[PathIsh], str | None]:
     logger = get_logger()
     try:
-        import magic
+        import magic  # type: ignore
     except Exception as e:
         logger.exception(e)
-        defensive_msg:
+        defensive_msg: str | None = None
         if isinstance(e, ModuleNotFoundError) and e.name == 'magic':
             defensive_msg = "python-magic is not detected. It's recommended for better file type detection (pip3 install --user python-magic). See https://github.com/ahupp/python-magic#installation"
         elif isinstance(e, ImportError):
@@ -425,7 +427,7 @@ def _magic() -> Callable[[PathIsh], Optional[str]]:
         if defensive_msg is not None:
             logger.warning(defensive_msg)
             warnings.warn(defensive_msg)
-            return lambda path: None  #
+            return lambda path: None  # stub  # noqa: ARG005
         else:
             raise e
     else:
@@ -441,7 +443,7 @@ def _mimetypes():
     return mimetypes


-def mime(path: PathIsh) ->
+def mime(path: PathIsh) -> str | None:
     ps = str(path)
     mimetypes = _mimetypes()
     # first try mimetypes, it's only using the filename without opening the file
@@ -453,7 +455,7 @@ def mime(path: PathIsh) -> Optional[str]:
     return magic(ps)


-def find_args(root: Path, follow: bool, ignore:
+def find_args(root: Path, *, follow: bool, ignore: Sequence[str] = ()) -> list[str]:
     prune_dir_args = []
     ignore_file_args = []
     if ignore:
@@ -476,7 +478,7 @@ def find_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
     ]


-def fdfind_args(root: Path, follow: bool, ignore:
+def fdfind_args(root: Path, *, follow: bool, ignore: Sequence[str] = ()) -> list[str]:
     from .config import extra_fd_args

     ignore_args = []
@@ -496,7 +498,7 @@ def fdfind_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
     ]


-def traverse(root: Path, *, follow: bool=True, ignore:
+def traverse(root: Path, *, follow: bool=True, ignore: Sequence[str] = ()) -> Iterable[Path]:
     if not root.is_dir():
         yield root
         return
@@ -605,3 +607,8 @@ def is_sqlite_db(x: Path) -> bool:
         'application/vnd.sqlite3',
         # TODO this mime can also match wal files/journals, not sure
     }
+
+
+if not TYPE_CHECKING:
+    # todo deprecate properly --just backwards compat
+    from .compat import removeprefix  # noqa: F401
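Visit, Loc and Results above are the types a source yields during indexing, while DbVisit is what ends up in the database once DbVisit.make attaches the source name. A minimal sketch of a custom source using them; the URL, timestamp and context text are invented:

    from datetime import datetime, timezone

    from promnesia.common import Loc, Results, Visit

    def index() -> Results:
        # hypothetical one-visit source, just to show the shapes involved
        yield Visit(
            url='https://example.org/some/page',
            dt=datetime(2024, 10, 1, 12, 0, tzinfo=timezone.utc),
            locator=Loc.make(title='my notes', href='file:///tmp/notes.txt'),
            context='mentioned in my notes',  # optional, defaults to None
        )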
promnesia/compare.py
CHANGED
@@ -1,13 +1,14 @@
-
+from __future__ import annotations
+
 # TODO perhaps make it external script?
 import argparse
-from pathlib import Path
 import logging
 import sys
-from
-
+from collections.abc import Iterator, Sequence
+from pathlib import Path
+from typing import TypeVar

-from .common import DbVisit,
+from .common import DbVisit, PathWithMtime, Url
 from .database.load import row_to_db_visit

 # TODO include latest too?
@@ -19,14 +20,11 @@ def get_logger():
 # TODO return error depending on severity?


-from typing import TypeVar, Sequence
-
-
 T = TypeVar('T')

 def eliminate_by(sa: Sequence[T], sb: Sequence[T], key):
-    def make_dict(s: Sequence[T]) ->
-        res:
+    def make_dict(s: Sequence[T]) -> dict[str, list[T]]:
+        res: dict[str, list[T]] = {}
         for a in s:
             k = key(a)
             ll = res.get(k, None)
@@ -39,9 +37,9 @@ def eliminate_by(sa: Sequence[T], sb: Sequence[T], key):
     db = make_dict(sb)
     ka = set(da.keys())
     kb = set(db.keys())
-    onlya:
-    common:
-    onlyb:
+    onlya: set[T] = set()
+    common: set[T] = set()
+    onlyb: set[T] = set()
     for k in ka.union(kb):
         la = da.get(k, [])
         lb = db.get(k, [])
@@ -54,13 +52,13 @@ def eliminate_by(sa: Sequence[T], sb: Sequence[T], key):
     return onlya, common, onlyb


-def compare(before:
+def compare(before: list[DbVisit], after: list[DbVisit], between: str, *, log=True) -> list[DbVisit]:
     logger = get_logger()
     logger.info('comparing between: %s', between)

-    errors:
+    errors: list[DbVisit] = []

-    umap:
+    umap: dict[Url, list[DbVisit]] = {}
     for a in after:
         url = a.norm_url
         xx = umap.get(url, [])  # TODO canonify here?
@@ -71,7 +69,7 @@ def compare(before: List[DbVisit], after: List[DbVisit], between: str, *, log=Tr
         errors.append(b)
         if log:
             logger.error('between %s missing %s', between, b)
-            print('ignoreline "
+            print('ignoreline "{}", # {} {}'.format('exid', b.norm_url, b.src), file=sys.stderr)


     # the idea is that we eliminate items simultaneously from both sets
@@ -108,7 +106,7 @@ def get_files(args):
     if len(args.paths) == 0:
         int_dir = args.intermediate_dir
         assert int_dir.exists()
-        files =
+        files = sorted(int_dir.glob('*.sqlite*'))
         files = files[-args.last:]
     else:
         files = [Path(p) for p in args.paths]
@@ -126,7 +124,7 @@ def main():
     sys.exit(1)


-def compare_files(*files: Path, log=True) -> Iterator[
+def compare_files(*files: Path, log=True) -> Iterator[tuple[str, DbVisit]]:
     assert len(files) > 0

     logger = get_logger()
@@ -139,7 +137,7 @@ def compare_files(*files: Path, log=True) -> Iterator[Tuple[str, DbVisit]]:
         name = f.name
         this_dts = name[0: name.index('.')]  # can't use stem due to multiple extensions..

-        from promnesia.server import _get_stuff
+        from promnesia.server import _get_stuff  # TODO ugh
         engine, table = _get_stuff(PathWithMtime.make(f))

         with engine.connect() as conn:
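compare_files loads each database snapshot via _get_stuff and, per the return type above, yields (str, DbVisit) tuples; judging from the surrounding code, the string is the date prefix taken from the file name and the visits are the ones compare() reports as missing. A hypothetical invocation over two intermediate databases (the paths are made up):

    from pathlib import Path

    from promnesia.compare import compare_files

    # made-up paths to two intermediate promnesia databases
    old_db = Path('/tmp/promnesia/20241001.promnesia.sqlite')
    new_db = Path('/tmp/promnesia/20241021.promnesia.sqlite')

    for dts, visit in compare_files(old_db, new_db, log=False):
        print(dts, visit.norm_url)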
|