PyPI - promnesia - Versions diffs - 1.2.20230515__py3-none-any.whl → 1.3.20241021__py3-none-any.whl - Mend

promnesia 1.2.20230515__py3-none-any.whl → 1.3.20241021__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84) hide show

promnesia/__init__.py +14 -3
promnesia/__main__.py +60 -35
promnesia/cannon.py +27 -27
promnesia/common.py +85 -67
promnesia/compare.py +21 -22
promnesia/compat.py +10 -10
promnesia/config.py +23 -23
promnesia/database/common.py +67 -0
promnesia/database/dump.py +188 -0
promnesia/{read_db.py → database/load.py} +16 -17
promnesia/extract.py +14 -11
promnesia/kjson.py +12 -11
promnesia/logging.py +4 -4
promnesia/misc/__init__.pyi +0 -0
promnesia/misc/config_example.py +1 -2
promnesia/misc/install_server.py +7 -9
promnesia/server.py +57 -47
promnesia/sources/__init__.pyi +0 -0
promnesia/sources/auto.py +50 -35
promnesia/sources/auto_logseq.py +6 -5
promnesia/sources/auto_obsidian.py +2 -2
promnesia/sources/browser.py +14 -9
promnesia/sources/browser_legacy.py +26 -16
promnesia/sources/demo.py +19 -3
promnesia/sources/fbmessenger.py +3 -2
promnesia/sources/filetypes.py +16 -7
promnesia/sources/github.py +7 -9
promnesia/sources/guess.py +2 -1
promnesia/sources/hackernews.py +2 -2
promnesia/sources/hpi.py +2 -2
promnesia/sources/html.py +7 -5
promnesia/sources/hypothesis.py +4 -3
promnesia/sources/instapaper.py +2 -2
promnesia/sources/markdown.py +31 -21
promnesia/sources/org.py +27 -13
promnesia/sources/plaintext.py +30 -29
promnesia/sources/pocket.py +3 -2
promnesia/sources/reddit.py +20 -19
promnesia/sources/roamresearch.py +2 -1
promnesia/sources/rss.py +4 -5
promnesia/sources/shellcmd.py +19 -6
promnesia/sources/signal.py +33 -24
promnesia/sources/smscalls.py +2 -2
promnesia/sources/stackexchange.py +4 -3
promnesia/sources/takeout.py +76 -9
promnesia/sources/takeout_legacy.py +24 -12
promnesia/sources/telegram.py +13 -11
promnesia/sources/telegram_legacy.py +18 -7
promnesia/sources/twitter.py +6 -5
promnesia/sources/vcs.py +5 -3
promnesia/sources/viber.py +10 -9
promnesia/sources/website.py +4 -4
promnesia/sources/zulip.py +3 -2
promnesia/sqlite.py +7 -4
promnesia/tests/__init__.py +0 -0
promnesia/tests/common.py +140 -0
promnesia/tests/server_helper.py +67 -0
promnesia/tests/sources/__init__.py +0 -0
promnesia/tests/sources/test_auto.py +65 -0
promnesia/tests/sources/test_filetypes.py +43 -0
promnesia/tests/sources/test_hypothesis.py +39 -0
promnesia/tests/sources/test_org.py +64 -0
promnesia/tests/sources/test_plaintext.py +25 -0
promnesia/tests/sources/test_shellcmd.py +21 -0
promnesia/tests/sources/test_takeout.py +56 -0
promnesia/tests/test_cannon.py +325 -0
promnesia/tests/test_cli.py +40 -0
promnesia/tests/test_compare.py +30 -0
promnesia/tests/test_config.py +289 -0
promnesia/tests/test_db_dump.py +222 -0
promnesia/tests/test_extract.py +65 -0
promnesia/tests/test_extract_urls.py +43 -0
promnesia/tests/test_indexer.py +251 -0
promnesia/tests/test_server.py +291 -0
promnesia/tests/test_traverse.py +39 -0
promnesia/tests/utils.py +35 -0
{promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/METADATA +15 -18
promnesia-1.3.20241021.dist-info/RECORD +83 -0
{promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/WHEEL +1 -1
{promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/entry_points.txt +0 -1
promnesia/dump.py +0 -105
promnesia-1.2.20230515.dist-info/RECORD +0 -58
{promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/LICENSE +0 -0
{promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/top_level.txt +0 -0

promnesia/common.py CHANGED Viewed

@@ -1,26 +1,29 @@
 from __future__ import annotations
-from contextlib import contextmanager
-from datetime import datetime, date
-from functools import lru_cache
-from glob import glob
 import itertools
 import logging
 import os
-from pathlib import Path
+import re
 import shutil
-from subprocess import run, PIPE, Popen
+import tempfile
+import warnings
+from collections.abc import Iterable, Sequence
+from contextlib import contextmanager
+from copy import copy
+from datetime import date, datetime
+from functools import lru_cache
+from glob import glob
+from pathlib import Path
+from subprocess import PIPE, Popen, run
 from timeit import default_timer as timer
 from types import ModuleType
-from typing import NamedTuple, Iterable, TypeVar, Callable, List, Optional, Union, TypeVar
-import warnings
+from typing import TYPE_CHECKING, Callable, NamedTuple, Optional, TypeVar, Union
-from more_itertools import intersperse
 import pytz
+from more_itertools import intersperse
 from .cannon import canonify
 _is_windows = os.name == 'nt'
 T = TypeVar('T')
@@ -37,14 +40,14 @@ Second = int
 # TODO hmm. arguably, source and context are almost same things...
 class Loc(NamedTuple):
     title: str
-    href: Optional[str]=None
+    href: Optional[str] = None  # noqa: UP007  # looks like hypothesis doesn't like in on python <= 3.9
     @classmethod
-    def make(cls, title: str, href: Optional[str]=None) -> 'Loc':
+    def make(cls, title: str, href: str | None=None) -> Loc:
         return cls(title=title, href=href)
     @classmethod
-    def file(cls, path: PathIsh, line: Optional[int]=None, relative_to: Optional[Path]=None) -> 'Loc':
+    def file(cls, path: PathIsh, line: int | None=None, relative_to: Path | None=None) -> Loc:
         lstr = '' if line is None else f':{line}'
         # todo loc should be url encoded? dunno.
         # or use line=? eh. I don't know. Just ask in issues.
@@ -76,13 +79,26 @@ class Loc(NamedTuple):
     # but generally, it will be
     # (url|file)(linenumber|json_path|anchor)
+@lru_cache(None)
+def warn_once(message: str) -> None:
+    # you'd think that warnings module already logs warnings only once per line..
+    # but sadly it's not the case
+    # see https://github.com/karlicoss/python_duplicate_warnings_investigation/blob/master/test.py
+    warnings.warn(message, stacklevel=2)
+def _warn_no_xdg_mime() -> None:
+    warn_once("No xdg-mime on your OS! If you're on OSX, perhaps you can help me! https://github.com/karlicoss/open-in-editor/issues/1")
 @lru_cache(1)
 def _detect_mime_handler() -> str:
     def exists(what: str) -> bool:
         try:
-            r = run(f'xdg-mime query default x-scheme-handler/{what}'.split(), stdout=PIPE)
-        except FileNotFoundError:
-            warnings.warn("No xdg-mime on your OS! If you're on OSX, perhaps you can help me! https://github.com/karlicoss/open-in-editor/issues/1")
+            r = run(f'xdg-mime query default x-scheme-handler/{what}'.split(), stdout=PIPE, check=False)
+        except (FileNotFoundError, NotADirectoryError):  # ugh seems that osx might throw NotADirectory for some reason
+            _warn_no_xdg_mime()
             return False
         if r.returncode > 0:
             warnings.warn('xdg-mime failed') # hopefully rest is in stderr
@@ -102,6 +118,7 @@ def _detect_mime_handler() -> str:
         result = 'emacs:'
     # 2. now try to use newer editor:// thing
+    # TODO flip order here? should rely on editor:// first?
     # TODO would be nice to collect warnings and display at the end
     if not exists('editor'):
@@ -124,12 +141,12 @@ class Visit(NamedTuple):
     # TODO back to DatetimeIsh, but somehow make compatible to dbcache?
     dt: datetime
     locator: Loc
-    context: Optional[Context] = None
-    duration: Optional[Second] = None
+    context: Context | None = None
+    duration: Second | None = None
     # TODO shit. I need to insert it in chrome db....
     # TODO gonna be hard to fill retroactively.
     # spent: Optional[Second] = None
-    debug: Optional[str] = None
+    debug: str | None = None
 Result = Union[Visit, Exception]
 Results = Iterable[Result]
@@ -142,12 +159,12 @@ class DbVisit(NamedTuple):
     orig_url: Url
     dt: datetime
     locator: Loc
-    src: Optional[SourceName] = None
-    context: Optional[Context] = None
-    duration: Optional[Second] = None
+    src: Optional[SourceName] = None  # noqa: UP007  # looks like hypothesis doesn't like in on python <= 3.9
+    context: Optional[Context] = None  # noqa: UP007  # looks like hypothesis doesn't like in on python <= 3.9
+    duration: Optional[Second] = None  # noqa: UP007  # looks like hypothesis doesn't like in on python <= 3.9
     @staticmethod
-    def make(p: Visit, src: SourceName) -> Res['DbVisit']:
+    def make(p: Visit, src: SourceName) -> Res[DbVisit]:
         try:
             # hmm, mypy gets a bit confused here.. presumably because datetime is always datetime (but date is not datetime)
             if isinstance(p.dt, datetime):
@@ -156,7 +173,7 @@ class DbVisit(NamedTuple):
                 # TODO that won't be with timezone..
                 dt = datetime.combine(p.dt, datetime.min.time()) # meh..
             else:
-                raise AssertionError(f'unexpected date: {p.dt}, {type(p.dt)}')
+                raise AssertionError(f'unexpected date: {p.dt}, {type(p.dt)}')  # noqa: TRY301
         except Exception as e:
             return e
@@ -181,6 +198,7 @@ Filter = Callable[[Url], bool]
 from .logging import LazyLogger
 logger = LazyLogger('promnesia', level='DEBUG')
 def get_logger() -> logging.Logger:
@@ -189,7 +207,6 @@ def get_logger() -> logging.Logger:
-import tempfile
 # kinda singleton
 @lru_cache(1)
 def get_tmpdir() -> tempfile.TemporaryDirectory[str]:
@@ -203,7 +220,7 @@ Syntax = str
 @lru_cache(None)
 def _get_urlextractor(syntax: Syntax):
-    from urlextract import URLExtract # type: ignore
+    from urlextract import URLExtract  # type: ignore
     u = URLExtract()
     # https://github.com/lipoja/URLExtract/issues/13
     if syntax in {'org', 'orgmode', 'org-mode'}: # TODO remove hardcoding..
@@ -234,7 +251,7 @@ def iter_urls(s: str, *, syntax: Syntax='') -> Iterable[Url]:
         yield _sanitize(u)
-def extract_urls(s: str, *, syntax: Syntax='') -> List[Url]:
+def extract_urls(s: str, *, syntax: Syntax='') -> list[Url]:
     return list(iter_urls(s=s, syntax=syntax))
@@ -259,7 +276,7 @@ class PathWithMtime(NamedTuple):
     mtime: float
     @classmethod
-    def make(cls, p: Path) -> 'PathWithMtime':
+    def make(cls, p: Path) -> PathWithMtime:
         return cls(
             path=p,
             mtime=p.stat().st_mtime,
@@ -285,9 +302,10 @@ def _guess_name(thing: PreSource) -> str:
         guess = thing.__module__
     dflt = 'promnesia.sources.'
-    if guess.startswith(dflt):
-        # meh
-        guess = guess[len(dflt):]
+    guess = guess.removeprefix(dflt)
+    if guess == 'config':
+        # this happens when we define a lambda in config or something without properly wrapping in Source
+        logger.warning(f'Inferred source name "config" for {thing}. This might be misleading TODO')
     return guess
@@ -297,7 +315,7 @@ def _get_index_function(sourceish: PreSource) -> PreExtractor:
     if hasattr(sourceish, 'index'):  # must be a module
         res = getattr(sourceish, 'index')
     else:
-        res = sourceish  # type: ignore[assignment]
+        res = sourceish
     return res
@@ -317,12 +335,17 @@ class Source:
         self.extractor: Extractor = lambda: self.ff(*self.args, **self.kwargs)
         if src is not None:
             warnings.warn("'src' argument is deprecated, please use 'name' instead", DeprecationWarning)
-        try:
-            name_guess = _guess_name(ff)
-        except:
-            # todo warn?
-            name_guess = ''
-        self.name = name or src or name_guess
+        if name != '':
+            self.name = name
+        elif src != '':
+            self.name = src
+        else:
+            try:
+                name_guess = _guess_name(ff)
+            except:
+                # todo warn?
+                name_guess = ''
+            self.name = name_guess
     @property
     def description(self) -> str:
@@ -341,13 +364,14 @@ Indexer = Source
 # NOTE: used in configs...
 def last(path: PathIsh, *parts: str) -> Path:
     import os.path
-    pp = os.path.join(str(path), *parts)
-    return Path(max(glob(pp, recursive=True)))
+    pp = os.path.join(str(path), *parts)  # noqa: PTH118
+    return Path(max(glob(pp, recursive=True)))  # noqa: PTH207
-from .logging import setup_logger
+from .logging import setup_logger  # noqa: F401
-from copy import copy
+# TODO get rid of this? not sure if still necessary
 def echain(ex: Exception, cause: Exception) -> Exception:
     e = copy(ex)
     e.__cause__ = cause
@@ -361,7 +385,6 @@ def echain(ex: Exception, cause: Exception) -> Exception:
 def slugify(x: str) -> str:
     # https://stackoverflow.com/a/38766141/706389
-    import re
     valid_file_name = re.sub(r'[^\w_.)( -]', '', x)
     return valid_file_name
@@ -371,7 +394,7 @@ def appdirs():
     under_test = os.environ.get('PYTEST_CURRENT_TEST') is not None
     # todo actually use test name?
     name = 'promnesia-test' if under_test else 'promnesia'
-    import appdirs as ad # type: ignore[import]
+    import appdirs as ad  # type: ignore[import-untyped]
     return ad.AppDirs(appname=name)
@@ -388,13 +411,13 @@ def default_cache_dir() -> Path:
 # make it lazy, otherwise it might crash on module import (e.g. on Windows)
 # ideally would be nice to fix it properly https://github.com/ahupp/python-magic#windows
 @lru_cache(1)
-def _magic() -> Callable[[PathIsh], Optional[str]]:
+def _magic() -> Callable[[PathIsh], str | None]:
     logger = get_logger()
     try:
-        import magic # type: ignore
+        import magic  # type: ignore
     except Exception as e:
         logger.exception(e)
-        defensive_msg: Optional[str] = None
+        defensive_msg: str | None = None
         if isinstance(e, ModuleNotFoundError) and e.name == 'magic':
             defensive_msg = "python-magic is not detected. It's recommended for better file type detection (pip3 install --user python-magic). See https://github.com/ahupp/python-magic#installation"
         elif isinstance(e, ImportError):
@@ -404,7 +427,7 @@ def _magic() -> Callable[[PathIsh], Optional[str]]:
         if defensive_msg is not None:
             logger.warning(defensive_msg)
             warnings.warn(defensive_msg)
-            return lambda path: None # stub
+            return lambda path: None  # stub  # noqa: ARG005
         else:
             raise e
     else:
@@ -420,7 +443,7 @@ def _mimetypes():
     return mimetypes
-def mime(path: PathIsh) -> Optional[str]:
+def mime(path: PathIsh) -> str | None:
     ps = str(path)
     mimetypes = _mimetypes()
     # first try mimetypes, it's only using the filename without opening the file
@@ -432,7 +455,7 @@ def mime(path: PathIsh) -> Optional[str]:
     return magic(ps)
-def find_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
+def find_args(root: Path, *, follow: bool, ignore: Sequence[str] = ()) -> list[str]:
     prune_dir_args = []
     ignore_file_args = []
     if ignore:
@@ -455,19 +478,19 @@ def find_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
     ]
-def fdfind_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
+def fdfind_args(root: Path, *, follow: bool, ignore: Sequence[str] = ()) -> list[str]:
     from .config import extra_fd_args
     ignore_args = []
     if ignore:
         # Add a statement that excludes the folder
-        ignore_args = [['--exclude', f'{n}'] for n in ignore]
+        _ignore_args = [['--exclude', f'{n}'] for n in ignore]
         # Flatten the list of lists
-        ignore_args_l = list(itertools.chain(*ignore_args))
+        ignore_args = list(itertools.chain(*_ignore_args))
     return [
         *extra_fd_args(),
-        *ignore_args_l,
+        *ignore_args,
         *(['--follow'] if follow else []),
         '--type', 'f',
         '.',
@@ -475,7 +498,7 @@ def fdfind_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
     ]
-def traverse(root: Path, *, follow: bool=True, ignore: List[str]=[]) -> Iterable[Path]:
+def traverse(root: Path, *, follow: bool=True, ignore: Sequence[str] = ()) -> Iterable[Path]:
     if not root.is_dir():
         yield root
         return
@@ -516,17 +539,7 @@ def traverse(root: Path, *, follow: bool=True, ignore: List[str]=[]) -> Iterable
 def get_system_zone() -> str:
     try:
         import tzlocal
-        # note: tzlocal mypy stubs aren't aware of api change yet (see https://github.com/python/typeshed/issues/6038)
-        try:
-            # 4.0 way
-            return tzlocal.get_localzone_name() # type: ignore[attr-defined]
-        except AttributeError as e:
-            # 2.0 way
-            zone = tzlocal.get_localzone().zone  # type: ignore[attr-defined]
-            # see https://github.com/python/typeshed/blame/968fd6d01d23470e0c8368e7ee7c43f54aaedc0e/stubs/pytz/pytz/tzinfo.pyi#L6
-            # it says all concrete instances should not be None
-            assert zone is not None
-            return zone
+        return tzlocal.get_localzone_name()
     except Exception as e:
         logger.exception(e)
         logger.error("Couldn't determine system timezone. Falling back to UTC. Please report this as a bug!")
@@ -540,7 +553,7 @@ def get_system_tz() -> pytz.BaseTzInfo:
         return pytz.timezone(zone)
     except Exception as e:
         logger.exception(e)
-        logger.error(f"Unknown time zone %s. Falling back to UTC. Please report this as a bug!", zone)
+        logger.error("Unknown time zone %s. Falling back to UTC. Please report this as a bug!", zone)
         return pytz.utc
 # used in misc/install_server.py
@@ -594,3 +607,8 @@ def is_sqlite_db(x: Path) -> bool:
         'application/vnd.sqlite3',
         # TODO this mime can also match wal files/journals, not sure
     }
+if not TYPE_CHECKING:
+    # todo deprecate properly --just backwards compat
+    from .compat import removeprefix  # noqa: F401

promnesia/compare.py CHANGED Viewed

@@ -1,13 +1,15 @@
-#!/usr/bin/env python3
+from __future__ import annotations
 # TODO perhaps make it external script?
 import argparse
-from pathlib import Path
 import logging
 import sys
-from typing import Dict, List, Any, NamedTuple, Optional, Iterator, Set, Tuple
+from collections.abc import Iterator, Sequence
+from pathlib import Path
+from typing import TypeVar
-from .common import DbVisit, Url, PathWithMtime # TODO ugh. figure out pythonpath
+from .common import DbVisit, PathWithMtime, Url
+from .database.load import row_to_db_visit
 # TODO include latest too?
 # from cconfig import ignore, filtered
@@ -18,14 +20,11 @@ def get_logger():
 # TODO return error depending on severity?
-from typing import TypeVar, Sequence
 T = TypeVar('T')
 def eliminate_by(sa: Sequence[T], sb: Sequence[T], key):
-    def make_dict(s: Sequence[T]) -> Dict[str, List[T]]:
-        res: Dict[str, List[T]] = {}
+    def make_dict(s: Sequence[T]) -> dict[str, list[T]]:
+        res: dict[str, list[T]] = {}
         for a in s:
             k = key(a)
             ll = res.get(k, None)
@@ -38,9 +37,9 @@ def eliminate_by(sa: Sequence[T], sb: Sequence[T], key):
     db = make_dict(sb)
     ka = set(da.keys())
     kb = set(db.keys())
-    onlya: Set[T] = set()
-    common: Set[T] = set()
-    onlyb: Set[T] = set()
+    onlya: set[T] = set()
+    common: set[T] = set()
+    onlyb: set[T] = set()
     for k in ka.union(kb):
         la = da.get(k, [])
         lb = db.get(k, [])
@@ -53,13 +52,13 @@ def eliminate_by(sa: Sequence[T], sb: Sequence[T], key):
     return onlya, common, onlyb
-def compare(before: List[DbVisit], after: List[DbVisit], between: str, *, log=True) -> List[DbVisit]:
+def compare(before: list[DbVisit], after: list[DbVisit], between: str, *, log=True) -> list[DbVisit]:
     logger = get_logger()
     logger.info('comparing between: %s', between)
-    errors: List[DbVisit] = []
+    errors: list[DbVisit] = []
-    umap: Dict[Url, List[DbVisit]] = {}
+    umap: dict[Url, list[DbVisit]] = {}
     for a in after:
         url = a.norm_url
         xx = umap.get(url, []) # TODO canonify here?
@@ -70,7 +69,7 @@ def compare(before: List[DbVisit], after: List[DbVisit], between: str, *, log=Tr
         errors.append(b)
         if log:
             logger.error('between %s missing %s', between, b)
-            print('ignoreline "%s", # %s %s' % ('exid', b.norm_url, b.src), file=sys.stderr)
+            print('ignoreline "{}", # {} {}'.format('exid', b.norm_url, b.src), file=sys.stderr)
     # the idea is that we eliminate items simultaneously from both sets
@@ -107,7 +106,7 @@ def get_files(args):
     if len(args.paths) == 0:
         int_dir = args.intermediate_dir
         assert int_dir.exists()
-        files = list(sorted(int_dir.glob('*.sqlite*')))
+        files = sorted(int_dir.glob('*.sqlite*'))
         files = files[-args.last:]
     else:
         files = [Path(p) for p in args.paths]
@@ -125,7 +124,7 @@ def main():
         sys.exit(1)
-def compare_files(*files: Path, log=True) -> Iterator[Tuple[str, DbVisit]]:
+def compare_files(*files: Path, log=True) -> Iterator[tuple[str, DbVisit]]:
     assert len(files) > 0
     logger = get_logger()
@@ -138,11 +137,11 @@ def compare_files(*files: Path, log=True) -> Iterator[Tuple[str, DbVisit]]:
         name = f.name
         this_dts = name[0: name.index('.')] # can't use stem due to multiple extensions..
-        from promnesia.server import _get_stuff # TODO ugh
-        engine, binder, table = _get_stuff(PathWithMtime.make(f))
+        from promnesia.server import _get_stuff  # TODO ugh
+        engine, table = _get_stuff(PathWithMtime.make(f))
         with engine.connect() as conn:
-            vis = [binder.from_row(row) for row in conn.execute(table.select())]  # type: ignore[var-annotated]
+            vis = [row_to_db_visit(row) for row in conn.execute(table.select())]
         if last is not None:
             between = f'{last_dts}:{this_dts}'

promnesia/compat.py CHANGED Viewed

@@ -1,12 +1,12 @@
-## we used to have compat fixes here for these for python3.7
-## keeping in case any sources depended on compat functions
-from subprocess import PIPE, run, check_call, check_output, Popen
-from typing import Protocol, Literal
-##
+from typing import TYPE_CHECKING
+if not TYPE_CHECKING:
+    ## we used to have compat fixes here for these for python3.7
+    ## keeping in case any sources depended on compat functions
+    from subprocess import PIPE, Popen, check_call, check_output, run  # noqa: F401
+    from typing import Literal, Protocol  # noqa: F401
+    ##
-# can remove after python3.9
-def removeprefix(text: str, prefix: str) -> str:
-    if text.startswith(prefix):
-        return text[len(prefix):]
-    return text
+    # todo deprecate properly
+    def removeprefix(text: str, prefix: str) -> str:
+        return text.removeprefix(prefix)

promnesia/config.py CHANGED Viewed

@@ -1,21 +1,19 @@
-from pathlib import Path
-import os
-from types import ModuleType
-from typing import List, Optional, Union, NamedTuple, Iterable, Callable
+from __future__ import annotations
 import importlib
 import importlib.util
+import os
 import warnings
+from collections.abc import Iterable
+from pathlib import Path
+from types import ModuleType
+from typing import Callable, NamedTuple, Union
-from .common import PathIsh, get_tmpdir, appdirs, default_output_dir, default_cache_dir, user_config_file
-from .common import Res, Source, DbVisit
+from .common import DbVisit, PathIsh, Res, Source, default_cache_dir, default_output_dir
 HookT = Callable[[Res[DbVisit]], Iterable[Res[DbVisit]]]
-from typing import Any
 ModuleName = str
 # something that can be converted into a proper Source
@@ -24,19 +22,19 @@ ConfigSource = Union[Source, ModuleName, ModuleType]
 class Config(NamedTuple):
     # TODO remove default from sources once migrated
-    SOURCES: List[ConfigSource] = []
+    SOURCES: list[ConfigSource] = []
     # if not specified, uses user data dir
-    OUTPUT_DIR: Optional[PathIsh] = None
+    OUTPUT_DIR: PathIsh | None = None
-    CACHE_DIR: Optional[PathIsh] = ''
-    FILTERS: List[str] = []
+    CACHE_DIR: PathIsh | None = ''
+    FILTERS: list[str] = []
-    HOOK: Optional[HookT] = None
+    HOOK: HookT | None = None
     #
     # NOTE: INDEXERS is deprecated, use SOURCES instead
-    INDEXERS: List[ConfigSource] = []
+    INDEXERS: list[ConfigSource] = []
     #MIME_HANDLER: Optional[str] = None # TODO
     @property
@@ -68,9 +66,11 @@ class Config(NamedTuple):
                 yield Source(r)
     @property
-    def cache_dir(self) -> Optional[Path]:
+    def cache_dir(self) -> Path | None:
+        # TODO we used to use this for cachew, but it's best to rely on HPI modules etc to cofigure this
+        # keeping just in case for now
         cd = self.CACHE_DIR
-        cpath: Optional[Path]
+        cpath: Path | None
         if cd is None:
             cpath = None # means 'disabled' in cachew
         elif cd == '': # meh.. but need to make it None friendly..
@@ -94,10 +94,10 @@ class Config(NamedTuple):
         return self.output_dir / 'promnesia.sqlite'
     @property
-    def hook(self) -> Optional[HookT]:
+    def hook(self) -> HookT | None:
         return self.HOOK
-instance: Optional[Config] = None
+instance: Config | None = None
 def has() -> bool:
@@ -127,7 +127,7 @@ def import_config(config_file: PathIsh) -> Config:
     spec = importlib.util.spec_from_file_location(name, p); assert spec is not None
     mod = importlib.util.module_from_spec(spec); assert mod is not None
     loader = spec.loader; assert loader is not None
-    loader.exec_module(mod) # type: ignore[attr-defined]
+    loader.exec_module(mod)
     d = {}
     for f in Config._fields:
@@ -137,7 +137,7 @@ def import_config(config_file: PathIsh) -> Config:
 # TODO: ugh. this causes warnings to be repeated multiple times... need to reuse the pool or something..
-def use_cores() -> Optional[int]:
+def use_cores() -> int | None:
     '''
     Somewhat experimental.
     For now only used in sources.auto, perhaps later will be shared among the other indexers.
@@ -152,7 +152,7 @@ def use_cores() -> Optional[int]:
         return 0
-def extra_fd_args() -> List[str]:
+def extra_fd_args() -> list[str]:
     '''
     Not sure where it belongs yet... so via env variable for now
     Can be used to pass --ignore-file parameter

promnesia/database/common.py ADDED Viewed

@@ -0,0 +1,67 @@
+from __future__ import annotations
+from collections.abc import Sequence
+from datetime import datetime
+from sqlalchemy import (
+    Column,
+    Integer,
+    String,
+)
+# TODO maybe later move DbVisit here completely?
+# kinda an issue that it's technically an "api" because hook in config can patch up DbVisit
+from ..common import DbVisit, Loc
+def get_columns() -> Sequence[Column]:
+    # fmt: off
+    res: Sequence[Column] = [
+        Column('norm_url'     , String()),
+        Column('orig_url'     , String()),
+        Column('dt'           , String()),
+        Column('locator_title', String()),
+        Column('locator_href' , String()),
+        Column('src'          , String()),
+        Column('context'      , String()),
+        Column('duration'     , Integer())
+    ]
+    # fmt: on
+    assert len(res) == len(DbVisit._fields) + 1  # +1 because Locator is 'flattened'
+    return res
+def db_visit_to_row(v: DbVisit) -> tuple:
+    # ugh, very hacky...
+    # we want to make sure the resulting tuple only consists of simple types
+    # so we can use dbengine directly
+    dt_s = v.dt.isoformat()
+    row = (
+        v.norm_url,
+        v.orig_url,
+        dt_s,
+        v.locator.title,
+        v.locator.href,
+        v.src,
+        v.context,
+        v.duration,
+    )
+    return row
+def row_to_db_visit(row: Sequence) -> DbVisit:
+    (norm_url, orig_url, dt_s, locator_title, locator_href, src, context, duration) = row
+    dt_s = dt_s.split()[0]  # backwards compatibility: previously it could be a string separated with tz name
+    dt = datetime.fromisoformat(dt_s)
+    return DbVisit(
+        norm_url=norm_url,
+        orig_url=orig_url,
+        dt=dt,
+        locator=Loc(
+            title=locator_title,
+            href=locator_href,
+        ),
+        src=src,
+        context=context,
+        duration=duration,
+    )

promnesia 1.2.20230515__py3-none-any.whl → 1.3.20241021__py3-none-any.whl

promnesia 1.2.20230515__py3-none-any.whl → 1.3.20241021__py3-none-any.whl