promnesia 1.2.20230515__py3-none-any.whl → 1.3.20241021__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries, and is provided for informational purposes only; it reflects the changes between the two versions as they appear in their public registry. Lines whose content was cut off by the diff viewer are marked with `…`.
- promnesia/__init__.py +14 -3
- promnesia/__main__.py +60 -35
- promnesia/cannon.py +27 -27
- promnesia/common.py +85 -67
- promnesia/compare.py +21 -22
- promnesia/compat.py +10 -10
- promnesia/config.py +23 -23
- promnesia/database/common.py +67 -0
- promnesia/database/dump.py +188 -0
- promnesia/{read_db.py → database/load.py} +16 -17
- promnesia/extract.py +14 -11
- promnesia/kjson.py +12 -11
- promnesia/logging.py +4 -4
- promnesia/misc/__init__.pyi +0 -0
- promnesia/misc/config_example.py +1 -2
- promnesia/misc/install_server.py +7 -9
- promnesia/server.py +57 -47
- promnesia/sources/__init__.pyi +0 -0
- promnesia/sources/auto.py +50 -35
- promnesia/sources/auto_logseq.py +6 -5
- promnesia/sources/auto_obsidian.py +2 -2
- promnesia/sources/browser.py +14 -9
- promnesia/sources/browser_legacy.py +26 -16
- promnesia/sources/demo.py +19 -3
- promnesia/sources/fbmessenger.py +3 -2
- promnesia/sources/filetypes.py +16 -7
- promnesia/sources/github.py +7 -9
- promnesia/sources/guess.py +2 -1
- promnesia/sources/hackernews.py +2 -2
- promnesia/sources/hpi.py +2 -2
- promnesia/sources/html.py +7 -5
- promnesia/sources/hypothesis.py +4 -3
- promnesia/sources/instapaper.py +2 -2
- promnesia/sources/markdown.py +31 -21
- promnesia/sources/org.py +27 -13
- promnesia/sources/plaintext.py +30 -29
- promnesia/sources/pocket.py +3 -2
- promnesia/sources/reddit.py +20 -19
- promnesia/sources/roamresearch.py +2 -1
- promnesia/sources/rss.py +4 -5
- promnesia/sources/shellcmd.py +19 -6
- promnesia/sources/signal.py +33 -24
- promnesia/sources/smscalls.py +2 -2
- promnesia/sources/stackexchange.py +4 -3
- promnesia/sources/takeout.py +76 -9
- promnesia/sources/takeout_legacy.py +24 -12
- promnesia/sources/telegram.py +13 -11
- promnesia/sources/telegram_legacy.py +18 -7
- promnesia/sources/twitter.py +6 -5
- promnesia/sources/vcs.py +5 -3
- promnesia/sources/viber.py +10 -9
- promnesia/sources/website.py +4 -4
- promnesia/sources/zulip.py +3 -2
- promnesia/sqlite.py +7 -4
- promnesia/tests/__init__.py +0 -0
- promnesia/tests/common.py +140 -0
- promnesia/tests/server_helper.py +67 -0
- promnesia/tests/sources/__init__.py +0 -0
- promnesia/tests/sources/test_auto.py +65 -0
- promnesia/tests/sources/test_filetypes.py +43 -0
- promnesia/tests/sources/test_hypothesis.py +39 -0
- promnesia/tests/sources/test_org.py +64 -0
- promnesia/tests/sources/test_plaintext.py +25 -0
- promnesia/tests/sources/test_shellcmd.py +21 -0
- promnesia/tests/sources/test_takeout.py +56 -0
- promnesia/tests/test_cannon.py +325 -0
- promnesia/tests/test_cli.py +40 -0
- promnesia/tests/test_compare.py +30 -0
- promnesia/tests/test_config.py +289 -0
- promnesia/tests/test_db_dump.py +222 -0
- promnesia/tests/test_extract.py +65 -0
- promnesia/tests/test_extract_urls.py +43 -0
- promnesia/tests/test_indexer.py +251 -0
- promnesia/tests/test_server.py +291 -0
- promnesia/tests/test_traverse.py +39 -0
- promnesia/tests/utils.py +35 -0
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/METADATA +15 -18
- promnesia-1.3.20241021.dist-info/RECORD +83 -0
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/WHEEL +1 -1
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/entry_points.txt +0 -1
- promnesia/dump.py +0 -105
- promnesia-1.2.20230515.dist-info/RECORD +0 -58
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/LICENSE +0 -0
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/top_level.txt +0 -0
promnesia/server.py
CHANGED
```diff
@@ -1,35 +1,45 @@
-#!/usr/bin/python3
 from __future__ import annotations
 
-__package__ = 'promnesia'  # ugh. hacky way to make wsgi runner work properly...
-
 import argparse
-
-from datetime import timedelta
-from functools import lru_cache
+import importlib.metadata
 import json
 import logging
 import os
+from dataclasses import dataclass
+from datetime import timedelta
+from functools import lru_cache
 from pathlib import Path
-from typing import …
-
+from typing import Any, NamedTuple, Optional, Protocol
 
+import fastapi
 import pytz
 from pytz import BaseTzInfo
-…
-…
-…
-…
-…
-…
+from sqlalchemy import (
+    Column,
+    Table,
+    and_,
+    between,
+    exc,
+    func,
+    literal,
+    or_,
+    select,
+    types,
+)
 from sqlalchemy.sql import text
+from sqlalchemy.sql.elements import ColumnElement
 
-
-from .common import PathWithMtime, DbVisit, Url, setup_logger, default_output_dir, get_system_tz
 from .cannon import canonify
+from .common import (
+    DbVisit,
+    PathWithMtime,
+    default_output_dir,
+    get_system_tz,
+    setup_logger,
+)
+from .database.load import DbStuff, get_db_stuff, row_to_db_visit
 
-
-Json = Dict[str, Any]
+Json = dict[str, Any]
 
 app = fastapi.FastAPI()
 
@@ -51,8 +61,7 @@ def get_logger() -> logging.Logger:
 
 
 def get_version() -> str:
-    …
-    return get_distribution(__package__).version
+    return importlib.metadata.version(__package__)
 
 
 class ServerConfig(NamedTuple):
@@ -66,7 +75,7 @@ class ServerConfig(NamedTuple):
         })
 
     @classmethod
-    def from_str(cls, cfgs: str) -> …
+    def from_str(cls, cfgs: str) -> ServerConfig:
        d = json.loads(cfgs)
        return cls(
            db=Path(d['db']),
@@ -112,15 +121,13 @@ def as_json(v: DbVisit) -> Json:
     }
 
 
-def get_db_path(check: bool=True) -> Path:
+def get_db_path(*, check: bool=True) -> Path:
     db = EnvConfig.get().db
     if check:
         assert db.exists(), db
     return db
 
 
-from .read_db import DbStuff, get_db_stuff
-
 @lru_cache(1)
 # PathWithMtime aids lru_cache in reloading the sqlalchemy binder
 def _get_stuff(db_path: PathWithMtime) -> DbStuff:
@@ -128,7 +135,7 @@ def _get_stuff(db_path: PathWithMtime) -> DbStuff:
     return get_db_stuff(db_path=db_path.path)
 
 
-def get_stuff(db_path: …
+def get_stuff(db_path: Path | None=None) -> DbStuff:  # TODO better name
     # ok, it will always load from the same db file; but intermediate would be kinda an optional dump.
     if db_path is None:
         db_path = get_db_path()
@@ -136,10 +143,10 @@ def get_stuff(db_path: Optional[Path]=None) -> DbStuff: # TODO better name
 
 
 def db_stats(db_path: Path) -> Json:
-    engine, …
+    engine, table = get_stuff(db_path)
     query = select(func.count()).select_from(table)
     with engine.connect() as conn:
-        total = …
+        [(total,)] = conn.execute(query)
     return {
         'total_visits': total,
     }
@@ -151,8 +158,8 @@ class Where(Protocol):
 
 @dataclass
 class VisitsResponse:
-    original_url: …
-    normalised_url: …
+    original_url: str
+    normalised_url: str
     visits: Any
 
 
@@ -167,7 +174,7 @@ def search_common(url: str, where: Where) -> VisitsResponse:
     url = original_url
     logger.info('normalised url: %s', url)
 
-    engine, …
+    engine, table = get_stuff()
 
     query = table.select().where(where(table=table, url=url))
     logger.debug('query: %s', query)
@@ -175,17 +182,17 @@ def search_common(url: str, where: Where) -> VisitsResponse:
     with engine.connect() as conn:
         try:
             # TODO make more defensive here
-            visits: …
+            visits: list[DbVisit] = [row_to_db_visit(row) for row in conn.execute(query)]
         except exc.OperationalError as e:
             if getattr(e, 'msg', None) == 'no such table: visits':
-                logger.…
+                logger.warning('you may have to run indexer first!')
                 #result['visits'] = [{an error with a msg}] # TODO
                 #return result
             raise
 
     logger.debug('got %d visits from db', len(visits))
 
-    vlist: …
+    vlist: list[DbVisit] = []
     for vis in visits:
         dt = vis.dt
         if dt.tzinfo is None:  # FIXME need this for /visits endpoint as well?
@@ -228,10 +235,11 @@ def status() -> Json:
         logger.exception(e)
         stats = {'ERROR': str(e)}
 
-    version: …
+    version: str | None
    try:
        version = get_version()
    except Exception as e:
+        logger.exception(e)
        version = None
 
    return {
@@ -241,10 +249,9 @@ def status() -> Json:
    }
 
 
-from dataclasses import dataclass
 @dataclass
 class VisitsRequest:
-    url: …
+    url: str
 
 @app.get ('/visits', response_model=VisitsResponse)
 @app.post('/visits', response_model=VisitsResponse)
@@ -255,15 +262,17 @@ def visits(request: VisitsRequest) -> VisitsResponse:
         url=url,
         # odd, doesn't work just with: x or (y and z)
         where=lambda table, url: or_(
-            …
-            …
+            # exact match
+            table.c.norm_url == url,
+            # + child visits, but only 'interesting' ones
+            and_(table.c.context != None, table.c.norm_url.startswith(url, autoescape=True))  # noqa: E711
         ),
     )
 
 
 @dataclass
 class SearchRequest:
-    url: …
+    url: str
 
 @app.get ('/search', response_model=VisitsResponse)
 @app.post('/search', response_model=VisitsResponse)
@@ -300,7 +309,7 @@ def search_around(request: SearchAroundRequest) -> VisitsResponse:
 
     return search_common(
         url='http://dummy.org',  # NOTE: not used in the where query (below).. perhaps need to get rid of this
-        where=lambda table, url: between(
+        where=lambda table, url: between(  # noqa: ARG005
            func.strftime(
                '%s',  # NOTE: it's tz aware, e.g. would distinguish +05:00 vs -03:00
                # this is a bit fragile, relies on cachew internal timestamp format, e.g.
@@ -323,25 +332,26 @@ def search_around(request: SearchAroundRequest) -> VisitsResponse:
 _NO_VERSION = (0, 11, 14)
 _LATEST = (9999, 9999, 9999)
 
-def as_version(version: str) -> …
+def as_version(version: str) -> tuple[int, int, int]:
     if version == '':
         return _NO_VERSION
     try:
         [v1, v2, v3] = map(int, version.split('.'))
-        return (v1, v2, v3)
     except Exception as e:
         logger = get_logger()
         logger.error('error while parsing version %s', version)
         logger.exception(e)
         return _LATEST
+    else:
+        return (v1, v2, v3)
 
 
 @dataclass
 class VisitedRequest:
-    urls: …
+    urls: list[str]
     client_version: str = ''
 
-VisitedResponse = …
+VisitedResponse = list[Optional[Json]]
 
 @app.get ('/visited', response_model=VisitedResponse)
 @app.post('/visited', response_model=VisitedResponse)
@@ -356,12 +366,12 @@ def visited(request: VisitedRequest) -> VisitedResponse:
     version = as_version(client_version)
 
     nurls = [canonify(u) for u in urls]
-    snurls = …
+    snurls = sorted(set(nurls))
 
     if len(snurls) == 0:
         return []
 
-    engine, …
+    engine, table = get_stuff()
 
     # sqlalchemy doesn't seem to support SELECT FROM (VALUES (...)) in its api
     # also doesn't support array binding...
@@ -389,7 +399,7 @@ SELECT queried, visits.*
     # brings down large queries to 50ms...
     with engine.connect() as conn:
         res = list(conn.execute(query))
-        present: …
+        present: dict[str, Any] = {row[0]: row_to_db_visit(row[1:]) for row in res}
     results = []
     for nu in nurls:
         r = present.get(nu, None)
```
promnesia/sources/auto.py
CHANGED
```diff
@@ -5,34 +5,46 @@
 - autodetects Obsidian vault and adds `obsidian://` app protocol support [[file:../src/promnesia/sources/obsidian.py][promnesia.sources.obsidian]]
 - autodetects Logseq graph and adds `logseq://` app protocol support [[file:../src/promnesia/sources/logseq.py][promnesia.sources.logseq]]
 """
+from __future__ import annotations
 
 import csv
-from concurrent.futures import ProcessPoolExecutor as Pool
-from contextlib import nullcontext
-from datetime import datetime
 import itertools
 import json
 import os
-from …
+from collections.abc import Iterable, Iterator, Sequence
+from concurrent.futures import ProcessPoolExecutor as Pool
+from contextlib import nullcontext
 from fnmatch import fnmatch
+from functools import wraps
 from pathlib import Path
-from …
-…
-…
-…
-…
-…
+from typing import Any, Callable, NamedTuple, Optional
+
+from promnesia.common import (
+    Loc,
+    PathIsh,
+    Result,
+    Results,
+    Visit,
+    echain,
+    extract_urls,
+    file_mtime,
+    get_logger,
+    get_tmpdir,
+    logger,
+    mime,
+    traverse,
+    warn_once,
+)
+from promnesia.config import use_cores
 
-
-from .filetypes import EUrl
-from .auto_obsidian import obsidian_replacer
 from .auto_logseq import logseq_replacer
+from .auto_obsidian import obsidian_replacer
+from .filetypes import Ctx, EUrl
 
 
-def _collect(thing, path: …
+def _collect(thing, path: list[str], result: list[EUrl]) -> None:
     if isinstance(thing, str):
-        ctx: Ctx = tuple(path)
+        ctx: Ctx = tuple(path)
         result.extend([EUrl(url=u, ctx=ctx) for u in extract_urls(thing)])
     elif isinstance(thing, list):
         path.append('[]')
@@ -50,9 +62,9 @@ def _collect(thing, path: List[str], result: List[EUrl]) -> None:
 
 
 # TODO mm. okay, I suppose could use kython consuming thingy?..
-def collect_from(thing) -> …
-    uuu: …
-    path: …
+def collect_from(thing) -> list[EUrl]:
+    uuu: list[EUrl] = []
+    path: list[str] = []
     _collect(thing, path, uuu)
     return uuu
 
@@ -84,7 +96,7 @@ def _plaintext(path: Path) -> Results:
 def fallback(ex):
     """Falls back to plaintext in case of issues"""
 
-    fallback_active: …
+    fallback_active: dict[Any, bool] = {}
     @wraps(ex)
     def wrapped(path: Path):
         nonlocal fallback_active
@@ -98,7 +110,7 @@ def fallback(ex):
         except ModuleNotFoundError as me:
             logger = get_logger()
             logger.exception(me)
-            logger.…
+            logger.warning('%s: %s not found, falling back to grep! "pip3 install --user %s" for better support!', path, me.name, me.name)
             yield me
             fallback_active[ex] = True
             do_fallback = True
@@ -125,7 +137,7 @@ def _org(path: Path) -> Results:
     return org.extract_from_file(path)
 
 
-from .filetypes import …
+from .filetypes import CODE, IGNORE, TYPE2IDX, type2idx
 
 TYPE2IDX.update({
     'application/json': _json,
@@ -167,8 +179,8 @@ for t in CODE:
 Replacer = Optional[Callable[[str, str], str]]
 
 def index(
-        *paths: …
-        ignored: …
+        *paths: PathIsh,
+        ignored: Sequence[str] | str=(),
         follow: bool=True,
         replacer: Replacer=None,
 ) -> Results:
@@ -209,10 +221,10 @@ class Options(NamedTuple):
     # TODO option to add ignores? not sure..
     # TODO I don't like this replacer thing... think about removing it
     replacer: Replacer
-    root: …
+    root: Path | None=None
 
 
-def _index_file_aux(path: Path, opts: Options) -> …
+def _index_file_aux(path: Path, opts: Options) -> Exception | list[Result]:
     # just a helper for the concurrent version (the generator isn't picklable)
     try:
         return list(_index_file(path, opts=opts))
@@ -247,7 +259,7 @@ def _index(path: Path, opts: Options) -> Results:
             continue
 
         p = p.resolve()
-        if not os.path.exists(p):
+        if not os.path.exists(p):  # noqa: PTH110
             logger.debug('ignoring %s: broken symlink?', p)
             continue
 
@@ -265,8 +277,10 @@ def _index(path: Path, opts: Options) -> Results:
 
 
 Mime = str
-from .filetypes import Ex
-…
+from .filetypes import Ex  # meh
+
+
+def by_path(pp: Path) -> tuple[Ex | None, Mime | None]:
     suf = pp.suffix.lower()
     # firt check suffixes, it's faster
     s = type2idx(suf)
@@ -282,6 +296,8 @@ def by_path(pp: Path) -> Tuple[Optional[Ex], Optional[Mime]]:
 
 def _index_file(pp: Path, opts: Options) -> Results:
     logger = get_logger()
+    # TODO need to keep debug logs here...
+    # logger.info(f"indexing {pp}")
     # TODO use kompress?
     # TODO not even sure if it's used...
     suf = pp.suffix.lower()
@@ -307,18 +323,17 @@ def _index_file(pp: Path, opts: Options) -> Results:
 
     ip, pm = by_path(pp)
     if ip is None:
-        # …
-        # TODO only log once? # hmm..
+        # todo not really sure about using warnings vs yielding error here?
         msg = f'No extractor for suffix {suf}, mime {pm}'
-        …
+        warn_once(msg)
         yield echain(ex, RuntimeError(msg))
         return
 
     logger.debug('indexing via %s: %s', ip.__name__, pp)
 
-    def indexer() -> …
+    def indexer() -> Urls | Results:
         # eh, annoying.. need to make more generic..
-        idx = ip(pp)
+        idx = ip(pp)
         try:
             yield from idx
         except Exception as e:
@@ -351,7 +366,7 @@ def _index_file(pp: Path, opts: Options) -> Results:
             v = v._replace(locator=loc)
 
         if replacer is not None and root is not None:
-            upd: …
+            upd: dict[str, Any] = {}
             href = v.locator.href
             if href is not None:
                 upd['locator'] = v.locator._replace(href=replacer(href, str(root)), title=replacer(v.locator.title, str(root)))
```
promnesia/sources/auto_logseq.py
CHANGED
```diff
@@ -1,14 +1,15 @@
 import os.path
 import urllib.parse
 
+
 def logseq_replacer(path: str, root: str) -> str:
-    if not path.startswith("editor://") or not (path.endswith(…
+    if not path.startswith("editor://") or not (path.endswith((".md", ".org"))):
         return path
-
-    graph = os.path.basename(root)
-    page_name = os.path.basename(path).rsplit('.', 1)[0]
+
+    graph = os.path.basename(root)  # noqa: PTH119
+    page_name = os.path.basename(path).rsplit('.', 1)[0]  # noqa: PTH119
     encoded_page_name = urllib.parse.quote(page_name)
-
+
     uri = f"logseq://graph/{graph}?page={encoded_page_name}"
 
     return uri
```
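Roughly what the rewritten replacer does (the paths below are invented for illustration): an `editor://` URL pointing at an `.md`/`.org` page inside the graph becomes a `logseq://` deep link, and everything else passes through unchanged. The `path.endswith((".md", ".org"))` form works because `str.endswith` accepts a tuple of suffixes:

```python
logseq_replacer("editor:///home/user/notes/pages/my page.md", "/home/user/notes")
# -> 'logseq://graph/notes?page=my%20page'

logseq_replacer("editor:///home/user/notes/assets/image.png", "/home/user/notes")
# -> returned unchanged: the extension is neither .md nor .org
```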
promnesia/sources/browser.py
CHANGED
```diff
@@ -2,15 +2,18 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for visits from web browsers.
 '''
 
+from __future__ import annotations
+
 import re
-from typing import Optional, Iterator, Any, TYPE_CHECKING
 import warnings
+from collections.abc import Iterator
+from typing import TYPE_CHECKING, Any
 
-from promnesia.common import …
+from promnesia.common import Loc, PathIsh, Results, Second, Visit, is_sqlite_db, logger
 
 
-def index(p: …
-    from . import hpi
+def index(p: PathIsh | None = None) -> Results:
+    from . import hpi  # noqa: F401,I001
 
     if p is None:
         from my.browser.all import history
@@ -24,10 +27,11 @@ def index(p: Optional[PathIsh]=None) -> Results:
     )
     try:
         yield from _index_new_with_adhoc_config(path=p)
-        return
     except Exception as e:
         logger.exception(e)
         warnings.warn("Hacking my.config.browser.export didn't work. You probably need to update HPI.")
+    else:
+        return
 
     logger.warning("Falling back onto legacy promnesia.sources.browser_legacy module")
     yield from _index_old(path=p)
@@ -35,11 +39,12 @@ def index(p: Optional[PathIsh]=None) -> Results:
 
 def _index_old(*, path: PathIsh) -> Results:
     from . import browser_legacy
+
     yield from browser_legacy.index(path)
 
 
 def _index_new_with_adhoc_config(*, path: PathIsh) -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
 
     ## previously, it was possible to index be called with multiple different db search paths
     ## this would result in each subsequent call to my.browser.export.history to invalidate cache every time
@@ -50,7 +55,7 @@ def _index_new_with_adhoc_config(*, path: PathIsh) -> Results:
     cache_override = None if hpi_cache_dir is None else hpi_cache_dir / sanitized_path
     ##
 
-    from my.core.common import …
+    from my.core.common import Paths, classproperty, get_files
     class config:
         class core:
             cache_dir = cache_override
@@ -75,8 +80,8 @@ else:
 
 def _index_new(history: Iterator[BrowserMergeVisit]) -> Results:
     for v in history:
-        desc: …
-        duration: …
+        desc: str | None = None
+        duration: Second | None = None
         metadata = v.metadata
         if metadata is not None:
             desc = metadata.title
```
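The `index()` change moves the early `return` from the `try` body into an `else` clause. Behaviour is the same, but the intent is clearer: the legacy fallback runs exactly when `_index_new_with_adhoc_config` raised, and the happy path stays out of the guarded region (the shape ruff's TRY300 rule suggests). A toy model of the same control flow:

```python
def fetch(primary, fallback):
    try:
        result = primary()
    except Exception:
        pass  # fall through to the fallback below
    else:
        return result  # runs only if primary() did not raise
    return fallback()


assert fetch(lambda: "new", lambda: "legacy") == "new"
assert fetch(lambda: 1 / 0, lambda: "legacy") == "legacy"
```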
promnesia/sources/browser_legacy.py
CHANGED
```diff
@@ -1,16 +1,23 @@
+from __future__ import annotations
+
+import sqlite3
 from datetime import datetime
 from pathlib import Path
 from urllib.parse import unquote
-import sqlite3
-from typing import List, Set
 
 import pytz
 
-from …
-from …
+from promnesia import config
+from promnesia.common import Loc, PathIsh, Results, Second, Visit, is_sqlite_db, logger
 
-
-from cachew import cachew
+try:
+    from cachew import cachew
+except ModuleNotFoundError as me:
+    if me.name != 'cachew':
+        raise me
+    # this module is legacy anyway, so just make it defensive
+    def cachew(*args, **kwargs):  # type: ignore[no-redef]
+        return lambda f: f
 
 
 def index(p: PathIsh) -> Results:
@@ -29,21 +36,21 @@ def index(p: PathIsh) -> Results:
 
 
 
-def _index_dbs(dbs: …
+def _index_dbs(dbs: list[Path], cachew_name: str):
     # TODO right... not ideal, need to think how to handle it properly...
     import sys
     sys.setrecursionlimit(5000)
 
     cache_dir = config.get().cache_dir
     cpath = None if cache_dir is None else cache_dir / cachew_name
-    emitted: …
+    emitted: set = set()
     yield from _index_dbs_aux(cpath, dbs, emitted=emitted)
 
 
 # todo wow, stack traces are ridiculous here...
 # todo hmm, feels like it should be a class or something?
-@cachew(lambda cp, dbs, emitted: cp, depends_on=lambda cp, dbs, emitted: dbs)  # , logger=logger)
-def _index_dbs_aux(cache_path: Path, dbs: …
+@cachew(lambda cp, dbs, emitted: cp, depends_on=lambda cp, dbs, emitted: dbs)  # , logger=logger) # noqa: ARG005
+def _index_dbs_aux(cache_path: Path | None, dbs: list[Path], emitted: set) -> Results:
     if len(dbs) == 0:
         return
 
@@ -58,7 +65,7 @@ def _index_dbs_aux(cache_path: Path, dbs: List[Path], emitted: Set) -> Results:
             xs_was_cached = True
             logger.debug('seems that %d first items were previously cached', len(xs))
         if xs_was_cached:
-            key = (r.url, r.dt)
+            key = str(r) if isinstance(r, Exception) else (r.url, r.dt)
             assert key not in emitted, key  # todo not sure if this assert is necessary?
             # hmm ok it might happen if we messed up with indexing individual db?
             # alternatively, could abuse it to avoid messing with 'emitted' in _index_db?
@@ -69,7 +76,7 @@ def _index_dbs_aux(cache_path: Path, dbs: List[Path], emitted: Set) -> Results:
         yield from _index_db(db, emitted=emitted)
 
 
-def _index_db(db: Path, emitted: …
+def _index_db(db: Path, emitted: set):
     logger.info('processing %s', db)  # debug level?
 
     # todo schema check (not so critical for cachew though)
@@ -115,17 +122,20 @@ Col = str
 ColType = str
 
 
-from …
+from collections.abc import Sequence
+from typing import NamedTuple, Union
+
 
 class Schema(NamedTuple):
-    cols: Sequence[…
+    cols: Sequence[tuple[Col, ColType]]
     key: Sequence[str]
 
 
-SchemaCheck = …
+SchemaCheck = tuple[str, Union[str, Sequence[str]]]  # todo Union: meh
 
 from dataclasses import dataclass
 
+
 # todo protocol?
 @dataclass
 class Extr:
@@ -173,7 +183,7 @@ class Chrome(Extr):
         dt = chrome_time_to_utc(int(ts))
         url = unquote(url)  # chrome urls are all quoted
         dd = int(durs)
-        dur: …
+        dur: Second | None = None if dd == 0 else dd // 1_000_000
         return Visit(
             url=url,
             dt=dt,
```
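The guarded `cachew` import at the top of this file keeps every `@cachew(...)` call site working when the library isn't installed: the stand-in must be a decorator factory, i.e. when called with arguments it returns a decorator that hands the function back untouched. A quick check of that shape (the arguments here are placeholders):

```python
def cachew(*args, **kwargs):  # no-op stand-in with the same call shape
    return lambda f: f


@cachew("cache-path", depends_on=lambda x: x)
def expensive(x: int) -> int:
    return x * 2


assert expensive(21) == 42  # same behaviour, just no caching
```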
|