promnesia 1.2.20240810__py3-none-any.whl → 1.4.20250909__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. promnesia/__init__.py +18 -4
  2. promnesia/__main__.py +104 -78
  3. promnesia/cannon.py +108 -107
  4. promnesia/common.py +107 -88
  5. promnesia/compare.py +33 -30
  6. promnesia/compat.py +10 -10
  7. promnesia/config.py +37 -34
  8. promnesia/database/common.py +4 -3
  9. promnesia/database/dump.py +13 -13
  10. promnesia/database/load.py +7 -7
  11. promnesia/extract.py +19 -17
  12. promnesia/logging.py +27 -15
  13. promnesia/misc/install_server.py +32 -27
  14. promnesia/server.py +106 -79
  15. promnesia/sources/auto.py +104 -77
  16. promnesia/sources/auto_logseq.py +6 -5
  17. promnesia/sources/auto_obsidian.py +2 -2
  18. promnesia/sources/browser.py +20 -10
  19. promnesia/sources/browser_legacy.py +65 -50
  20. promnesia/sources/demo.py +7 -8
  21. promnesia/sources/fbmessenger.py +3 -3
  22. promnesia/sources/filetypes.py +22 -16
  23. promnesia/sources/github.py +9 -8
  24. promnesia/sources/guess.py +6 -2
  25. promnesia/sources/hackernews.py +7 -9
  26. promnesia/sources/hpi.py +5 -3
  27. promnesia/sources/html.py +11 -7
  28. promnesia/sources/hypothesis.py +3 -2
  29. promnesia/sources/instapaper.py +3 -2
  30. promnesia/sources/markdown.py +22 -12
  31. promnesia/sources/org.py +36 -17
  32. promnesia/sources/plaintext.py +41 -39
  33. promnesia/sources/pocket.py +5 -3
  34. promnesia/sources/reddit.py +24 -26
  35. promnesia/sources/roamresearch.py +5 -2
  36. promnesia/sources/rss.py +6 -8
  37. promnesia/sources/shellcmd.py +21 -11
  38. promnesia/sources/signal.py +27 -26
  39. promnesia/sources/smscalls.py +2 -3
  40. promnesia/sources/stackexchange.py +5 -4
  41. promnesia/sources/takeout.py +37 -34
  42. promnesia/sources/takeout_legacy.py +29 -19
  43. promnesia/sources/telegram.py +18 -12
  44. promnesia/sources/telegram_legacy.py +22 -11
  45. promnesia/sources/twitter.py +7 -6
  46. promnesia/sources/vcs.py +11 -6
  47. promnesia/sources/viber.py +11 -10
  48. promnesia/sources/website.py +8 -7
  49. promnesia/sources/zulip.py +3 -2
  50. promnesia/sqlite.py +13 -7
  51. promnesia/tests/common.py +10 -5
  52. promnesia/tests/server_helper.py +13 -10
  53. promnesia/tests/sources/test_auto.py +2 -3
  54. promnesia/tests/sources/test_filetypes.py +11 -8
  55. promnesia/tests/sources/test_hypothesis.py +10 -6
  56. promnesia/tests/sources/test_org.py +9 -5
  57. promnesia/tests/sources/test_plaintext.py +9 -8
  58. promnesia/tests/sources/test_shellcmd.py +13 -13
  59. promnesia/tests/sources/test_takeout.py +3 -5
  60. promnesia/tests/test_cannon.py +256 -239
  61. promnesia/tests/test_cli.py +12 -8
  62. promnesia/tests/test_compare.py +17 -13
  63. promnesia/tests/test_config.py +7 -8
  64. promnesia/tests/test_db_dump.py +15 -15
  65. promnesia/tests/test_extract.py +17 -10
  66. promnesia/tests/test_indexer.py +24 -18
  67. promnesia/tests/test_server.py +12 -13
  68. promnesia/tests/test_traverse.py +0 -2
  69. promnesia/tests/utils.py +3 -7
  70. promnesia-1.4.20250909.dist-info/METADATA +66 -0
  71. promnesia-1.4.20250909.dist-info/RECORD +80 -0
  72. {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/WHEEL +1 -2
  73. promnesia/kjson.py +0 -121
  74. promnesia/sources/__init__.pyi +0 -0
  75. promnesia-1.2.20240810.dist-info/METADATA +0 -54
  76. promnesia-1.2.20240810.dist-info/RECORD +0 -83
  77. promnesia-1.2.20240810.dist-info/top_level.txt +0 -1
  78. {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/entry_points.txt +0 -0
  79. {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info/licenses}/LICENSE +0 -0
promnesia/sources/auto.py CHANGED
@@ -2,36 +2,48 @@
  - discovers files recursively
  - guesses the format (orgmode/markdown/json/etc) by the extension/MIME type
  - can index most of plaintext files, including source code!
- - autodetects Obsidian vault and adds `obsidian://` app protocol support [[file:../src/promnesia/sources/obsidian.py][promnesia.sources.obsidian]]
- - autodetects Logseq graph and adds `logseq://` app protocol support [[file:../src/promnesia/sources/logseq.py][promnesia.sources.logseq]]
+ - autodetects Obsidian vault and adds `obsidian://` app protocol support [[file:../src/promnesia/sources/auto_obsidian.py][promnesia.sources.obsidian]]
+ - autodetects Logseq graph and adds `logseq://` app protocol support [[file:../src/promnesia/sources/auto_logseq.py][promnesia.sources.logseq]]
 """
 
+from __future__ import annotations
+
 import csv
-from concurrent.futures import ProcessPoolExecutor as Pool
-from contextlib import nullcontext
-from datetime import datetime
 import itertools
 import json
 import os
-from typing import Optional, Iterable, Union, List, Tuple, NamedTuple, Sequence, Iterator, Iterable, Callable, Any, Dict, Set
+from collections.abc import Callable, Iterable, Iterator, Sequence
+from concurrent.futures import ProcessPoolExecutor as Pool
+from contextlib import nullcontext
 from fnmatch import fnmatch
+from functools import wraps
 from pathlib import Path
-from functools import lru_cache, wraps
-import warnings
-
-import pytz
-
-from ..common import Visit, Url, PathIsh, get_logger, Loc, get_tmpdir, extract_urls, Extraction, Result, Results, mime, traverse, file_mtime, echain, logger
-from ..common import warn_once
-from ..config import use_cores
-
+from typing import Any, NamedTuple
+
+from promnesia.common import (
+    Loc,
+    PathIsh,
+    Result,
+    Results,
+    Visit,
+    echain,
+    extract_urls,
+    file_mtime,
+    get_logger,
+    get_tmpdir,
+    logger,
+    mime,
+    traverse,
+    warn_once,
+)
+from promnesia.config import use_cores
 
-from .filetypes import EUrl, Ctx
-from .auto_obsidian import obsidian_replacer
 from .auto_logseq import logseq_replacer
+from .auto_obsidian import obsidian_replacer
+from .filetypes import Ctx, EUrl
 
 
-def _collect(thing, path: List[str], result: List[EUrl]) -> None:
+def _collect(thing, path: list[str], result: list[EUrl]) -> None:
     if isinstance(thing, str):
         ctx: Ctx = tuple(path)
         result.extend([EUrl(url=u, ctx=ctx) for u in extract_urls(thing)])
@@ -51,15 +63,16 @@ def _collect(thing, path: List[str], result: List[EUrl]) -> None:
 
 
 # TODO mm. okay, I suppose could use kython consuming thingy?..
-def collect_from(thing) -> List[EUrl]:
-    uuu: List[EUrl] = []
-    path: List[str] = []
+def collect_from(thing) -> list[EUrl]:
+    uuu: list[EUrl] = []
+    path: list[str] = []
     _collect(thing, path, uuu)
     return uuu
 
 
 Urls = Iterator[EUrl]
 
+
 def _csv(path: Path) -> Urls:
     # TODO these could also have Loc to be fair..
     with path.open() as fo:
@@ -77,6 +90,7 @@ def _json(path: Path) -> Urls:
 def _plaintext(path: Path) -> Results:
     from . import shellcmd
     from .plaintext import extract_from_path
+
     yield from shellcmd.index(extract_from_path(path))
 
 
@@ -85,7 +99,8 @@ def _plaintext(path: Path) -> Results:
 def fallback(ex):
     """Falls back to plaintext in case of issues"""
 
-    fallback_active: Dict[Any, bool] = {}
+    fallback_active: dict[Any, bool] = {}
+
     @wraps(ex)
     def wrapped(path: Path):
         nonlocal fallback_active
@@ -99,79 +114,83 @@ def fallback(ex):
             except ModuleNotFoundError as me:
                 logger = get_logger()
                 logger.exception(me)
-                logger.warn('%s: %s not found, falling back to grep! "pip3 install --user %s" for better support!', path, me.name, me.name)
+                logger.warning(
+                    '%s: %s not found, falling back to grep! "pip3 install --user %s" for better support!',
+                    path,
+                    me.name,
+                    me.name,
+                )
                 yield me
                 fallback_active[ex] = True
                 do_fallback = True
         if do_fallback:
             yield from _plaintext(path)
+
     return wrapped
 
 
 @fallback
 def _markdown(path: Path) -> Results:
     from . import markdown
+
     yield from markdown.extract_from_file(path)
 
 
 @fallback
 def _html(path: Path) -> Results:
     from . import html
+
     yield from html.extract_from_file(path)
 
 
 @fallback
 def _org(path: Path) -> Results:
     from . import org
-    return org.extract_from_file(path)
-
-
-from .filetypes import TYPE2IDX, type2idx, IGNORE, CODE
-
-TYPE2IDX.update({
-    'application/json': _json,
-    '.json' : _json,
-    '.ipynb' : _json,
-
-    '.csv' : _csv,
-    'application/csv': _csv,
-
-    '.org' : _org,
-    '.org_archive': _org,
-
-    '.md' : _markdown,
-    '.markdown' : _markdown,
-
-    'text/plain' : _plaintext,
-    '.txt' : _plaintext,
-    '.page' : _plaintext,
-    '.rst' : _plaintext,
 
+    return org.extract_from_file(path)
 
-    # TODO doesn't work that great; weird stuff like
-    # builtins.ImportError.name|2019-07-10T12:12:35.584510+00:00|names::ImportError::node::names::name::node::fullname
-    # TODO could have stricter url extraction for that; always using http/https?
-    # '.ipynb' : _json,
-
-    '.html' : _html,
-    'text/html': _html,
-    'text/xml' : _plaintext,
 
-    'text/x-po': _plaintext, # some translation files
-})
+from .filetypes import CODE, IGNORE, TYPE2IDX, type2idx
+
+TYPE2IDX.update(
+    {
+        'application/json': _json,
+        '.json': _json,
+        '.ipynb': _json,
+        '.csv': _csv,
+        'application/csv': _csv,
+        '.org': _org,
+        '.org_archive': _org,
+        '.md': _markdown,
+        '.markdown': _markdown,
+        'text/plain': _plaintext,
+        '.txt': _plaintext,
+        '.page': _plaintext,
+        '.rst': _plaintext,
+        # TODO doesn't work that great; weird stuff like
+        # builtins.ImportError.name|2019-07-10T12:12:35.584510+00:00|names::ImportError::node::names::name::node::fullname
+        # TODO could have stricter url extraction for that; always using http/https?
+        # '.ipynb' : _json,
+        '.html': _html,
+        'text/html': _html,
+        'text/xml': _plaintext,
+        'text/x-po': _plaintext,  # some translation files
+    }
+)
 
 for t in CODE:
     TYPE2IDX[t] = _plaintext
 # TODO ok, mime doesn't really tell between org/markdown/etc anyway
 
 
-Replacer = Optional[Callable[[str, str], str]]
+Replacer = Callable[[str, str], str] | None
+
 
 def index(
-        *paths: PathIsh,
-        ignored: Union[Sequence[str], str]=(),
-        follow: bool=True,
-        replacer: Replacer=None,
+    *paths: PathIsh,
+    ignored: Sequence[str] | str = (),
+    follow: bool = True,
+    replacer: Replacer = None,
 ) -> Results:
     '''
     path : a path or list of paths to recursively index
@@ -204,16 +223,17 @@ def index(
     )
     yield from _index(apath, opts=opts)
 
+
 class Options(NamedTuple):
     ignored: Sequence[str]
     follow: bool
     # TODO option to add ignores? not sure..
     # TODO I don't like this replacer thing... think about removing it
     replacer: Replacer
-    root: Optional[Path]=None
+    root: Path | None = None
 
 
-def _index_file_aux(path: Path, opts: Options) -> Union[Exception, List[Result]]:
+def _index_file_aux(path: Path, opts: Options) -> Exception | list[Result]:
     # just a helper for the concurrent version (the generator isn't picklable)
     try:
         return list(_index_file(path, opts=opts))
@@ -226,14 +246,14 @@ def _index(path: Path, opts: Options) -> Results:
     logger = get_logger()
 
     cores = use_cores()
-    if cores is None: # do not use cores
+    if cores is None:  # do not use cores
         # todo use ExitStack instead?
         pool = nullcontext()
-        mapper = map # dummy pool
+        mapper = map  # dummy pool
     else:
         workers = None if cores == 0 else cores
-        pool = Pool(workers) # type: ignore
-        mapper = pool.map # type: ignore
+        pool = Pool(workers)  # type: ignore[assignment]
+        mapper = pool.map  # type: ignore[attr-defined]
 
     # iterate over resolved paths, to avoid duplicates
     def rit() -> Iterable[Path]:
@@ -243,18 +263,19 @@ def _index(path: Path, opts: Options) -> Results:
                 # TODO not sure if should log here... might end up with quite a bit of logs
                 logger.debug('ignoring %s: user ignore rules', p)
                 continue
-            if any(i in p.parts for i in IGNORE): # meh, not very efficient.. pass to traverse??
+            if any(i in p.parts for i in IGNORE):  # meh, not very efficient.. pass to traverse??
                 logger.debug('ignoring %s: default ignore rules', p)
                 continue
 
             p = p.resolve()
-            if not os.path.exists(p):
+            if not os.path.exists(p):  # noqa: PTH110
                 logger.debug('ignoring %s: broken symlink?', p)
                 continue
 
             yield p
 
     from more_itertools import unique_everseen
+
     it = unique_everseen(rit())
 
     with pool:
@@ -266,8 +287,10 @@ def _index(path: Path, opts: Options) -> Results:
 
 
 Mime = str
-from .filetypes import Ex # meh
-def by_path(pp: Path) -> Tuple[Optional[Ex], Optional[Mime]]:
+from .filetypes import Ex  # meh
+
+
+def by_path(pp: Path) -> tuple[Ex | None, Mime | None]:
     suf = pp.suffix.lower()
     # firt check suffixes, it's faster
     s = type2idx(suf)
@@ -289,9 +312,10 @@ def _index_file(pp: Path, opts: Options) -> Results:
     # TODO not even sure if it's used...
     suf = pp.suffix.lower()
 
-    if suf == '.xz': # TODO zstd?
+    if suf == '.xz':  # TODO zstd?
         import lzma
-        uname = pp.name[:-len('.xz')] # chop off suffix, so the downstream indexer can handle it
+
+        uname = pp.name[: -len('.xz')]  # chop off suffix, so the downstream indexer can handle it
 
     assert pp.is_absolute(), pp
     # make sure to keep hierarchy, otherwise might end up with some name conflicts if filenames clash
@@ -318,7 +342,7 @@ def _index_file(pp: Path, opts: Options) -> Results:
 
     logger.debug('indexing via %s: %s', ip.__name__, pp)
 
-    def indexer() -> Union[Urls, Results]:
+    def indexer() -> Urls | Results:
         # eh, annoying.. need to make more generic..
         idx = ip(pp)
         try:
@@ -346,17 +370,20 @@ def _index_file(pp: Path, opts: Options) -> Results:
             v = r
 
         loc = v.locator
-        if loc is not None and root is not None:
+        # FIXME double checke that v.locator indeed can't be none and remove the check?
+        if loc is not None and root is not None:  # type: ignore[redundant-expr]
             # meh. but it works
             # todo potentially, just use dataclasses instead...
             loc = loc._replace(title=loc.title.replace(str(root) + os.sep, ''))
             v = v._replace(locator=loc)
 
         if replacer is not None and root is not None:
-            upd: Dict[str, Any] = {}
+            upd: dict[str, Any] = {}
             href = v.locator.href
             if href is not None:
-                upd['locator'] = v.locator._replace(href=replacer(href, str(root)), title=replacer(v.locator.title, str(root)))
+                upd['locator'] = v.locator._replace(
+                    href=replacer(href, str(root)), title=replacer(v.locator.title, str(root))
+                )
             ctx = v.context
             if ctx is not None:
                 # TODO in context, http is unnecessary
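
Most of the churn in auto.py is a mechanical typing migration: `typing.Optional`/`Union`/`List`/`Dict` give way to PEP 585 builtin generics and PEP 604 `X | None` unions, with `from __future__ import annotations` added at the top of the module. A minimal, self-contained sketch of the style the diff moves to (toy names and body, not promnesia's API):

from __future__ import annotations  # annotations become lazy strings (PEP 563)

from collections.abc import Callable, Sequence

# Note: module-level aliases like this are still evaluated eagerly, so using
# `|` on a Callable generic requires Python 3.10+.
# Before: Replacer = Optional[Callable[[str, str], str]]
Replacer = Callable[[str, str], str] | None

def example_index(
    *paths: str,
    ignored: Sequence[str] | str = (),  # was: Union[Sequence[str], str]
    follow: bool = True,
    replacer: Replacer = None,
) -> list[str]:  # was: List[str]
    # toy body, just to keep the sketch runnable
    return [str(p) for p in paths]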
promnesia/sources/auto_logseq.py CHANGED
@@ -1,14 +1,15 @@
 import os.path
 import urllib.parse
 
+
 def logseq_replacer(path: str, root: str) -> str:
-    if not path.startswith("editor://") or not (path.endswith('.md') or path.endswith('.org')):
+    if not path.startswith("editor://") or not (path.endswith((".md", ".org"))):
         return path
-
-    graph = os.path.basename(root)
-    page_name = os.path.basename(path).rsplit('.', 1)[0]
+
+    graph = os.path.basename(root)  # noqa: PTH119
+    page_name = os.path.basename(path).rsplit('.', 1)[0]  # noqa: PTH119
     encoded_page_name = urllib.parse.quote(page_name)
-
+
     uri = f"logseq://graph/{graph}?page={encoded_page_name}"
 
     return uri
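
For reference, a hypothetical invocation of the replacer above (paths invented for illustration): it rewrites an `editor://` link into Logseq's app protocol, using the graph root's basename as the graph id and URL-encoding the page name.

from promnesia.sources.auto_logseq import logseq_replacer

uri = logseq_replacer(
    "editor:///home/user/notes/My Page.md",  # hypothetical editor:// link
    "/home/user/notes",                      # graph root; basename becomes the graph id
)
assert uri == "logseq://graph/notes?page=My%20Page"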
promnesia/sources/auto_obsidian.py CHANGED
@@ -1,8 +1,8 @@
 def obsidian_replacer(p: str, r: str) -> str:
     if not p.startswith("editor://") or not p.endswith('.md'):
         return p
-
+
     path = p.split('/', 2)[-1]
-
+
     uri = f"obsidian://{path}"
     return uri
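
Likewise for the Obsidian variant: everything after the `editor://` prefix is re-homed under the `obsidian://` protocol. A hypothetical invocation (paths invented for illustration):

from promnesia.sources.auto_obsidian import obsidian_replacer

# p.split('/', 2)[-1] drops the "editor://" prefix; the root argument is unused here
uri = obsidian_replacer("editor://vault/daily/2024-01-01.md", "/vault")
assert uri == "obsidian://vault/daily/2024-01-01.md"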
promnesia/sources/browser.py CHANGED
@@ -2,32 +2,37 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for visits from web browsers.
 '''
 
+from __future__ import annotations
+
 import re
-from typing import Optional, Iterator, Any, TYPE_CHECKING
 import warnings
+from collections.abc import Iterator
+from typing import TYPE_CHECKING, Any
 
-from promnesia.common import Results, Visit, Loc, Second, PathIsh, logger, is_sqlite_db
+from promnesia.common import Loc, PathIsh, Results, Second, Visit, is_sqlite_db, logger
 
 
-def index(p: Optional[PathIsh]=None) -> Results:
-    from . import hpi
+def index(p: PathIsh | None = None) -> Results:
+    from . import hpi  # noqa: F401
 
     if p is None:
         from my.browser.all import history
+
         yield from _index_new(history())
         return
 
     warnings.warn(
         f'Passing paths to promnesia.sources.browser is deprecated, you should setup my.browser.export instead. '
-        f'See https://github.com/seanbreckenridge/browserexport#hpi .'
+        f'See https://github.com/purarue/browserexport#hpi .'
         f'Will try to hack path to browser databases {p} into HPI config.'
     )
     try:
         yield from _index_new_with_adhoc_config(path=p)
-        return
     except Exception as e:
         logger.exception(e)
         warnings.warn("Hacking my.config.browser.export didn't work. You probably need to update HPI.")
+    else:
+        return
 
     logger.warning("Falling back onto legacy promnesia.sources.browser_legacy module")
     yield from _index_old(path=p)
@@ -35,22 +40,25 @@ def index(p: Optional[PathIsh]=None) -> Results:
 
 def _index_old(*, path: PathIsh) -> Results:
     from . import browser_legacy
+
     yield from browser_legacy.index(path)
 
 
 def _index_new_with_adhoc_config(*, path: PathIsh) -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
 
     ## previously, it was possible to index be called with multiple different db search paths
     ## this would result in each subsequent call to my.browser.export.history to invalidate cache every time
     ## so we hack cachew path so it's different for each call
     from my.core.core_config import config as hpi_core_config
+
     hpi_cache_dir = hpi_core_config.get_cache_dir()
     sanitized_path = re.sub(r'\W', '_', str(path))
    cache_override = None if hpi_cache_dir is None else hpi_cache_dir / sanitized_path
     ##
 
-    from my.core.common import classproperty, Paths, get_files
+    from my.core.common import Paths, classproperty, get_files
+
     class config:
         class core:
             cache_dir = cache_override
@@ -62,8 +70,10 @@ def _index_new_with_adhoc_config(*, path: PathIsh) -> Results:
             return tuple([f for f in get_files(path, glob='**/*') if is_sqlite_db(f)])
 
     from my.core.cfg import tmp_config
+
     with tmp_config(modules='my.browser.export|my.core.core_config', config=config):
         from my.browser.export import history
+
         yield from _index_new(history())
 
 
@@ -75,8 +85,8 @@ else:
 
 def _index_new(history: Iterator[BrowserMergeVisit]) -> Results:
     for v in history:
-        desc: Optional[str] = None
-        duration: Optional[Second] = None
+        desc: str | None = None
+        duration: Second | None = None
         metadata = v.metadata
         if metadata is not None:
             desc = metadata.title
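
The control-flow change in `index` above is the classic try/except/else cleanup: the `return` moves out of the `try` body into an `else` clause, so the `try` guards only the call that can fail and the fallback runs on the fall-through path. A minimal sketch of the pattern with stand-in names (not promnesia's API):

def fetch(path):
    try:
        result = modern(path)  # only this call is guarded
    except Exception as e:
        print(f"modern backend failed: {e}")  # fall through to the legacy path
    else:
        return result  # success: skip the fallback entirely
    return legacy(path)

# stand-in backends so the sketch runs
def modern(path):
    raise RuntimeError("not supported")

def legacy(path):
    return f"legacy({path})"

print(fetch("/tmp/db"))  # -> legacy(/tmp/db)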