promnesia 1.2.20240810__py3-none-any.whl → 1.4.20250909__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promnesia/__init__.py +18 -4
- promnesia/__main__.py +104 -78
- promnesia/cannon.py +108 -107
- promnesia/common.py +107 -88
- promnesia/compare.py +33 -30
- promnesia/compat.py +10 -10
- promnesia/config.py +37 -34
- promnesia/database/common.py +4 -3
- promnesia/database/dump.py +13 -13
- promnesia/database/load.py +7 -7
- promnesia/extract.py +19 -17
- promnesia/logging.py +27 -15
- promnesia/misc/install_server.py +32 -27
- promnesia/server.py +106 -79
- promnesia/sources/auto.py +104 -77
- promnesia/sources/auto_logseq.py +6 -5
- promnesia/sources/auto_obsidian.py +2 -2
- promnesia/sources/browser.py +20 -10
- promnesia/sources/browser_legacy.py +65 -50
- promnesia/sources/demo.py +7 -8
- promnesia/sources/fbmessenger.py +3 -3
- promnesia/sources/filetypes.py +22 -16
- promnesia/sources/github.py +9 -8
- promnesia/sources/guess.py +6 -2
- promnesia/sources/hackernews.py +7 -9
- promnesia/sources/hpi.py +5 -3
- promnesia/sources/html.py +11 -7
- promnesia/sources/hypothesis.py +3 -2
- promnesia/sources/instapaper.py +3 -2
- promnesia/sources/markdown.py +22 -12
- promnesia/sources/org.py +36 -17
- promnesia/sources/plaintext.py +41 -39
- promnesia/sources/pocket.py +5 -3
- promnesia/sources/reddit.py +24 -26
- promnesia/sources/roamresearch.py +5 -2
- promnesia/sources/rss.py +6 -8
- promnesia/sources/shellcmd.py +21 -11
- promnesia/sources/signal.py +27 -26
- promnesia/sources/smscalls.py +2 -3
- promnesia/sources/stackexchange.py +5 -4
- promnesia/sources/takeout.py +37 -34
- promnesia/sources/takeout_legacy.py +29 -19
- promnesia/sources/telegram.py +18 -12
- promnesia/sources/telegram_legacy.py +22 -11
- promnesia/sources/twitter.py +7 -6
- promnesia/sources/vcs.py +11 -6
- promnesia/sources/viber.py +11 -10
- promnesia/sources/website.py +8 -7
- promnesia/sources/zulip.py +3 -2
- promnesia/sqlite.py +13 -7
- promnesia/tests/common.py +10 -5
- promnesia/tests/server_helper.py +13 -10
- promnesia/tests/sources/test_auto.py +2 -3
- promnesia/tests/sources/test_filetypes.py +11 -8
- promnesia/tests/sources/test_hypothesis.py +10 -6
- promnesia/tests/sources/test_org.py +9 -5
- promnesia/tests/sources/test_plaintext.py +9 -8
- promnesia/tests/sources/test_shellcmd.py +13 -13
- promnesia/tests/sources/test_takeout.py +3 -5
- promnesia/tests/test_cannon.py +256 -239
- promnesia/tests/test_cli.py +12 -8
- promnesia/tests/test_compare.py +17 -13
- promnesia/tests/test_config.py +7 -8
- promnesia/tests/test_db_dump.py +15 -15
- promnesia/tests/test_extract.py +17 -10
- promnesia/tests/test_indexer.py +24 -18
- promnesia/tests/test_server.py +12 -13
- promnesia/tests/test_traverse.py +0 -2
- promnesia/tests/utils.py +3 -7
- promnesia-1.4.20250909.dist-info/METADATA +66 -0
- promnesia-1.4.20250909.dist-info/RECORD +80 -0
- {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/WHEEL +1 -2
- promnesia/kjson.py +0 -121
- promnesia/sources/__init__.pyi +0 -0
- promnesia-1.2.20240810.dist-info/METADATA +0 -54
- promnesia-1.2.20240810.dist-info/RECORD +0 -83
- promnesia-1.2.20240810.dist-info/top_level.txt +0 -1
- {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/entry_points.txt +0 -0
- {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info/licenses}/LICENSE +0 -0
promnesia/sources/signal.py
CHANGED
@@ -2,34 +2,33 @@
 Collects visits from Signal Desktop's encrypted SQLIite db(s).
 """
 
+from __future__ import annotations
+
 # Functions get their defaults from module-data.
 #
 # * Open-ciphered-db adapted from:
 #   https://github.com/carderne/signal-export/commit/2284c8f4
 # * Copyright (c) 2019 Chris Arderne, 2020 Kostis Anagnostopoulos
-
-
 import json
 import logging
 import platform
 import sqlite3
 import subprocess as sbp
+from collections.abc import Iterable, Iterator, Mapping
 from contextlib import contextmanager
 from pathlib import Path
 from textwrap import dedent, indent
-from typing import Any
+from typing import Any
 
 from ..common import Loc, PathIsh, Results, Visit, extract_urls, from_epoch
 
-PathIshes = Union[PathIsh, Iterable[PathIsh]]
-
 
 def index(
     *db_paths: PathIsh,
     http_only: bool = False,
-    locator_schema: str="editor",
+    locator_schema: str = "editor",
     append_platform_path: bool = False,
-    override_key:
+    override_key: str | None = None,
 ) -> Results:
     """
     :param db_paths:
@@ -51,8 +50,7 @@ def index(
     otherwise, this same key is used for harvesting all db-files.
     """
     logger.debug(
-        "http_only?(%s), locator_schema?(%s), append_platform_path?(%s), "
-        "overide_key given?(%s), db_paths: %s",
+        "http_only?(%s), locator_schema?(%s), append_platform_path?(%s), overide_key given?(%s), db_paths: %s",
        http_only,
        locator_schema,
        append_platform_path,
@@ -109,10 +107,10 @@ messages_query = dedent(
        id,
        type,
        coalesce(
-          profileFullName,
-          profileName,
+          profileFullName,
+          profileName,
           name,
-          profileFamilyName,
+          profileFamilyName,
           e164
        ) as aname,
        name,
@@ -171,7 +169,10 @@ def _expand_path(path_pattern: PathIsh) -> Iterable[Path]:
 
     Expansion code adapted from https://stackoverflow.com/a/51108375/548792
     to handle also degenerate cases (``'', '.', '/'``):
+    """
 
+    # NOTE: suppressing doctest from github actions
+    """
     >>> str(next(iter(_get_files('/'))))
     '/'
 
@@ -194,7 +195,7 @@ def _expand_path(path_pattern: PathIsh) -> Iterable[Path]:
     return path.glob(str(Path(*parts))) if parts else [path]
 
 
-def _expand_paths(paths:
+def _expand_paths(paths: PathIsh | Iterable[PathIsh]) -> Iterable[Path]:
     if _is_pathish(paths):
         paths = [paths]  # type: ignore[list-item]
     return [pp.resolve() for p in paths for pp in _expand_path(p)]  # type: ignore[union-attr]
@@ -214,7 +215,10 @@ def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path]
     one or more pathish
 
     Note: needed `append` here, to resolve paths.
+    """
 
+    # NOTE: suppressing doctest from running on Github actions
+    """
     >>> bool(collect_db_paths())  # my home-path
     True
     >>> collect_db_paths(None)
@@ -237,11 +241,10 @@ def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path]
     platform_name = platform.system()
     try:
         plat_paths = platform_db_paths[platform_name]
-    except LookupError:
+    except LookupError as le:
         raise ValueError(
-            f"Unknown platform({platform_name}
-
-        )
+            f"Unknown platform({platform_name}!\n  Expected one of {list(platform_db_paths.keys())}."
+        ) from le
 
     if db_paths and append:
         db_paths = [  # type: ignore[assignment]
@@ -261,7 +264,7 @@ def _config_for_dbfile(db_path: Path, default_key=None) -> Path:
 
 
 def _key_from_config(signal_desktop_config_path: PathIsh) -> str:
-    with
+    with Path(signal_desktop_config_path).open() as conf:
         return json.load(conf)["key"]
 
 
@@ -269,6 +272,7 @@ def _key_from_config(signal_desktop_config_path: PathIsh) -> str:
 def connect_db(
     db_path: Path,
     key,
+    *,
     decrypt_db: bool = False,
     sqlcipher_exe: PathIsh = "sqlcipher",
     **decryption_pragmas: Mapping[str, Any],
@@ -324,16 +328,14 @@ def connect_db(
         )
         sql = "\n".join(sql_cmds)
         cmd = [sqlcipher_exe, str(db_path)]
-        logger.debug(
-            "Decrypting db '%s' with cmd: %s <<<EOF\n%s\nEOF", db_path, cmd, sql
-        )
+        logger.debug("Decrypting db '%s' with cmd: %s <<<EOF\n%s\nEOF", db_path, cmd, sql)
         try:
             sbp.run(
                 cmd,
                 check=True,
                 input=sql,
                 capture_output=True,
-
+                text=True,
             )
         except sbp.CalledProcessError as ex:
             prefix = " " * 4
@@ -357,12 +359,11 @@ def connect_db(
             yield db
     finally:
         try:
-            if db:
+            if db is not None:
                 db.close()
         finally:
             if decrypted_file and decrypted_file.exists():
                 try:
-
                     logger.debug("Deleting temporary decrypted db: %s", decrypted_file)
                     decrypted_file.unlink()
                 except Exception as ex:
@@ -380,7 +381,7 @@ def _handle_row(row: tuple, db_path: PathIsh, locator_schema: str) -> Results:
     if not urls:
         return
 
-    assert (
+    assert (  # noqa: PT018
         text and mid and sender and chatname
     ), f"should have eliminated messages without 'http' or missing ids: {row}"
 
@@ -400,7 +401,7 @@ def _harvest_db(
     db_path: Path,
     messages_query: str,
     *,
-    override_key:
+    override_key: str | None = None,
     locator_schema: str = "editor",
     decrypt_db: bool = False,
     **decryption_pragmas,
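The reworked signature of `signal.index()` above (keyword spacing per PEP 8, `override_key: str | None = None` made possible by the new `from __future__ import annotations`) is the public entry point of this source. Below is a minimal sketch of wiring it into a promnesia config; the `Source`/`SOURCES` layout is the usual promnesia convention rather than something introduced by this diff, and the keyword arguments shown are illustrative defaults.

    # hypothetical config.py snippet -- not part of this diff
    from promnesia.common import Source
    from promnesia.sources import signal

    SOURCES = [
        # extra keyword arguments are forwarded to signal.index() (see its docstring parameters above)
        Source(signal.index, locator_schema="editor", append_platform_path=False),
    ]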
promnesia/sources/smscalls.py
CHANGED
@@ -2,15 +2,14 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] smscalls module
 '''
 
-from promnesia.common import
+from promnesia.common import Loc, Results, Visit, extract_urls
 
 
 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     from my.smscalls import messages
 
     for m in messages():
-
         if isinstance(m, Exception):
             yield m
             continue
promnesia/sources/stackexchange.py
CHANGED
@@ -2,12 +2,13 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for Stackexchange data.
 '''
 
-from
+from promnesia.common import Loc, Results, Visit
 
 
 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     import my.stackexchange.gdpr as G
+
     for v in G.votes():
         if isinstance(v, Exception):
             yield v
@@ -15,7 +16,7 @@ def index() -> Results:
         yield Visit(
             url=v.link,
             dt=v.when,
-            context='voted',
+            context='voted',  # todo use the votetype? although maybe worth ignoring downvotes
             # or, downvotes could have 'negative' ranking or something
-            locator=Loc.make(title='voted', href=v.link)
+            locator=Loc.make(title='voted', href=v.link),
         )
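Both small sources above share one shape that recurs across this release: `index()` is a generator of `Visit` objects, the `from . import hpi` line is kept purely for its import-time side effect (hence the new `# noqa: F401,I001` suppressions), and upstream failures are yielded instead of raised so a single bad record does not abort the whole indexing run. A condensed sketch of that pattern, with a hypothetical `fetch_items()` standing in for the HPI call:

    from promnesia.common import Loc, Results, Visit

    def index() -> Results:
        for item in fetch_items():           # hypothetical stand-in for e.g. my.smscalls.messages()
            if isinstance(item, Exception):  # HPI modules may hand back errors inline
                yield item                   # surface the error and keep going
                continue
            yield Visit(
                url=item.link,
                dt=item.when,
                context='voted',
                locator=Loc.make(title='voted', href=item.link),
            )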
promnesia/sources/takeout.py
CHANGED
@@ -1,11 +1,14 @@
 '''
 Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#mygoogletakeoutpaths][google.takeout]] module
 '''
-
+
+from __future__ import annotations
+
 import warnings
+from collections.abc import Iterable
+from typing import Any, NamedTuple
 
-from
-from ..compat import removeprefix
+from promnesia.common import Loc, Results, Visit, logger
 
 
 # incase user is using an old version of google_takeout_parser
@@ -14,25 +17,34 @@ class YoutubeCSVStub(NamedTuple):
 
 
 def index() -> Results:
-    from . import hpi
-    import json
+    from . import hpi  # noqa: F401
 
     try:
+        from google_takeout_parser.models import (
+            Activity,
+            ChromeHistory,
+            LikedYoutubeVideo,
+            YoutubeComment,
+        )
+        from google_takeout_parser.parse_csv import (
+            extract_comment_links,
+            reconstruct_comment_content,
+        )
         from my.google.takeout.parser import events
-        from google_takeout_parser.models import Activity, YoutubeComment, LikedYoutubeVideo, ChromeHistory
-        from google_takeout_parser.parse_csv import reconstruct_comment_content, extract_comment_links
     except ModuleNotFoundError as ex:
         logger.exception(ex)
         yield ex
 
-        warnings.warn(
+        warnings.warn(
+            "Please set up my.google.takeout.parser module for better takeout support. Falling back to legacy implementation."
+        )
 
         from . import takeout_legacy
+
         yield from takeout_legacy.index()
         return
 
-
-    _seen: Set[str] = {
+    _seen: set[str] = {
         # these are definitely not useful for promnesia
         'Location',
         'PlaceVisit',
@@ -42,10 +54,13 @@ def index() -> Results:
     imported_yt_csv_models = False
     try:
         from google_takeout_parser.models import CSVYoutubeComment, CSVYoutubeLiveChat
+
         imported_yt_csv_models = True
     except ImportError:
         # warn user to upgrade google_takeout_parser
-        warnings.warn(
+        warnings.warn(
+            "Please upgrade google_takeout_parser (`pip install -U google_takeout_parser`) to support the new format for youtube comments"
+        )
         CSVYoutubeComment = YoutubeCSVStub  # type: ignore[misc,assignment]
         CSVYoutubeLiveChat = YoutubeCSVStub  # type: ignore[misc,assignment]
 
@@ -54,7 +69,7 @@ def index() -> Results:
         if et_name in _seen:
             return
         _seen.add(et_name)
-        yield RuntimeError(f"Unhandled event {
+        yield RuntimeError(f"Unhandled event {type(e)!r}: {e}")
 
     for e in events():
         if isinstance(e, Exception):
@@ -67,13 +82,13 @@ def index() -> Results:
             # when you follow something from search the actual url goes after this
             # e.g. https://www.google.com/url?q=https://en.wikipedia.org/wiki/Clapham
             # note: also title usually starts with 'Visited ', in such case but perhaps fine to keep it
-            url = removeprefix(
+            url = url.removeprefix("https://www.google.com/url?q=")
             title = e.title
 
             if e.header == 'Chrome':
                 # title contains 'Visited <page title>' in this case
                 context = None
-                title = removeprefix(
+                title = title.removeprefix('Visited ')
             elif e.header in _CLEAR_CONTEXT_FOR_HEADERS:
                 # todo perhaps could add to some sort of metadata?
                 # only useful for debugging really
@@ -120,18 +135,14 @@ def index() -> Results:
         elif isinstance(e, LikedYoutubeVideo):
             # TODO not sure if desc makes sense here since it's not user produced data
             # it's just a part of video meta?
-            yield Visit(
-                url=e.link, dt=e.dt, context=e.desc, locator=Loc(title=e.title, href=e.link)
-            )
+            yield Visit(url=e.link, dt=e.dt, context=e.desc, locator=Loc(title=e.title, href=e.link))
         elif isinstance(e, YoutubeComment):
             for url in e.urls:
                 # todo: use url_metadata to improve locator?
                 # or maybe just extract first sentence?
-                yield Visit(
-                    url=url, dt=e.dt, context=e.content, locator=Loc(title=e.content, href=url)
-                )
+                yield Visit(url=url, dt=e.dt, context=e.content, locator=Loc(title=e.content, href=url))
         elif imported_yt_csv_models and isinstance(e, CSVYoutubeComment):
-            contentJSON =
+            contentJSON = e.contentJSON
             content = reconstruct_comment_content(contentJSON, format='text')
             if isinstance(content, Exception):
                 yield content
@@ -142,14 +153,10 @@ def index() -> Results:
                 continue
             context = f"Commented on {e.video_url}"
             for url in links:
-                yield Visit(
-
-                )
-            yield Visit(
-                url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url)
-            )
+                yield Visit(url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url))
+            yield Visit(url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url))
         elif imported_yt_csv_models and isinstance(e, CSVYoutubeLiveChat):
-            contentJSON =
+            contentJSON = e.contentJSON
             content = reconstruct_comment_content(contentJSON, format='text')
             if isinstance(content, Exception):
                 yield content
@@ -160,12 +167,8 @@ def index() -> Results:
                 continue
             context = f"Commented on livestream {e.video_url}"
             for url in links:
-                yield Visit(
-
-                )
-                yield Visit(
-                    url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url)
-                )
+                yield Visit(url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url))
+            yield Visit(url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url))
         else:
             yield from warn_once_if_not_seen(e)
 
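The CSV comment handling above hinges on a feature-detection import: when the installed google_takeout_parser is too old to provide the CSV models, the names are rebound to the local `YoutubeCSVStub` so the later `isinstance` checks simply never match. A condensed sketch of the pattern; the stub's field list here is illustrative rather than copied from the module:

    import warnings
    from typing import NamedTuple

    class YoutubeCSVStub(NamedTuple):  # placeholder type; the real stub lives in takeout.py
        contentJSON: str

    imported_yt_csv_models = False
    try:
        from google_takeout_parser.models import CSVYoutubeComment, CSVYoutubeLiveChat
        imported_yt_csv_models = True
    except ImportError:
        warnings.warn("Please upgrade google_takeout_parser (`pip install -U google_takeout_parser`)")
        CSVYoutubeComment = YoutubeCSVStub   # type: ignore[misc,assignment]
        CSVYoutubeLiveChat = YoutubeCSVStub  # type: ignore[misc,assignment]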
promnesia/sources/takeout_legacy.py
CHANGED
@@ -1,17 +1,23 @@
-from
+from __future__ import annotations
+
+from promnesia.common import Loc, Results, Visit, logger
+
 
 # TODO make an iterator, insert in db as we go? handle errors gracefully?
 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     from my.google.takeout.paths import get_takeouts
+
     takeouts = list(get_takeouts())
     # TODO if no takeouts, raise?
     # although could raise a warning on top level, when source emitted no takeouts
 
     # TODO youtube?
+    # fmt: off
     google_activities = [read_google_activity(t) for t in takeouts]
     search_activities = [read_search_activity(t) for t in takeouts]
     browser_histories = [read_browser_history_json(t) for t in takeouts]
+    # fmt: on
 
     key = lambda v: (v.dt, v.url)
     return chain(
@@ -21,25 +27,22 @@ def index() -> Results:
     )
 
 
-
-import
+import json
+from collections.abc import Iterable
+from datetime import datetime, timezone
 from itertools import chain
-from datetime import datetime
-from typing import List, Optional, Iterable, TYPE_CHECKING
 from pathlib import Path
-import json
-
-
-from .. import config
-
 
 from more_itertools import unique_everseen
 
+from promnesia import config
+
 try:
     from cachew import cachew
 except ModuleNotFoundError as me:
     if me.name != 'cachew':
         raise me
+
     # this module is legacy anyway, so just make it defensive
     def cachew(*args, **kwargs):  # type: ignore[no-redef]
         return lambda f: f
@@ -50,7 +53,9 @@ TakeoutPath = Path
 
 
 def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
-
+    # FIXME switch to actual kompress? and use CPath?
+    from my.core.kompress import kexists  # type: ignore[attr-defined]
+
     # TODO glob
     # TODO not sure about windows path separators??
     spath = 'Takeout/My Activity/' + kind
@@ -61,7 +66,8 @@ def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
 
     locator = Loc.file(spath)
     from my.google.takeout.html import read_html
-
+
+    for dt, url, _title in read_html(takeout, spath):
         yield Visit(
             url=url,
             dt=dt,
@@ -69,6 +75,7 @@ def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
             debug=kind,
         )
 
+
 def _cpath(suffix: str):
     def fun(takeout: TakeoutPath):
         cache_dir = config.get().cache_dir
@@ -76,23 +83,27 @@ def _cpath(suffix: str):
             return None
         # doesn't need a nontrivial hash function, timestsamp is encoded in name
         return cache_dir / (takeout.name + '_' + suffix + '.cache')
+
     return fun
 
 
 # todo caching should this be HPI responsibility?
 # todo set global cachew logging on init?
-@cachew(cache_path=_cpath('google_activity')
+@cachew(cache_path=_cpath('google_activity'), logger=logger)
 def read_google_activity(takeout: TakeoutPath) -> Iterable[Visit]:
     return _read_myactivity_html(takeout, 'Chrome/MyActivity.html')
 
-
+
+@cachew(cache_path=_cpath('search_activity'), logger=logger)
 def read_search_activity(takeout: TakeoutPath) -> Iterable[Visit]:
     return _read_myactivity_html(takeout, 'Search/MyActivity.html')
 
+
 # TODO add this to tests?
 @cachew(cache_path=_cpath('browser_activity'), logger=logger)
 def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
-    from my.core.kompress import kexists, kopen
+    from my.core.kompress import kexists, kopen  # type: ignore[attr-defined]
+
     # not sure if this deserves moving to HPI? it's pretty trivial for now
     spath = 'Takeout/Chrome/BrowserHistory.json'
 
@@ -107,13 +118,13 @@ def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
     # TODO this should be supported by HPI now?
 
     j = None
-    with kopen(takeout, spath) as fo:
+    with kopen(takeout, spath) as fo:  # TODO iterative parser?
         j = json.load(fo)
 
     hist = j['Browser History']
     for item in hist:
         url = item['url']
-        time = datetime.fromtimestamp(item['time_usec'] / 10
+        time = datetime.fromtimestamp(item['time_usec'] / 10**6, tz=timezone.utc)
         # TODO any more interesitng info?
         yield Visit(
             url=url,
@@ -121,4 +132,3 @@ def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
             locator=locator,
             debug='Chrome/BrowserHistory.json',
         )
-
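Two small but load-bearing changes in this pair of files: the old `promnesia.compat.removeprefix` shim is replaced by the built-in `str.removeprefix` (available since Python 3.9), and the legacy Chrome history parser now produces timezone-aware datetimes. Both are plain standard-library behaviours, illustrated briefly:

    from datetime import datetime, timezone

    # str.removeprefix strips the prefix only when present, otherwise returns the string unchanged
    url = "https://www.google.com/url?q=https://en.wikipedia.org/wiki/Clapham"
    print(url.removeprefix("https://www.google.com/url?q="))  # https://en.wikipedia.org/wiki/Clapham

    # BrowserHistory.json stores microsecond epoch timestamps; tz= makes the result UTC-aware
    time_usec = 1_600_000_000_000_000  # illustrative value
    print(datetime.fromtimestamp(time_usec / 10**6, tz=timezone.utc))  # 2020-09-13 12:26:40+00:00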
promnesia/sources/telegram.py
CHANGED
@@ -1,11 +1,12 @@
-from
-
+from __future__ import annotations
+
 import warnings
+from urllib.parse import unquote  # TODO mm, make it easier to rememember to use...
 
-from promnesia.common import
+from promnesia.common import Loc, PathIsh, Results, Visit, extract_urls, logger
 
 
-def index(database:
+def index(database: PathIsh | None = None, *, http_only: bool = False, with_extra_media_info: bool = False) -> Results:
     if database is None:
         # fully relying on HPI
         yield from _index_new(http_only=http_only, with_extra_media_info=with_extra_media_info)
@@ -16,11 +17,14 @@ def index(database: Optional[PathIsh]=None, *, http_only: bool=False, with_extra
             f'Will try to hack database path {database} into HPI config.'
         )
         try:
-            yield from _index_new_with_adhoc_config(
-
+            yield from _index_new_with_adhoc_config(
+                database=database, http_only=http_only, with_extra_media_info=with_extra_media_info
+            )
         except Exception as e:
             logger.exception(e)
             warnings.warn("Hacking my.config.telegram.telegram_backup didn't work. You probably need to update HPI.")
+        else:
+            return
 
     logger.warning("Falling back onto promnesia.sources.telegram_legacy module")
     yield from _index_legacy(database=database, http_only=http_only)
@@ -28,11 +32,12 @@ def index(database: Optional[PathIsh]=None, *, http_only: bool=False, with_extra
 
 def _index_legacy(*, database: PathIsh, http_only: bool) -> Results:
     from . import telegram_legacy
+
     yield from telegram_legacy.index(database=database, http_only=http_only)
 
 
 def _index_new_with_adhoc_config(*, database: PathIsh, http_only: bool, with_extra_media_info: bool) -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401
 
     class config:
         class telegram:
@@ -40,19 +45,20 @@ def _index_new_with_adhoc_config(*, database: PathIsh, http_only: bool, with_ext
             export_path: PathIsh = database
 
     from my.core.cfg import tmp_config
+
     with tmp_config(modules='my.telegram.telegram_backup', config=config):
         yield from _index_new(http_only=http_only, with_extra_media_info=with_extra_media_info)
 
 
 def _index_new(*, http_only: bool, with_extra_media_info: bool) -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     from my.telegram.telegram_backup import messages
 
     extra_where = "(has_media == 1 OR text LIKE '%http%')" if http_only else None
-    for
-
-
-    )
+    for m in messages(
+        with_extra_media_info=with_extra_media_info,
+        extra_where=extra_where,
+    ):
         text = m.text
 
         urls = extract_urls(text)
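The `else: return` added to `index()` above changes the control flow: the `else` clause of a `try` statement runs only when no exception escaped the `try` body, so a successful ad-hoc-config run now stops there instead of also falling through to the legacy indexer. A minimal stand-alone illustration of the construct (the function names are illustrative, not taken from the diff):

    def attempt(primary, fallback):
        try:
            result = primary()
        except Exception:
            pass               # swallow the failure; fall through to the fallback below
        else:
            return result      # primary() succeeded, so the fallback is skipped
        return fallback()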
promnesia/sources/telegram_legacy.py
CHANGED
@@ -2,34 +2,42 @@
 Uses [[https://github.com/fabianonline/telegram_backup#readme][telegram_backup]] database for messages data
 '''
 
-from
+from __future__ import annotations
+
 import sqlite3
+from pathlib import Path
 from textwrap import dedent
-from typing import
-from urllib.parse import unquote
+from typing import TypeVar
+from urllib.parse import unquote  # TODO mm, make it easier to rememember to use...
+
+from promnesia.common import (
+    Loc,
+    PathIsh,
+    Results,
+    Visit,
+    echain,
+    extract_urls,
+    from_epoch,
+)
 
-from ..common import PathIsh, Visit, get_logger, Loc, extract_urls, from_epoch, Results, echain
 from ..sqlite import sqlite_connection
 
 T = TypeVar("T")
 
 
-def unwrap(res:
+def unwrap(res: T | Exception) -> T:
     if isinstance(res, Exception):
         raise res
-
-    return res
+    return res
 
 
-def index(database: PathIsh, *, http_only: bool=False) -> Results:
+def index(database: PathIsh, *, http_only: bool = False) -> Results:
     """
     :param database:
         the path of the sqlite generated by the _telegram_backup_ java program
     :param http_only:
         when true, do not collect IP-addresses and `python.py` strings
     """
-    logger = get_logger()
-
     path = Path(database)
     assert path.is_file(), path
 
@@ -66,7 +74,8 @@ def index(database: PathIsh, *, http_only: bool=False) -> Results:
             M.message_type NOT IN ('service_message', 'empty_message')
             {extra_criteria}
         ORDER BY time;
-    """
+        """
+    )
 
     with sqlite_connection(path, immutable=True, row_factory='row') as db:
         # TODO yield error if chatname or chat or smth else is null?
@@ -94,6 +103,7 @@ def _handle_row(row: sqlite3.Row) -> Results:
     urls = extract_urls(text)
     if len(urls) == 0:
         return
+    # fmt: off
     dt = from_epoch(row['time'])
     mid: str = unwrap(row['mid'])
 
@@ -101,6 +111,7 @@ def _handle_row(row: sqlite3.Row) -> Results:
     sender: str = unwrap(row['sender'])
    chatname: str = unwrap(row['chatname'])
    chat: str = unwrap(row['chat'])
+    # fmt: on
 
     in_context = f'https://t.me/{chat}/{mid}'
     for u in urls: