promnesia 1.2.20230515__py3-none-any.whl → 1.3.20241021__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promnesia/__init__.py +14 -3
- promnesia/__main__.py +60 -35
- promnesia/cannon.py +27 -27
- promnesia/common.py +85 -67
- promnesia/compare.py +21 -22
- promnesia/compat.py +10 -10
- promnesia/config.py +23 -23
- promnesia/database/common.py +67 -0
- promnesia/database/dump.py +188 -0
- promnesia/{read_db.py → database/load.py} +16 -17
- promnesia/extract.py +14 -11
- promnesia/kjson.py +12 -11
- promnesia/logging.py +4 -4
- promnesia/misc/__init__.pyi +0 -0
- promnesia/misc/config_example.py +1 -2
- promnesia/misc/install_server.py +7 -9
- promnesia/server.py +57 -47
- promnesia/sources/__init__.pyi +0 -0
- promnesia/sources/auto.py +50 -35
- promnesia/sources/auto_logseq.py +6 -5
- promnesia/sources/auto_obsidian.py +2 -2
- promnesia/sources/browser.py +14 -9
- promnesia/sources/browser_legacy.py +26 -16
- promnesia/sources/demo.py +19 -3
- promnesia/sources/fbmessenger.py +3 -2
- promnesia/sources/filetypes.py +16 -7
- promnesia/sources/github.py +7 -9
- promnesia/sources/guess.py +2 -1
- promnesia/sources/hackernews.py +2 -2
- promnesia/sources/hpi.py +2 -2
- promnesia/sources/html.py +7 -5
- promnesia/sources/hypothesis.py +4 -3
- promnesia/sources/instapaper.py +2 -2
- promnesia/sources/markdown.py +31 -21
- promnesia/sources/org.py +27 -13
- promnesia/sources/plaintext.py +30 -29
- promnesia/sources/pocket.py +3 -2
- promnesia/sources/reddit.py +20 -19
- promnesia/sources/roamresearch.py +2 -1
- promnesia/sources/rss.py +4 -5
- promnesia/sources/shellcmd.py +19 -6
- promnesia/sources/signal.py +33 -24
- promnesia/sources/smscalls.py +2 -2
- promnesia/sources/stackexchange.py +4 -3
- promnesia/sources/takeout.py +76 -9
- promnesia/sources/takeout_legacy.py +24 -12
- promnesia/sources/telegram.py +13 -11
- promnesia/sources/telegram_legacy.py +18 -7
- promnesia/sources/twitter.py +6 -5
- promnesia/sources/vcs.py +5 -3
- promnesia/sources/viber.py +10 -9
- promnesia/sources/website.py +4 -4
- promnesia/sources/zulip.py +3 -2
- promnesia/sqlite.py +7 -4
- promnesia/tests/__init__.py +0 -0
- promnesia/tests/common.py +140 -0
- promnesia/tests/server_helper.py +67 -0
- promnesia/tests/sources/__init__.py +0 -0
- promnesia/tests/sources/test_auto.py +65 -0
- promnesia/tests/sources/test_filetypes.py +43 -0
- promnesia/tests/sources/test_hypothesis.py +39 -0
- promnesia/tests/sources/test_org.py +64 -0
- promnesia/tests/sources/test_plaintext.py +25 -0
- promnesia/tests/sources/test_shellcmd.py +21 -0
- promnesia/tests/sources/test_takeout.py +56 -0
- promnesia/tests/test_cannon.py +325 -0
- promnesia/tests/test_cli.py +40 -0
- promnesia/tests/test_compare.py +30 -0
- promnesia/tests/test_config.py +289 -0
- promnesia/tests/test_db_dump.py +222 -0
- promnesia/tests/test_extract.py +65 -0
- promnesia/tests/test_extract_urls.py +43 -0
- promnesia/tests/test_indexer.py +251 -0
- promnesia/tests/test_server.py +291 -0
- promnesia/tests/test_traverse.py +39 -0
- promnesia/tests/utils.py +35 -0
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/METADATA +15 -18
- promnesia-1.3.20241021.dist-info/RECORD +83 -0
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/WHEEL +1 -1
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/entry_points.txt +0 -1
- promnesia/dump.py +0 -105
- promnesia-1.2.20230515.dist-info/RECORD +0 -58
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/LICENSE +0 -0
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/top_level.txt +0 -0
promnesia/sources/roamresearch.py  CHANGED
@@ -2,7 +2,7 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for Roam Research data
 '''

-from
+from promnesia.common import Loc, Results, Visit, extract_urls


 def index() -> Results:
@@ -43,6 +43,7 @@ def _collect(node: 'RoamNode') -> Results:


 import typing
+
 if typing.TYPE_CHECKING:
     import my.roamresearch as RR
     RoamNode = RR.Node
promnesia/sources/rss.py  CHANGED
@@ -2,14 +2,12 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for RSS data.
 '''

-from itertools import chain
-
-from ..common import Visit, Loc, extract_urls, Results, get_logger
-
 from datetime import datetime

 import pytz

+from promnesia.common import Loc, Results, Visit
+
 # arbitrary, 2011-11-04 00:05:23.283+00:00
 default_datetime = datetime.fromtimestamp(1320365123, tz=pytz.utc)
 # TODO FIXME allow for visit not to have datetime?
@@ -17,12 +15,13 @@ default_datetime = datetime.fromtimestamp(1320365123, tz=pytz.utc)

 def index() -> Results:
     from my.rss.all import subscriptions
+
     for feed in subscriptions():
         # TODO locator should be optional too? although could use direct link in the rss reader interface
         locator = Loc.make(title='my.rss')
         yield Visit(
             url=feed.url,
             dt=feed.created_at or default_datetime,
-            context=
+            context='RSS subscription',  # TODO use 'provider', etc?
             locator=locator,
         )
promnesia/sources/shellcmd.py  CHANGED
@@ -2,18 +2,31 @@
 Greps out URLs from an arbitrary shell command results.
 """

-from
+from __future__ import annotations
+
 import os
 import re
-from subprocess import run, PIPE
-from typing import Union, Sequence
 import warnings
+from collections.abc import Sequence
+from datetime import datetime
+from subprocess import PIPE, run
+
+from promnesia.common import (
+    Loc,
+    PathIsh,
+    Results,
+    Visit,
+    _is_windows,
+    extract_urls,
+    file_mtime,
+    get_system_tz,
+    now_tz,
+)

-from ..common import Visit, Loc, Results, extract_urls, file_mtime, get_system_tz, now_tz, _is_windows, PathIsh
 from .plaintext import _has_grep


-def index(command: Union[str, Sequence[PathIsh]]) -> Results:
+def index(command: str | Sequence[PathIsh]) -> Results:
     cmd: Sequence[PathIsh]
     cmds: str
     if isinstance(command, str):
@@ -71,7 +84,7 @@ def index(command: Union[str, Sequence[PathIsh]]) -> Results:
                 context=context,
             )

-    r = run(cmd, stdout=PIPE)
+    r = run(cmd, stdout=PIPE, check=False)
     if r.returncode > 0:
         if not (cmd[0] in {'grep', 'findstr'} and r.returncode == 1):  # ugh. grep returns 1 on no matches...
             r.check_returncode()
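
Note on the hunk above: run() now gets check=False and the return code is checked by hand, because grep (and findstr) exit with status 1 when nothing matched, which is not an error for indexing purposes. A minimal standalone sketch of that pattern — the command and path are made-up placeholders, not promnesia code:

    from subprocess import PIPE, run

    cmd = ['grep', '-rI', 'http', '/tmp/notes']  # hypothetical command and path
    r = run(cmd, stdout=PIPE, check=False)       # don't raise automatically
    if r.returncode > 0:
        # exit code 1 from grep/findstr just means "no matches"; only real failures should raise
        if not (cmd[0] in {'grep', 'findstr'} and r.returncode == 1):
            r.check_returncode()
    for line in r.stdout.decode(errors='replace').splitlines():
        print(line)
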
promnesia/sources/signal.py  CHANGED
@@ -1,23 +1,23 @@
 """
 Collects visits from Signal Desktop's encrypted SQLIite db(s).
 """
+from __future__ import annotations

 # Functions get their defaults from module-data.
 #
 # * Open-ciphered-db adapted from:
 # https://github.com/carderne/signal-export/commit/2284c8f4
 # * Copyright (c) 2019 Chris Arderne, 2020 Kostis Anagnostopoulos
-
-
 import json
 import logging
 import platform
 import sqlite3
 import subprocess as sbp
+from collections.abc import Iterable, Iterator, Mapping
 from contextlib import contextmanager
 from pathlib import Path
 from textwrap import dedent, indent
-from typing import Any,
+from typing import Any, Union

 from ..common import Loc, PathIsh, Results, Visit, extract_urls, from_epoch

@@ -29,7 +29,7 @@ def index(
     http_only: bool = False,
     locator_schema: str="editor",
     append_platform_path: bool = False,
-    override_key:
+    override_key: str | None = None,
 ) -> Results:
     """
     :param db_paths:
@@ -63,6 +63,8 @@ def index(
     logger.debug("Paths to harvest: %s", db_paths)
     if not http_only:
         sql_query = f"{messages_query}\nWHERE body LIKE '%http%'"
+    else:
+        sql_query = messages_query

     for db_path in resolved_db_paths:
         logger.info("Ciphered db to harvest %s", db_path)
@@ -106,12 +108,18 @@ messages_query = dedent(
         SELECT
             id,
             type,
-            coalesce(
+            coalesce(
+                profileFullName,
+                profileName,
+                name,
+                profileFamilyName,
+                e164
+            ) as aname,
             name,
             profileName,
             profileFamilyName,
             e164,
-
+            serviceId
         FROM conversations
     ),
     Msgs AS (
@@ -123,8 +131,8 @@ messages_query = dedent(
                 M.received_at,
                 M.sent_at
             ) AS timestamp,
-            IIF(M.type =
-
+            IIF(M.type = 'outgoing',
+                'Me (' || C2.aname || ')',
                 C2.aname
             ) AS sender,
             M.conversationId AS cid,
@@ -138,7 +146,7 @@ messages_query = dedent(
         INNER JOIN Cons AS C1
             ON M.conversationId = C1.id
         INNER JOIN Cons AS C2
-            ON M.
+            ON M.sourceServiceId = C2.serviceId
     )
     SELECT id, timestamp, sender, cid, chatname, body
     FROM Msgs
@@ -188,8 +196,8 @@ def _expand_path(path_pattern: PathIsh) -> Iterable[Path]:

 def _expand_paths(paths: PathIshes) -> Iterable[Path]:
     if _is_pathish(paths):
-        paths = [paths]  # type: ignore[
-    return [pp.resolve() for p in paths for pp in _expand_path(p)]  # type: ignore[union-attr
+        paths = [paths]  # type: ignore[list-item]
+    return [pp.resolve() for p in paths for pp in _expand_path(p)]  # type: ignore[union-attr]


 def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path]:
@@ -229,14 +237,14 @@ def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path]
     platform_name = platform.system()
     try:
         plat_paths = platform_db_paths[platform_name]
-    except LookupError:
+    except LookupError as le:
         raise ValueError(
             f"Unknown platform({platform_name}!"
             f"\n Expected one of {list(platform_db_paths.keys())}."
-        )
+        ) from le

     if db_paths and append:
-        db_paths = [  # type: ignore[
+        db_paths = [  # type: ignore[assignment]
             *([db_paths] if _is_pathish(db_paths) else db_paths),
             plat_paths,
         ]
@@ -253,7 +261,7 @@ def _config_for_dbfile(db_path: Path, default_key=None) -> Path:


 def _key_from_config(signal_desktop_config_path: PathIsh) -> str:
-    with
+    with Path(signal_desktop_config_path).open() as conf:
         return json.load(conf)["key"]


@@ -261,6 +269,7 @@ def _key_from_config(signal_desktop_config_path: PathIsh) -> str:
 def connect_db(
     db_path: Path,
     key,
+    *,
     decrypt_db: bool = False,
     sqlcipher_exe: PathIsh = "sqlcipher",
     **decryption_pragmas: Mapping[str, Any],
@@ -310,8 +319,8 @@ def connect_db(
         sql_cmds.extend(
             [
                 f"ATTACH DATABASE '{decrypted_file}' AS plaintext KEY '';",
-
-
+                "SELECT sqlcipher_export('plaintext');",
+                "DETACH DATABASE plaintext;",
             ]
         )
         sql = "\n".join(sql_cmds)
@@ -320,12 +329,12 @@ def connect_db(
             "Decrypting db '%s' with cmd: %s <<<EOF\n%s\nEOF", db_path, cmd, sql
         )
        try:
-            sbp.run(
+            sbp.run(
                 cmd,
                 check=True,
                 input=sql,
                 capture_output=True,
-
+                text=True,
             )
         except sbp.CalledProcessError as ex:
             prefix = " " * 4
@@ -335,7 +344,7 @@ def connect_db(
             ) from None
         db = sqlite3.connect(f"file:{decrypted_file}?mode=ro", uri=True)
     else:
-        from sqlcipher3 import dbapi2  # type: ignore[import]
+        from sqlcipher3 import dbapi2  # type: ignore[import-not-found]

         db = dbapi2.connect(f"file:{db_path}?mode=ro", uri=True)
         # Param-binding doesn't work for pragmas, so use a direct string concat.
@@ -372,7 +381,7 @@ def _handle_row(row: tuple, db_path: PathIsh, locator_schema: str) -> Results:
     if not urls:
         return

-    assert (
+    assert (  # noqa: PT018
         text and mid and sender and chatname
     ), f"should have eliminated messages without 'http' or missing ids: {row}"

@@ -392,7 +401,7 @@ def _harvest_db(
     db_path: Path,
     messages_query: str,
     *,
-    override_key:
+    override_key: str | None = None,
     locator_schema: str = "editor",
     decrypt_db: bool = False,
     **decryption_pragmas,
@@ -419,9 +428,9 @@ def _harvest_db(

     with connect_db(db_path, key, decrypt_db=decrypt_db, **decryption_pragmas) as db:
         for mid, tstamp, sender, cid, chatname, text in db.execute(messages_query):
+            tstamp = from_epoch(tstamp / 1000.0)
+            row = (mid, tstamp, sender, cid, chatname, text)
             try:
-                tstamp = from_epoch(tstamp / 1000.0)
-                row = (mid, tstamp, sender, cid, chatname, text)
                 yield from _handle_row(row, db_path, locator_schema)
             except Exception as ex:
                 # TODO: also insert errors in db
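
For orientation, the decryption branch of connect_db() shown above pipes SQL into the external sqlcipher shell to export a plaintext copy of Signal Desktop's ciphered database, then opens that copy read-only with sqlite3. A rough standalone sketch of that flow, with placeholder paths and key (the real code reads the key from Signal Desktop's config.json and assembles the pragma list dynamically; the PRAGMA key line here is an assumption based on standard SQLCipher syntax, not copied from the module):

    import sqlite3
    import subprocess as sbp

    db_path = '/path/to/Signal/sql/db.sqlite'        # placeholder
    decrypted_file = '/tmp/signal-plaintext.sqlite'  # placeholder
    key = '0123abcd'                                 # placeholder; normally taken from config.json

    sql = "\n".join([
        f"PRAGMA key = \"x'{key}'\";",  # assumption: raw hex key, standard SQLCipher pragma
        f"ATTACH DATABASE '{decrypted_file}' AS plaintext KEY '';",
        "SELECT sqlcipher_export('plaintext');",
        "DETACH DATABASE plaintext;",
    ])
    # same invocation style as the hunk above: feed the SQL to the sqlcipher CLI via stdin
    sbp.run(['sqlcipher', db_path], check=True, input=sql, capture_output=True, text=True)
    db = sqlite3.connect(f"file:{decrypted_file}?mode=ro", uri=True)
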
promnesia/sources/smscalls.py  CHANGED
@@ -2,11 +2,11 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] smscalls module
 '''

-from promnesia.common import
+from promnesia.common import Loc, Results, Visit, extract_urls


 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     from my.smscalls import messages

     for m in messages():
promnesia/sources/stackexchange.py  CHANGED
@@ -2,12 +2,13 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for Stackexchange data.
 '''

-from
+from promnesia.common import Loc, Results, Visit


 def index() -> Results:
-    from . import hpi
-    import my.stackexchange.gdpr as G
+    from . import hpi  # noqa: F401,I001
+    import my.stackexchange.gdpr as G
+
     for v in G.votes():
         if isinstance(v, Exception):
             yield v
promnesia/sources/takeout.py  CHANGED
@@ -1,19 +1,36 @@
 '''
 Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#mygoogletakeoutpaths][google.takeout]] module
 '''
-
+
+from __future__ import annotations
+
 import warnings
+from collections.abc import Iterable
+from typing import Any, NamedTuple
+
+from promnesia.common import Loc, Results, Visit, logger

-
-
+
+# incase user is using an old version of google_takeout_parser
+class YoutubeCSVStub(NamedTuple):
+    contentJSON: str


 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401

     try:
+        from google_takeout_parser.models import (
+            Activity,
+            ChromeHistory,
+            LikedYoutubeVideo,
+            YoutubeComment,
+        )
+        from google_takeout_parser.parse_csv import (
+            extract_comment_links,
+            reconstruct_comment_content,
+        )
         from my.google.takeout.parser import events
-        from google_takeout_parser.models import Activity, YoutubeComment, LikedYoutubeVideo, ChromeHistory
     except ModuleNotFoundError as ex:
         logger.exception(ex)
         yield ex
@@ -24,18 +41,30 @@ def index() -> Results:
         yield from takeout_legacy.index()
         return

-
+
+    _seen: set[str] = {
         # these are definitely not useful for promnesia
         'Location',
         'PlaceVisit',
         'PlayStoreAppInstall',
     }
+
+    imported_yt_csv_models = False
+    try:
+        from google_takeout_parser.models import CSVYoutubeComment, CSVYoutubeLiveChat
+        imported_yt_csv_models = True
+    except ImportError:
+        # warn user to upgrade google_takeout_parser
+        warnings.warn("Please upgrade google_takeout_parser (`pip install -U google_takeout_parser`) to support the new format for youtube comments")
+        CSVYoutubeComment = YoutubeCSVStub  # type: ignore[misc,assignment]
+        CSVYoutubeLiveChat = YoutubeCSVStub  # type: ignore[misc,assignment]
+
     def warn_once_if_not_seen(e: Any) -> Iterable[Exception]:
         et_name = type(e).__name__
         if et_name in _seen:
             return
         _seen.add(et_name)
-        yield RuntimeError(f"Unhandled event {
+        yield RuntimeError(f"Unhandled event {type(e)!r}: {e}")

     for e in events():
         if isinstance(e, Exception):
@@ -48,13 +77,13 @@ def index() -> Results:
             # when you follow something from search the actual url goes after this
             # e.g. https://www.google.com/url?q=https://en.wikipedia.org/wiki/Clapham
             # note: also title usually starts with 'Visited ', in such case but perhaps fine to keep it
-            url = removeprefix(
+            url = url.removeprefix("https://www.google.com/url?q=")
             title = e.title

             if e.header == 'Chrome':
                 # title contains 'Visited <page title>' in this case
                 context = None
-                title = removeprefix(
+                title = title.removeprefix('Visited ')
             elif e.header in _CLEAR_CONTEXT_FOR_HEADERS:
                 # todo perhaps could add to some sort of metadata?
                 # only useful for debugging really
@@ -71,6 +100,8 @@ def index() -> Results:
             elif e.products == ['Ads']:
                 # header contains some weird internal ad id in this case
                 context = None
+            else:
+                context = None
             # NOTE: at this point seems that context always ends up as None (at least for @karlicoss as of 20230131)
             # so alternatively could just force it to be None instead of manual dispatching :shrug:
             yield Visit(
@@ -109,6 +140,42 @@ def index() -> Results:
             yield Visit(
                 url=url, dt=e.dt, context=e.content, locator=Loc(title=e.content, href=url)
             )
+        elif imported_yt_csv_models and isinstance(e, CSVYoutubeComment):
+            contentJSON = e.contentJSON
+            content = reconstruct_comment_content(contentJSON, format='text')
+            if isinstance(content, Exception):
+                yield content
+                continue
+            links = extract_comment_links(contentJSON)
+            if isinstance(links, Exception):
+                yield links
+                continue
+            context = f"Commented on {e.video_url}"
+            for url in links:
+                yield Visit(
+                    url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url)
+                )
+            yield Visit(
+                url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url)
+            )
+        elif imported_yt_csv_models and isinstance(e, CSVYoutubeLiveChat):
+            contentJSON = e.contentJSON
+            content = reconstruct_comment_content(contentJSON, format='text')
+            if isinstance(content, Exception):
+                yield content
+                continue
+            links = extract_comment_links(contentJSON)
+            if isinstance(links, Exception):
+                yield links
+                continue
+            context = f"Commented on livestream {e.video_url}"
+            for url in links:
+                yield Visit(
+                    url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url)
+                )
+            yield Visit(
+                url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url)
+            )
         else:
             yield from warn_once_if_not_seen(e)

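
The CSV-comment handling added above depends on models that only newer google_takeout_parser releases ship, so the module degrades gracefully: if the import fails it warns and binds the names to a stub NamedTuple, which makes the later isinstance() checks simply never match. A condensed, generic sketch of that fallback-import pattern (trimmed from the hunk above, not a complete copy of it):

    import warnings
    from typing import NamedTuple

    class YoutubeCSVStub(NamedTuple):
        contentJSON: str

    try:
        from google_takeout_parser.models import CSVYoutubeComment  # needs a recent release
    except ImportError:
        warnings.warn("Please upgrade google_takeout_parser (`pip install -U google_takeout_parser`)")
        CSVYoutubeComment = YoutubeCSVStub  # type: ignore[misc,assignment]

    def handles(e: object) -> bool:
        # with the stub bound, this is always False on old installs
        return isinstance(e, CSVYoutubeComment)
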
promnesia/sources/takeout_legacy.py  CHANGED
@@ -1,9 +1,13 @@
-from
+from __future__ import annotations
+
+from promnesia.common import Loc, Results, Visit, logger
+

 # TODO make an iterator, insert in db as we go? handle errors gracefully?
 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     from my.google.takeout.paths import get_takeouts
+
     takeouts = list(get_takeouts())
     # TODO if no takeouts, raise?
     # although could raise a warning on top level, when source emitted no takeouts
@@ -22,19 +26,25 @@ def index() -> Results:



-import
-from
+import json
+from collections.abc import Iterable
 from datetime import datetime
-from
+from itertools import chain
 from pathlib import Path
-import json
-

-
+import pytz
+from more_itertools import unique_everseen

+from promnesia import config

-
-from cachew import cachew
+try:
+    from cachew import cachew
+except ModuleNotFoundError as me:
+    if me.name != 'cachew':
+        raise me
+    # this module is legacy anyway, so just make it defensive
+    def cachew(*args, **kwargs):  # type: ignore[no-redef]
+        return lambda f: f


 # TODO use CPath? Could encapsulate a path within an archive *or* within a directory
@@ -42,7 +52,9 @@ TakeoutPath = Path


 def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
+    # FIXME switch to actual kompress? and use CPath?
     from my.core.kompress import kexists
+
     # TODO glob
     # TODO not sure about windows path separators??
     spath = 'Takeout/My Activity/' + kind
@@ -53,7 +65,7 @@ def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:

     locator = Loc.file(spath)
     from my.google.takeout.html import read_html
-    for dt, url,
+    for dt, url, _title in read_html(takeout, spath):
         yield Visit(
             url=url,
             dt=dt,
@@ -105,7 +117,7 @@ def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
     hist = j['Browser History']
     for item in hist:
         url = item['url']
-        time = datetime.
+        time = datetime.fromtimestamp(item['time_usec'] / 10 ** 6, tz=pytz.utc)
         # TODO any more interesitng info?
         yield Visit(
             url=url,
promnesia/sources/telegram.py  CHANGED
@@ -1,11 +1,12 @@
-from
-
+from __future__ import annotations
+
 import warnings
+from urllib.parse import unquote  # TODO mm, make it easier to rememember to use...

-from promnesia.common import
+from promnesia.common import Loc, PathIsh, Results, Visit, extract_urls, logger


-def index(database:
+def index(database: PathIsh | None=None, *, http_only: bool=False, with_extra_media_info: bool=False) -> Results:
     if database is None:
         # fully relying on HPI
         yield from _index_new(http_only=http_only, with_extra_media_info=with_extra_media_info)
@@ -17,10 +18,11 @@ def index(database: Optional[PathIsh]=None, *, http_only: bool=False, with_extra
     )
     try:
         yield from _index_new_with_adhoc_config(database=database, http_only=http_only, with_extra_media_info=with_extra_media_info)
-        return
     except Exception as e:
         logger.exception(e)
         warnings.warn("Hacking my.config.telegram.telegram_backup didn't work. You probably need to update HPI.")
+    else:
+        return

     logger.warning("Falling back onto promnesia.sources.telegram_legacy module")
     yield from _index_legacy(database=database, http_only=http_only)
@@ -32,7 +34,7 @@ def _index_legacy(*, database: PathIsh, http_only: bool) -> Results:


 def _index_new_with_adhoc_config(*, database: PathIsh, http_only: bool, with_extra_media_info: bool) -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001

     class config:
         class telegram:
@@ -45,14 +47,14 @@ def _index_new_with_adhoc_config(*, database: PathIsh, http_only: bool, with_ext


 def _index_new(*, http_only: bool, with_extra_media_info: bool) -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     from my.telegram.telegram_backup import messages

     extra_where = "(has_media == 1 OR text LIKE '%http%')" if http_only else None
-    for
-
-
-    )
+    for m in messages(
+        with_extra_media_info=with_extra_media_info,
+        extra_where=extra_where,
+    ):
         text = m.text

         urls = extract_urls(text)
promnesia/sources/telegram_legacy.py  CHANGED
@@ -2,23 +2,34 @@
 Uses [[https://github.com/fabianonline/telegram_backup#readme][telegram_backup]] database for messages data
 '''

-from
+from __future__ import annotations
+
 import sqlite3
+from pathlib import Path
 from textwrap import dedent
-from typing import
-from urllib.parse import unquote
+from typing import TypeVar
+from urllib.parse import unquote  # TODO mm, make it easier to rememember to use...
+
+from promnesia.common import (
+    Loc,
+    PathIsh,
+    Results,
+    Visit,
+    echain,
+    extract_urls,
+    from_epoch,
+    get_logger,
+)

-from ..common import PathIsh, Visit, get_logger, Loc, extract_urls, from_epoch, Results, echain
 from ..sqlite import sqlite_connection

 T = TypeVar("T")


-def unwrap(res:
+def unwrap(res: T | Exception) -> T:
     if isinstance(res, Exception):
         raise res
-
-    return res
+    return res


 def index(database: PathIsh, *, http_only: bool=False) -> Results:
promnesia/sources/twitter.py  CHANGED
@@ -1,18 +1,19 @@
 '''
 Uses [[https://github.com/karlicoss/HPI][HPI]] for Twitter data.
 '''
-from typing import Iterable

-from
+from collections.abc import Iterable
+
+from promnesia.common import Loc, Res, Results, Visit, extract_urls, logger


 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     import my.twitter.all as tw
+    from my.twitter.archive import Tweet  # todo extract to common or something?
+
     # TODO hmm. tweets themselves are sort of visits? not sure if they should contribute..
     processed = 0
-
-    from my.twitter.archive import Tweet  # todo extract to common or something?
     tweets: Iterable[Res[Tweet]] = tw.tweets()
     for t in tweets:
         if isinstance(t, Exception):
promnesia/sources/vcs.py  CHANGED
@@ -1,12 +1,14 @@
 '''
 Clones & indexes Git repositories (via sources.auto)
 '''
-
+from __future__ import annotations

-from pathlib import Path
 import re
+from collections.abc import Iterable
+
+# TODO not sure if worth exposing... could be just handled by auto or something?)
+from pathlib import Path
 from subprocess import check_call
-from typing import Iterable

 from ..common import Extraction, PathIsh, get_tmpdir, slugify
