promnesia 1.1.20230129__py3-none-any.whl → 1.2.20240810__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
- promnesia/__main__.py +58 -50
- promnesia/cannon.py +4 -4
- promnesia/common.py +57 -38
- promnesia/compare.py +3 -2
- promnesia/compat.py +6 -65
- promnesia/config.py +4 -2
- promnesia/database/common.py +66 -0
- promnesia/database/dump.py +187 -0
- promnesia/{read_db.py → database/load.py} +10 -11
- promnesia/extract.py +1 -0
- promnesia/kjson.py +1 -1
- promnesia/logging.py +14 -14
- promnesia/misc/__init__.pyi +0 -0
- promnesia/misc/config_example.py +1 -2
- promnesia/misc/install_server.py +5 -4
- promnesia/server.py +24 -24
- promnesia/sources/__init__.pyi +0 -0
- promnesia/sources/auto.py +12 -7
- promnesia/sources/browser.py +80 -293
- promnesia/sources/browser_legacy.py +298 -0
- promnesia/sources/demo.py +18 -2
- promnesia/sources/filetypes.py +8 -0
- promnesia/sources/github.py +2 -2
- promnesia/sources/hackernews.py +1 -2
- promnesia/sources/hypothesis.py +1 -1
- promnesia/sources/markdown.py +15 -15
- promnesia/sources/org.py +7 -3
- promnesia/sources/plaintext.py +3 -1
- promnesia/sources/reddit.py +2 -2
- promnesia/sources/rss.py +5 -1
- promnesia/sources/shellcmd.py +6 -2
- promnesia/sources/signal.py +29 -20
- promnesia/sources/smscalls.py +8 -1
- promnesia/sources/stackexchange.py +2 -2
- promnesia/sources/takeout.py +132 -12
- promnesia/sources/takeout_legacy.py +10 -2
- promnesia/sources/telegram.py +79 -123
- promnesia/sources/telegram_legacy.py +117 -0
- promnesia/sources/vcs.py +1 -1
- promnesia/sources/viber.py +6 -15
- promnesia/sources/website.py +1 -1
- promnesia/sqlite.py +42 -0
- promnesia/tests/__init__.py +0 -0
- promnesia/tests/common.py +137 -0
- promnesia/tests/server_helper.py +64 -0
- promnesia/tests/sources/__init__.py +0 -0
- promnesia/tests/sources/test_auto.py +66 -0
- promnesia/tests/sources/test_filetypes.py +42 -0
- promnesia/tests/sources/test_hypothesis.py +39 -0
- promnesia/tests/sources/test_org.py +65 -0
- promnesia/tests/sources/test_plaintext.py +26 -0
- promnesia/tests/sources/test_shellcmd.py +22 -0
- promnesia/tests/sources/test_takeout.py +58 -0
- promnesia/tests/test_cannon.py +325 -0
- promnesia/tests/test_cli.py +42 -0
- promnesia/tests/test_compare.py +30 -0
- promnesia/tests/test_config.py +290 -0
- promnesia/tests/test_db_dump.py +223 -0
- promnesia/tests/test_extract.py +61 -0
- promnesia/tests/test_extract_urls.py +43 -0
- promnesia/tests/test_indexer.py +245 -0
- promnesia/tests/test_server.py +292 -0
- promnesia/tests/test_traverse.py +41 -0
- promnesia/tests/utils.py +35 -0
- {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/METADATA +14 -19
- promnesia-1.2.20240810.dist-info/RECORD +83 -0
- {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/WHEEL +1 -1
- {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/entry_points.txt +0 -1
- promnesia/dump.py +0 -105
- promnesia-1.1.20230129.dist-info/RECORD +0 -55
- {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/LICENSE +0 -0
- {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/top_level.txt +0 -0
promnesia/sources/signal.py
CHANGED
@@ -1,12 +1,13 @@
 """
 Collects visits from Signal Desktop's encrypted SQLIite db(s).
-
-Functions get their defaults from module-data.
-
-* Open-ciphered-db adapted from:
-  https://github.com/carderne/signal-export/commit/2284c8f4
-* Copyright (c) 2019 Chris Arderne, 2020 Kostis Anagnostopoulos
 """
+
+# Functions get their defaults from module-data.
+#
+# * Open-ciphered-db adapted from:
+#   https://github.com/carderne/signal-export/commit/2284c8f4
+# * Copyright (c) 2019 Chris Arderne, 2020 Kostis Anagnostopoulos
+
 
 import json
 import logging
@@ -62,6 +63,8 @@ def index(
     logger.debug("Paths to harvest: %s", db_paths)
     if not http_only:
         sql_query = f"{messages_query}\nWHERE body LIKE '%http%'"
+    else:
+        sql_query = messages_query
 
     for db_path in resolved_db_paths:
         logger.info("Ciphered db to harvest %s", db_path)
@@ -105,12 +108,18 @@ messages_query = dedent(
         SELECT
             id,
             type,
-            coalesce(
+            coalesce(
+                profileFullName,
+                profileName,
+                name,
+                profileFamilyName,
+                e164
+            ) as aname,
             name,
             profileName,
             profileFamilyName,
             e164,
-
+            serviceId
         FROM conversations
     ),
     Msgs AS (
@@ -122,8 +131,8 @@ messages_query = dedent(
                 M.received_at,
                 M.sent_at
             ) AS timestamp,
-            IIF(M.type =
-
+            IIF(M.type = 'outgoing',
+                'Me (' || C2.aname || ')',
                 C2.aname
             ) AS sender,
             M.conversationId AS cid,
@@ -137,7 +146,7 @@ messages_query = dedent(
         INNER JOIN Cons AS C1
             ON M.conversationId = C1.id
         INNER JOIN Cons AS C2
-            ON M.
+            ON M.sourceServiceId = C2.serviceId
     )
     SELECT id, timestamp, sender, cid, chatname, body
     FROM Msgs
@@ -187,8 +196,8 @@ def _expand_path(path_pattern: PathIsh) -> Iterable[Path]:
 
 def _expand_paths(paths: PathIshes) -> Iterable[Path]:
     if _is_pathish(paths):
-        paths = [paths]  # type: ignore[
-    return [pp.resolve() for p in paths for pp in _expand_path(p)]  # type: ignore[union-attr
+        paths = [paths]  # type: ignore[list-item]
+    return [pp.resolve() for p in paths for pp in _expand_path(p)]  # type: ignore[union-attr]
 
 
 def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path]:
@@ -235,7 +244,7 @@ def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path]
     )
 
     if db_paths and append:
-        db_paths = [  # type: ignore[
+        db_paths = [  # type: ignore[assignment]
            *([db_paths] if _is_pathish(db_paths) else db_paths),
            plat_paths,
        ]
@@ -309,8 +318,8 @@ def connect_db(
        sql_cmds.extend(
            [
                f"ATTACH DATABASE '{decrypted_file}' AS plaintext KEY '';",
-
-
+                "SELECT sqlcipher_export('plaintext');",
+                "DETACH DATABASE plaintext;",
            ]
        )
        sql = "\n".join(sql_cmds)
@@ -319,7 +328,7 @@ def connect_db(
            "Decrypting db '%s' with cmd: %s <<<EOF\n%s\nEOF", db_path, cmd, sql
        )
        try:
-            sbp.run(
+            sbp.run(
                cmd,
                check=True,
                input=sql,
@@ -334,7 +343,7 @@ def connect_db(
        ) from None
        db = sqlite3.connect(f"file:{decrypted_file}?mode=ro", uri=True)
    else:
-        from sqlcipher3 import dbapi2  # type: ignore[import]
+        from sqlcipher3 import dbapi2  # type: ignore[import-not-found]
 
        db = dbapi2.connect(f"file:{db_path}?mode=ro", uri=True)
        # Param-binding doesn't work for pragmas, so use a direct string concat.
@@ -418,9 +427,9 @@ def _harvest_db(
 
    with connect_db(db_path, key, decrypt_db=decrypt_db, **decryption_pragmas) as db:
        for mid, tstamp, sender, cid, chatname, text in db.execute(messages_query):
+            tstamp = from_epoch(tstamp / 1000.0)
+            row = (mid, tstamp, sender, cid, chatname, text)
            try:
-                tstamp = from_epoch(tstamp / 1000.0)
-                row = (mid, tstamp, sender, cid, chatname, text)
                yield from _handle_row(row, db_path, locator_schema)
            except Exception as ex:
                # TODO: also insert errors in db
promnesia/sources/smscalls.py
CHANGED
@@ -11,11 +11,18 @@ def index() -> Results:
 
     for m in messages():
 
+        if isinstance(m, Exception):
+            yield m
+            continue
+
         urls = extract_urls(m.message)
         if len(urls) == 0:
             continue
 
-
+        if m.who is None:
+            loc = Loc(title=f"SMS with {m.phone_number}")
+        else:
+            loc = Loc(title=f"SMS with {m.who} ({m.phone_number})")
 
         for u in urls:
             yield Visit(
promnesia/sources/stackexchange.py
CHANGED
@@ -2,12 +2,12 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for Stackexchange data.
 '''
 
-from ..common import Results, Visit, Loc
+from ..common import Results, Visit, Loc
 
 
 def index() -> Results:
     from . import hpi
-    import my.stackexchange.gdpr as G
+    import my.stackexchange.gdpr as G
     for v in G.votes():
         if isinstance(v, Exception):
             yield v
promnesia/sources/takeout.py
CHANGED
@@ -1,19 +1,26 @@
 '''
 Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#mygoogletakeoutpaths][google.takeout]] module
 '''
-from typing import Iterable, Set,
+from typing import Iterable, Set, Any, NamedTuple
 import warnings
 
 from ..common import Visit, Loc, Results, logger
 from ..compat import removeprefix
 
 
+# incase user is using an old version of google_takeout_parser
+class YoutubeCSVStub(NamedTuple):
+    contentJSON: str
+
+
 def index() -> Results:
     from . import hpi
+    import json
 
     try:
         from my.google.takeout.parser import events
-        from google_takeout_parser.models import Activity, YoutubeComment, LikedYoutubeVideo, ChromeHistory
+        from google_takeout_parser.models import Activity, YoutubeComment, LikedYoutubeVideo, ChromeHistory
+        from google_takeout_parser.parse_csv import reconstruct_comment_content, extract_comment_links
     except ModuleNotFoundError as ex:
         logger.exception(ex)
         yield ex
@@ -24,17 +31,30 @@ def index() -> Results:
         yield from takeout_legacy.index()
         return
 
-
+
+    _seen: Set[str] = {
         # these are definitely not useful for promnesia
-        Location,
-
+        'Location',
+        'PlaceVisit',
+        'PlayStoreAppInstall',
     }
-
-
-
+
+    imported_yt_csv_models = False
+    try:
+        from google_takeout_parser.models import CSVYoutubeComment, CSVYoutubeLiveChat
+        imported_yt_csv_models = True
+    except ImportError:
+        # warn user to upgrade google_takeout_parser
+        warnings.warn("Please upgrade google_takeout_parser (`pip install -U google_takeout_parser`) to support the new format for youtube comments")
+        CSVYoutubeComment = YoutubeCSVStub  # type: ignore[misc,assignment]
+        CSVYoutubeLiveChat = YoutubeCSVStub  # type: ignore[misc,assignment]
+
+    def warn_once_if_not_seen(e: Any) -> Iterable[Exception]:
+        et_name = type(e).__name__
+        if et_name in _seen:
             return
-            _seen.add(
-            yield RuntimeError(f"Unhandled event {
+        _seen.add(et_name)
+        yield RuntimeError(f"Unhandled event {repr(type(e))}: {e}")
 
     for e in events():
         if isinstance(e, Exception):
@@ -48,12 +68,37 @@ def index() -> Results:
             # e.g. https://www.google.com/url?q=https://en.wikipedia.org/wiki/Clapham
             # note: also title usually starts with 'Visited ', in such case but perhaps fine to keep it
             url = removeprefix(url, "https://www.google.com/url?q=")
+            title = e.title
 
+            if e.header == 'Chrome':
+                # title contains 'Visited <page title>' in this case
+                context = None
+                title = removeprefix(title, 'Visited ')
+            elif e.header in _CLEAR_CONTEXT_FOR_HEADERS:
+                # todo perhaps could add to some sort of metadata?
+                # only useful for debugging really
+                context = None
+            elif e.header in url:
+                # stuff like News only has domain name in the header -- completely useless for promnesia
+                context = None
+            elif e.title == f'Used {e.header}':
+                # app usage tracking -- using app name as context is useless here
+                context = None
+            elif e.products == ['Android']:
+                # seems to be coming from in-app browser, header contains app name in this case
+                context = None
+            elif e.products == ['Ads']:
+                # header contains some weird internal ad id in this case
+                context = None
+            else:
+                context = None
+            # NOTE: at this point seems that context always ends up as None (at least for @karlicoss as of 20230131)
+            # so alternatively could just force it to be None instead of manual dispatching :shrug:
             yield Visit(
                 url=url,
                 dt=e.time,
-                context=
-                locator=Loc(title=
+                context=context,
+                locator=Loc(title=title, href=url),
             )
             for s in e.subtitles:
                 surl = s[1]
@@ -73,6 +118,8 @@ def index() -> Results:
                 locator=Loc(title=e.title, href=e.url),
             )
         elif isinstance(e, LikedYoutubeVideo):
+            # TODO not sure if desc makes sense here since it's not user produced data
+            # it's just a part of video meta?
             yield Visit(
                 url=e.link, dt=e.dt, context=e.desc, locator=Loc(title=e.title, href=e.link)
             )
@@ -83,5 +130,78 @@ def index() -> Results:
             yield Visit(
                 url=url, dt=e.dt, context=e.content, locator=Loc(title=e.content, href=url)
             )
+        elif imported_yt_csv_models and isinstance(e, CSVYoutubeComment):
+            contentJSON = json.loads(e.contentJSON)
+            content = reconstruct_comment_content(contentJSON, format='text')
+            if isinstance(content, Exception):
+                yield content
+                continue
+            links = extract_comment_links(contentJSON)
+            if isinstance(links, Exception):
+                yield links
+                continue
+            context = f"Commented on {e.video_url}"
+            for url in links:
+                yield Visit(
+                    url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url)
+                )
+            yield Visit(
+                url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url)
+            )
+        elif imported_yt_csv_models and isinstance(e, CSVYoutubeLiveChat):
+            contentJSON = json.loads(e.contentJSON)
+            content = reconstruct_comment_content(contentJSON, format='text')
+            if isinstance(content, Exception):
+                yield content
+                continue
+            links = extract_comment_links(contentJSON)
+            if isinstance(links, Exception):
+                yield links
+                continue
+            context = f"Commented on livestream {e.video_url}"
+            for url in links:
+                yield Visit(
+                    url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url)
+                )
+            yield Visit(
+                url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url)
+            )
         else:
             yield from warn_once_if_not_seen(e)
+
+
+_CLEAR_CONTEXT_FOR_HEADERS = {
+    'Google Cloud',
+    'Travel',
+    'Google Arts & Culture',
+    'Drive',
+    'Calendar',
+    'Google Store',
+    'Shopping',
+    'News',
+    'Help',
+    'Books',
+    'Google My Business',
+    'Google Play Movies & TV',
+    'Developers',
+    'YouTube',
+    'Gmail',
+    'Video Search',
+    'Google Apps',
+    'Google Translate',
+    'Ads',
+    'Image Search',
+    'Assistant',
+    'Google Play Store',
+    'Android',
+    'Maps',
+    'Search',
+    'Google App',
+    'in_app_display_context_client',
+    'Play Music',
+    'Maps - Navigate & Explore',
+    'Google Maps',
+    'google.com',
+    'Google Play Books',
+    'Maps - Navigation & Transit',
+}
promnesia/sources/takeout_legacy.py
CHANGED
@@ -34,7 +34,15 @@ from .. import config
 
 
 from more_itertools import unique_everseen
-
+
+try:
+    from cachew import cachew
+except ModuleNotFoundError as me:
+    if me.name != 'cachew':
+        raise me
+    # this module is legacy anyway, so just make it defensive
+    def cachew(*args, **kwargs):  # type: ignore[no-redef]
+        return lambda f: f
 
 
 # TODO use CPath? Could encapsulate a path within an archive *or* within a directory
@@ -105,7 +113,7 @@ def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
     hist = j['Browser History']
     for item in hist:
         url = item['url']
-        time = datetime.
+        time = datetime.fromtimestamp(item['time_usec'] / 10 ** 6, tz=pytz.utc)
         # TODO any more interesitng info?
         yield Visit(
             url=url,
promnesia/sources/telegram.py
CHANGED
@@ -1,128 +1,84 @@
-'''
-Uses [[https://github.com/fabianonline/telegram_backup#readme][telegram_backup]] database for messages data
-'''
-
-from pathlib import Path
-from textwrap import dedent
-from typing import Optional, Union, TypeVar
+from typing import Optional
 from urllib.parse import unquote  # TODO mm, make it easier to rememember to use...
+import warnings
 
-from
-
-
-
-
-
-
-def unwrap(res: Union[T, Exception]) -> T:
-    if isinstance(res, Exception):
-        raise res
-    else:
-        return res
-
-
-# TODO move to common?
-def dataset_readonly(db: Path):
-    import dataset  # type: ignore
-    # see https://github.com/pudo/dataset/issues/136#issuecomment-128693122
-    import sqlite3
-    creator = lambda: sqlite3.connect(f'file:{db}?immutable=1', uri=True)
-    return dataset.connect('sqlite:///' , engine_kwargs={'creator': creator})
-
-
-def index(database: PathIsh, *, http_only: bool=False) -> Results:
-    """
-    :param database:
-        the path of the sqlite generated by the _telegram_backup_ java program
-    :param http_only:
-        when true, do not collect IP-addresses and `python.py` strings
-    """
-    logger = get_logger()
-
-    path = Path(database)
-    assert path.is_file(), path  # TODO could check is_file inside `dataset_readonly()`
-
-    def make_query(text_query: str) -> str:
-        extra_criteria = "AND (M.has_media == 1 OR text LIKE '%http%')" if http_only else ""
-        return dedent(
-            f"""
-            WITH entities AS (
-                SELECT 'dialog' as type
-                    , id
-                    , coalesce(username, id) as handle
-                    , coalesce(first_name || " " || last_name
-                        , username
-                        , id
-                    ) as display_name FROM users
-                UNION
-                SELECT 'group' as type
-                    , id
-                    , id as handle
-                    , coalesce(name, id) as display_name FROM chats
-            )
-            SELECT src.display_name AS chatname
-                , src.handle AS chat
-                , snd.display_name AS sender
-                , M.time AS time
-                , {text_query} AS text
-                , M.id AS mid
-            FROM messages AS M
-            /* chat types are 'dialog' (1-1), 'group' and 'supergroup' */
-            /* this is abit hacky way to handle all groups in one go */
-            LEFT JOIN entities AS src ON M.source_id = src.id AND src.type = (CASE M.source_type WHEN 'supergroup' THEN 'group' ELSE M.source_type END)
-            LEFT JOIN entities AS snd ON M.sender_id = snd.id AND snd.type = 'dialog'
-            WHERE
-                M.message_type NOT IN ('service_message', 'empty_message')
-                {extra_criteria}
-            ORDER BY time;
-            """)
-
-    # TODO context manager?
-    with dataset_readonly(path) as db:
-
-        # TODO yield error if chatname or chat or smth else is null?
-        for row in db.query(make_query('M.text')):
-            try:
-                yield from _handle_row(row)
-            except Exception as ex:
-                yield echain(RuntimeError(f'While handling {row}'), ex)
-                # , None, sys.exc_info()[2]
-                # TODO hmm. traceback isn't preserved; wonder if that's because it's too heavy to attach to every single exception object..
-
-        # old (also 'stable') version doesn't have 'json' column yet...
-        if 'json' in db['messages'].columns:
-            for row in db.query(make_query("json_extract(json, '$.media.webpage.description')")):
-                try:
-                    yield from _handle_row(row)
-                except Exception as ex:
-                    yield echain(RuntimeError(f'While handling {row}'), ex)
-
-
-def _handle_row(row) -> Results:
-    text = row['text']
-    if text is None:
+from promnesia.common import Results, logger, extract_urls, Visit, Loc, PathIsh
+
+
+def index(database: Optional[PathIsh]=None, *, http_only: bool=False, with_extra_media_info: bool=False) -> Results:
+    if database is None:
+        # fully relying on HPI
+        yield from _index_new(http_only=http_only, with_extra_media_info=with_extra_media_info)
         return
-
-
+
+    warnings.warn(
+        f'Passing paths to promnesia.sources.telegram is deprecated, you should setup my.telegram.telegram_backup instead. '
+        f'Will try to hack database path {database} into HPI config.'
+    )
+    try:
+        yield from _index_new_with_adhoc_config(database=database, http_only=http_only, with_extra_media_info=with_extra_media_info)
         return
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    except Exception as e:
+        logger.exception(e)
+        warnings.warn("Hacking my.config.telegram.telegram_backup didn't work. You probably need to update HPI.")
+
+    logger.warning("Falling back onto promnesia.sources.telegram_legacy module")
+    yield from _index_legacy(database=database, http_only=http_only)
+
+
+def _index_legacy(*, database: PathIsh, http_only: bool) -> Results:
+    from . import telegram_legacy
+    yield from telegram_legacy.index(database=database, http_only=http_only)
+
+
+def _index_new_with_adhoc_config(*, database: PathIsh, http_only: bool, with_extra_media_info: bool) -> Results:
+    from . import hpi
+
+    class config:
+        class telegram:
+            class telegram_backup:
+                export_path: PathIsh = database
+
+    from my.core.cfg import tmp_config
+    with tmp_config(modules='my.telegram.telegram_backup', config=config):
+        yield from _index_new(http_only=http_only, with_extra_media_info=with_extra_media_info)
+
+
+def _index_new(*, http_only: bool, with_extra_media_info: bool) -> Results:
+    from . import hpi
+    from my.telegram.telegram_backup import messages
+
+    extra_where = "(has_media == 1 OR text LIKE '%http%')" if http_only else None
+    for i, m in enumerate(messages(
+        with_extra_media_info=with_extra_media_info,
+        extra_where=extra_where,
+    )):
+        text = m.text
+
+        urls = extract_urls(text)
+        extra_media_info = m.extra_media_info
+        if extra_media_info is not None:
+            urls.extend(extract_urls(extra_media_info))
+
+        if len(urls) == 0:
+            continue
+
+        dt = m.time
+        sender = m.sender.name
+        chat = m.chat
+
+        cname = chat.name if chat.name is not None else str(chat.id)
+
+        locator = Loc.make(
+            title=f"chat with {cname}",
+            href=m.permalink,
         )
+        context = f'{sender}: {text}'
+
+        for u in urls:
+            yield Visit(
+                url=unquote(u),
+                dt=dt,
+                context=context,
+                locator=locator,
+            )