promnesia 1.3.20241021__py3-none-any.whl → 1.4.20250909__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promnesia/__init__.py +4 -1
- promnesia/__main__.py +72 -59
- promnesia/cannon.py +90 -89
- promnesia/common.py +74 -62
- promnesia/compare.py +15 -10
- promnesia/config.py +22 -17
- promnesia/database/dump.py +1 -2
- promnesia/extract.py +6 -6
- promnesia/logging.py +27 -15
- promnesia/misc/install_server.py +25 -19
- promnesia/server.py +69 -53
- promnesia/sources/auto.py +65 -51
- promnesia/sources/browser.py +7 -2
- promnesia/sources/browser_legacy.py +51 -40
- promnesia/sources/demo.py +0 -1
- promnesia/sources/fbmessenger.py +0 -1
- promnesia/sources/filetypes.py +15 -11
- promnesia/sources/github.py +4 -1
- promnesia/sources/guess.py +4 -1
- promnesia/sources/hackernews.py +5 -7
- promnesia/sources/hpi.py +3 -1
- promnesia/sources/html.py +4 -2
- promnesia/sources/instapaper.py +1 -0
- promnesia/sources/markdown.py +4 -4
- promnesia/sources/org.py +17 -8
- promnesia/sources/plaintext.py +14 -11
- promnesia/sources/pocket.py +2 -1
- promnesia/sources/reddit.py +5 -8
- promnesia/sources/roamresearch.py +3 -1
- promnesia/sources/rss.py +4 -5
- promnesia/sources/shellcmd.py +3 -6
- promnesia/sources/signal.py +14 -14
- promnesia/sources/smscalls.py +0 -1
- promnesia/sources/stackexchange.py +2 -2
- promnesia/sources/takeout.py +14 -21
- promnesia/sources/takeout_legacy.py +16 -10
- promnesia/sources/telegram.py +7 -3
- promnesia/sources/telegram_legacy.py +5 -5
- promnesia/sources/twitter.py +1 -1
- promnesia/sources/vcs.py +6 -3
- promnesia/sources/viber.py +2 -2
- promnesia/sources/website.py +4 -3
- promnesia/sqlite.py +10 -7
- promnesia/tests/common.py +2 -0
- promnesia/tests/server_helper.py +2 -2
- promnesia/tests/sources/test_filetypes.py +9 -7
- promnesia/tests/sources/test_hypothesis.py +7 -3
- promnesia/tests/sources/test_org.py +7 -2
- promnesia/tests/sources/test_plaintext.py +9 -7
- promnesia/tests/sources/test_shellcmd.py +10 -9
- promnesia/tests/test_cannon.py +254 -237
- promnesia/tests/test_cli.py +8 -2
- promnesia/tests/test_compare.py +16 -12
- promnesia/tests/test_db_dump.py +4 -3
- promnesia/tests/test_extract.py +7 -4
- promnesia/tests/test_indexer.py +10 -10
- promnesia/tests/test_server.py +10 -10
- promnesia/tests/utils.py +1 -5
- promnesia-1.4.20250909.dist-info/METADATA +66 -0
- promnesia-1.4.20250909.dist-info/RECORD +80 -0
- {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info}/WHEEL +1 -2
- promnesia/kjson.py +0 -122
- promnesia/sources/__init__.pyi +0 -0
- promnesia-1.3.20241021.dist-info/METADATA +0 -55
- promnesia-1.3.20241021.dist-info/RECORD +0 -83
- promnesia-1.3.20241021.dist-info/top_level.txt +0 -1
- {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info}/entry_points.txt +0 -0
- {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info/licenses}/LICENSE +0 -0
promnesia/sources/takeout_legacy.py
CHANGED
@@ -13,9 +13,11 @@ def index() -> Results:
     # although could raise a warning on top level, when source emitted no takeouts

     # TODO youtube?
+    # fmt: off
     google_activities = [read_google_activity(t) for t in takeouts]
     search_activities = [read_search_activity(t) for t in takeouts]
     browser_histories = [read_browser_history_json(t) for t in takeouts]
+    # fmt: on

     key = lambda v: (v.dt, v.url)
     return chain(
@@ -25,14 +27,12 @@ def index() -> Results:
     )


-
 import json
 from collections.abc import Iterable
-from datetime import datetime
+from datetime import datetime, timezone
 from itertools import chain
 from pathlib import Path

-import pytz
 from more_itertools import unique_everseen

 from promnesia import config
@@ -42,6 +42,7 @@ try:
 except ModuleNotFoundError as me:
     if me.name != 'cachew':
         raise me
+
     # this module is legacy anyway, so just make it defensive
     def cachew(*args, **kwargs):  # type: ignore[no-redef]
         return lambda f: f
@@ -53,7 +54,7 @@ TakeoutPath = Path

 def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
     # FIXME switch to actual kompress? and use CPath?
-    from my.core.kompress import kexists
+    from my.core.kompress import kexists  # type: ignore[attr-defined]

     # TODO glob
     # TODO not sure about windows path separators??
@@ -65,6 +66,7 @@ def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:

     locator = Loc.file(spath)
     from my.google.takeout.html import read_html
+
     for dt, url, _title in read_html(takeout, spath):
         yield Visit(
             url=url,
@@ -73,6 +75,7 @@ def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
             debug=kind,
         )

+
 def _cpath(suffix: str):
     def fun(takeout: TakeoutPath):
         cache_dir = config.get().cache_dir
@@ -80,23 +83,27 @@ def _cpath(suffix: str):
             return None
         # doesn't need a nontrivial hash function, timestsamp is encoded in name
         return cache_dir / (takeout.name + '_' + suffix + '.cache')
+
     return fun


 # todo caching should this be HPI responsibility?
 # todo set global cachew logging on init?
-@cachew(cache_path=_cpath('google_activity')
+@cachew(cache_path=_cpath('google_activity'), logger=logger)
 def read_google_activity(takeout: TakeoutPath) -> Iterable[Visit]:
     return _read_myactivity_html(takeout, 'Chrome/MyActivity.html')

-
+
+@cachew(cache_path=_cpath('search_activity'), logger=logger)
 def read_search_activity(takeout: TakeoutPath) -> Iterable[Visit]:
     return _read_myactivity_html(takeout, 'Search/MyActivity.html')

+
 # TODO add this to tests?
 @cachew(cache_path=_cpath('browser_activity'), logger=logger)
 def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
-    from my.core.kompress import kexists, kopen
+    from my.core.kompress import kexists, kopen  # type: ignore[attr-defined]
+
     # not sure if this deserves moving to HPI? it's pretty trivial for now
     spath = 'Takeout/Chrome/BrowserHistory.json'

@@ -111,13 +118,13 @@ def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
     # TODO this should be supported by HPI now?

     j = None
-    with kopen(takeout, spath) as fo:
+    with kopen(takeout, spath) as fo:  # TODO iterative parser?
         j = json.load(fo)

     hist = j['Browser History']
     for item in hist:
         url = item['url']
-        time = datetime.fromtimestamp(item['time_usec'] / 10
+        time = datetime.fromtimestamp(item['time_usec'] / 10**6, tz=timezone.utc)
         # TODO any more interesitng info?
         yield Visit(
             url=url,
@@ -125,4 +132,3 @@ def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
             locator=locator,
             debug='Chrome/BrowserHistory.json',
         )
-
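Note on the change above: the pytz dependency is dropped in favour of the standard library's timezone-aware conversion. A minimal sketch of the new conversion, using a hypothetical microsecond timestamp:

    from datetime import datetime, timezone

    # Takeout stores visit times as microseconds since the epoch; dividing by 10**6
    # and passing tz=timezone.utc yields an aware datetime (previously pytz.utc was used).
    time_usec = 1595000000000000  # hypothetical example value, not from the diff
    dt = datetime.fromtimestamp(time_usec / 10**6, tz=timezone.utc)
    print(dt.isoformat())  # 2020-07-17T15:33:20+00:00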
promnesia/sources/telegram.py
CHANGED
@@ -6,7 +6,7 @@ from urllib.parse import unquote  # TODO mm, make it easier to rememember to use
 from promnesia.common import Loc, PathIsh, Results, Visit, extract_urls, logger


-def index(database: PathIsh | None=None, *, http_only: bool=False, with_extra_media_info: bool=False)
+def index(database: PathIsh | None = None, *, http_only: bool = False, with_extra_media_info: bool = False) -> Results:
     if database is None:
         # fully relying on HPI
         yield from _index_new(http_only=http_only, with_extra_media_info=with_extra_media_info)
@@ -17,7 +17,9 @@ def index(database: PathIsh | None=None, *, http_only: bool=False, with_extra_me
        f'Will try to hack database path {database} into HPI config.'
    )
    try:
-        yield from _index_new_with_adhoc_config(
+        yield from _index_new_with_adhoc_config(
+            database=database, http_only=http_only, with_extra_media_info=with_extra_media_info
+        )
    except Exception as e:
        logger.exception(e)
        warnings.warn("Hacking my.config.telegram.telegram_backup didn't work. You probably need to update HPI.")
@@ -30,11 +32,12 @@ def index(database: PathIsh | None=None, *, http_only: bool=False, with_extra_me


 def _index_legacy(*, database: PathIsh, http_only: bool) -> Results:
     from . import telegram_legacy
+
     yield from telegram_legacy.index(database=database, http_only=http_only)


 def _index_new_with_adhoc_config(*, database: PathIsh, http_only: bool, with_extra_media_info: bool) -> Results:
-    from . import hpi # noqa: F401
+    from . import hpi  # noqa: F401

     class config:
         class telegram:
@@ -42,6 +45,7 @@ def _index_new_with_adhoc_config(*, database: PathIsh, http_only: bool, with_ext
                 export_path: PathIsh = database

     from my.core.cfg import tmp_config
+
     with tmp_config(modules='my.telegram.telegram_backup', config=config):
         yield from _index_new(http_only=http_only, with_extra_media_info=with_extra_media_info)

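The ad-hoc config path above leans on HPI's my.core.cfg.tmp_config. A hedged sketch of the pattern used by _index_new_with_adhoc_config, with a hypothetical database path:

    from my.core.cfg import tmp_config

    database = '/path/to/telegram_backup.db'  # hypothetical path

    class config:
        class telegram:
            class telegram_backup:
                export_path = database

    # temporarily patches my.config.telegram.telegram_backup while the block runs,
    # so the new-style indexer can pick up the database without a permanent config edit
    with tmp_config(modules='my.telegram.telegram_backup', config=config):
        ...  # indexing that imports my.telegram.telegram_backup goes here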
promnesia/sources/telegram_legacy.py
CHANGED
@@ -18,7 +18,6 @@ from promnesia.common import (
     echain,
     extract_urls,
     from_epoch,
-    get_logger,
 )

 from ..sqlite import sqlite_connection
@@ -32,15 +31,13 @@ def unwrap(res: T | Exception) -> T:
     return res


-def index(database: PathIsh, *, http_only: bool=False) -> Results:
+def index(database: PathIsh, *, http_only: bool = False) -> Results:
     """
     :param database:
         the path of the sqlite generated by the _telegram_backup_ java program
     :param http_only:
         when true, do not collect IP-addresses and `python.py` strings
     """
-    logger = get_logger()
-
     path = Path(database)
     assert path.is_file(), path

@@ -77,7 +74,8 @@ def index(database: PathIsh, *, http_only: bool=False) -> Results:
        M.message_type NOT IN ('service_message', 'empty_message')
        {extra_criteria}
        ORDER BY time;
-    """
+    """
+    )

     with sqlite_connection(path, immutable=True, row_factory='row') as db:
         # TODO yield error if chatname or chat or smth else is null?
@@ -105,6 +103,7 @@ def _handle_row(row: sqlite3.Row) -> Results:
     urls = extract_urls(text)
     if len(urls) == 0:
         return
+    # fmt: off
     dt = from_epoch(row['time'])
     mid: str = unwrap(row['mid'])

@@ -112,6 +111,7 @@ def _handle_row(row: sqlite3.Row) -> Results:
     sender: str = unwrap(row['sender'])
     chatname: str = unwrap(row['chatname'])
     chat: str = unwrap(row['chat'])
+    # fmt: on

     in_context = f'https://t.me/{chat}/{mid}'
     for u in urls:
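Several hunks in this release add # fmt: off / # fmt: on pairs and # fmt: skip trailers. These are standard black/ruff-format directives; a toy sketch (names and values are illustrative, not from the diff):

    # fmt: off
    # the formatter leaves this block exactly as written
    MEDIA_EXTS = [
        'avi', 'mp4',
        'mp3', 'webm',
    ]
    # fmt: on

    cmd = ['serve', '--port', '13131', '--quiet']  # fmt: skip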
promnesia/sources/twitter.py
CHANGED
promnesia/sources/vcs.py
CHANGED
@@ -1,6 +1,7 @@
 '''
 Clones & indexes Git repositories (via sources.auto)
 '''
+
 from __future__ import annotations

 import re
@@ -22,7 +23,7 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
     # note: https://bugs.python.org/issue33617 , it doesn't like Path here on Windows
     check_call(['git', 'clone', repo, str(tp)])

-    def replacer(p: PathIsh, prefix: str=str(tp), repo: str=repo) -> str:
+    def replacer(p: PathIsh, prefix: str = str(tp), repo: str = repo) -> str:
         ps = str(p)
         # TODO prefix is a bit misleading
         pos = ps.find(prefix)
@@ -31,13 +32,15 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
             return ps
         # TODO ugh. seems that blame view https://github.com/davidgasquez/handbook/blame/master/README.md#L25 is the most reliable
         # in raw mode can't jump onto line, when markdown is renderend can't jump either
-        rest = ps[pos + len(prefix):]
-        rest = re.sub(r':(\d+)$', r'#L\1', rest)
+        rest = ps[pos + len(prefix) :]
+        rest = re.sub(r':(\d+)$', r'#L\1', rest)  # patch line number...
         return repo + '/blame/master' + rest

     # TODO doesn't work for git:
     # TODO think about something more generic... this isn't too sustainable
+
     # TODO not sure if context should be local or github?...

     from . import auto
+
     yield from auto.index(tp, *args, replacer=replacer, **kwargs)
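For context, the replacer above maps a path inside the temporary clone (plus a trailing :<line> suffix) to a GitHub blame URL. A rough sketch with hypothetical values:

    import re

    prefix = '/tmp/promnesia-git-clone'                 # hypothetical temporary clone dir
    repo = 'https://github.com/davidgasquez/handbook'   # the cloned remote
    ps = '/tmp/promnesia-git-clone/README.md:25'        # local path with a line number

    rest = ps[len(prefix):]
    rest = re.sub(r':(\d+)$', r'#L\1', rest)  # ':25' becomes '#L25'
    print(repo + '/blame/master' + rest)
    # https://github.com/davidgasquez/handbook/blame/master/README.md#L25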
promnesia/sources/viber.py
CHANGED
@@ -19,7 +19,7 @@ logger = logging.getLogger(__name__)

 def index(
     db_path: PathIsh = "~/.ViberPC/*/viber.db",
-    locator_schema: str="editor",
+    locator_schema: str = "editor",
     *,
     http_only: bool = False,
 ) -> Results:
@@ -40,7 +40,7 @@ def index(
     yield from _harvest_db(db, msgs_query, locator_schema)


-def messages_query(http_only: bool | None) -> str:
+def messages_query(http_only: bool | None) -> str:  # noqa: FBT001
     """
     An SQL-query returning 1 row for each message

promnesia/sources/website.py
CHANGED
@@ -27,7 +27,7 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
         '-A', 'html,html,txt',  # TODO eh, ideally would use mime type I guess...
         '--no-parent',
         url,
-    ]
+    ]  # fmt: skip
     # TODO follow sitemap? e.g. gwern
     logger.info(' '.join(cmd))
     res = run(cmd, check=False)
@@ -39,12 +39,12 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
     # rest of the errors are a bit more critical..
     res.check_returncode()

-    def replacer(p: PathIsh, prefix: str=str(tp), url: str=url) -> str:
+    def replacer(p: PathIsh, prefix: str = str(tp), url: str = url) -> str:
         ps = str(p)
         pos = ps.find(prefix)
         if pos == -1:
             return ps
-        rest = ps[pos + len(prefix):]
+        rest = ps[pos + len(prefix) :]
         # now this should look kinda like /domain.tld/rest (due to the way wget downloads stuff)
         rest = re.sub(r'/.*?/', '/', rest)
         return url + rest
@@ -54,4 +54,5 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:

     # TODO smarter html handling
     from . import auto
+
     yield from auto.index(tp, *args, replacer=replacer, **kwargs)
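Similarly, the website replacer maps a file inside the temporary wget mirror back to a URL on the original site. A sketch with hypothetical values:

    import re

    prefix = '/tmp/promnesia-wget-mirror'   # hypothetical temporary download dir
    url = 'https://example.org'             # the site being indexed
    ps = '/tmp/promnesia-wget-mirror/example.org/about/index.html'

    rest = ps[len(prefix):]                 # '/example.org/about/index.html'
    rest = re.sub(r'/.*?/', '/', rest)      # drop the domain directory wget created
    print(url + rest)                       # https://example.org/about/index.html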
promnesia/sqlite.py
CHANGED
@@ -1,25 +1,28 @@
 from __future__ import annotations

 import sqlite3
-from collections.abc import Iterator
+from collections.abc import Callable, Iterator
 from contextlib import contextmanager
-from
-
-from .common import PathIsh
+from pathlib import Path
+from typing import Any, Literal

 # NOTE: copy pasted from HPI

 SqliteRowFactory = Callable[[sqlite3.Cursor, sqlite3.Row], Any]

+
 def dict_factory(cursor, row):
     fields = [column[0] for column in cursor.description]
-    return dict(zip(fields, row))
+    return dict(zip(fields, row, strict=True))
+

+Factory = SqliteRowFactory | Literal['row', 'dict']

-Factory = Union[SqliteRowFactory, Literal['row', 'dict']]

 @contextmanager
-def sqlite_connection(
+def sqlite_connection(
+    db: Path | str, *, immutable: bool = False, row_factory: Factory | None = None
+) -> Iterator[sqlite3.Connection]:
     dbp = f'file:{db}'
     # https://www.sqlite.org/draft/uri.html#uriimmutable
     if immutable:
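The reworked sqlite_connection keeps the call shape used elsewhere in this diff (e.g. in telegram_legacy.py). A small usage sketch with a hypothetical database path:

    from promnesia.sqlite import sqlite_connection

    # immutable=True opens the database via sqlite's read-only URI form;
    # row_factory='row' selects sqlite3.Row, 'dict' selects the dict_factory above.
    with sqlite_connection('/path/to/visits.db', immutable=True, row_factory='row') as db:
        for row in db.execute('SELECT 1 AS one'):
            print(row['one'])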
promnesia/tests/common.py
CHANGED
@@ -59,6 +59,7 @@ def get_testdata(path: str) -> Path:
 @contextmanager
 def tmp_popen(*args, **kwargs):
     import psutil
+
     with psutil.Popen(*args, **kwargs) as p:
         try:
             yield p
@@ -99,6 +100,7 @@ def reset_filters():
 # TODO could be a TypeGuard from 3.10
 V = TypeVar('V')

+
 def unwrap(r: Res[V]) -> V:
     assert not isinstance(r, Exception), r
     return r
promnesia/tests/server_helper.py
CHANGED
@@ -45,8 +45,8 @@ def run_server(db: PathIsh | None = None, *, timezone: str | None = None) -> Ite
         '--quiet',
         '--port', port,
         *([] if timezone is None else ['--timezone', timezone]),
-        *([] if db is None else ['--db'
-    ]
+        *([] if db is None else ['--db', str(db)]),
+    ]  # fmt: skip
     with tmp_popen(promnesia_bin(*args)) as server_process:
         server = Helper(host=host, port=port, process=server_process)

promnesia/tests/sources/test_filetypes.py
CHANGED
@@ -13,16 +13,16 @@ def handled(p: PathIsh) -> bool:

 def test_filetypes() -> None:
     # test media
-    for ext in 'avi mp4 mp3 webm'
+    for ext in ['avi', 'mp4', 'mp3', 'webm'] + ([] if windows else ['mkv']):
         assert handled('file.' + ext)

     # images
-    for ext in 'gif jpg png jpeg'
+    for ext in ['gif', 'jpg', 'png', 'jpeg']:
         assert handled('file.' + ext)

     # TODO more granual checks that these are ignored?
     # binaries
-    for ext in 'o sqlite'
+    for ext in ['o', 'sqlite'] + ([] if windows else ['class', 'jar']):
         assert handled('file.' + ext)

     # these might have potentially some links
@@ -31,13 +31,15 @@ def test_filetypes() -> None:
         'pdf', 'epub', 'ps',
         'doc', 'ppt', 'xsl',
         # seriously, windows doesn't know about docx???
-        *([] if windows else 'docx pptx xlsx'
-        *([] if windows else 'ods odt rtf'
-    ] + ([] if windows else 'djvu'
+        *([] if windows else ['docx', 'pptx', 'xlsx']),
+        *([] if windows else ['ods', 'odt', 'rtf']),
+    ] + ([] if windows else ['djvu']):  # fmt: skip
         assert handled('file.' + ext)

     # source code
-    for ext in 'rs tex el js sh hs pl h py hpp c go css'
+    for ext in ['rs', 'tex', 'el', 'js', 'sh', 'hs', 'pl', 'h', 'py', 'hpp', 'c', 'go', 'css'] + (
+        [] if windows else ['java', 'cpp']
+    ):
         assert handled('file.' + ext)

     assert handled('x.html')
promnesia/tests/sources/test_hypothesis.py
CHANGED
@@ -12,7 +12,7 @@ def index_hypothesis(tmp_path: Path) -> None:
     from promnesia.common import Source
     from promnesia.sources import hypothesis

-    SOURCES = [Source(hypothesis.index, name='hyp')]
+    SOURCES = [Source(hypothesis.index, name='hyp')]  # noqa: F841

     cfg_path = tmp_path / 'config.py'
     write_config(cfg_path, cfg)
@@ -35,5 +35,9 @@ def test_hypothesis(tmp_path: Path) -> None:

     assert vis.norm_url == 'wired.com/2017/04/the-myth-of-a-superhuman-ai'
     assert vis.orig_url == 'https://www.wired.com/2017/04/the-myth-of-a-superhuman-ai/'
-    assert
-
+    assert (
+        vis.locator.href == 'https://hyp.is/_Z9ccmVZEeexBOO7mToqdg/www.wired.com/2017/04/the-myth-of-a-superhuman-ai/'
+    )
+    assert 'misconception about evolution is fueling misconception about AI' in (
+        vis.context or ''
+    )  # contains notes as well
promnesia/tests/sources/test_org.py
CHANGED
@@ -14,7 +14,9 @@ def delrf(s: str | None) -> str | None:


 def test_org_indexer() -> None:
-    [_, cpp, cozy] = [
+    [_, cpp, cozy] = [
+        v if isinstance(v, Visit) else throw(v) for v in extract_from_file(get_testdata('auto/orgs/file.org'))
+    ]

     assert cpp.url == 'https://www.youtube.com/watch?v=rHIkrotSwcc'
     # TODO not sure about filetags?
@@ -33,7 +35,10 @@ def test_org_indexer_2() -> None:
     items = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(get_testdata('auto/orgs/file3.org'))]

     assert len(items) == 6
-    assert
+    assert (
+        items[0].url
+        == 'https://www.reddit.com/r/androidapps/comments/4i36z9/how_you_use_your_android_to_the_maximum/d2uq24i'
+    )
     assert items[1].url == 'https://link.com'
     assert items[-2].url == 'https://en.wikipedia.org/wiki/Resilio_Sync'
     # TODO shit def need org specific url extractor (and then extract from everything remaining)
promnesia/tests/sources/test_plaintext.py
CHANGED
@@ -5,13 +5,15 @@ from ..common import get_testdata, unwrap


 def test_plaintext_path_extractor() -> None:
-    visits = list(
-
-
-
-
-
-
+    visits = list(
+        extract_visits(
+            Source(
+                shellcmd.index,
+                plaintext.extract_from_path(get_testdata('custom')),
+            ),
+            src='whatever',
+        )
+    )
     assert {unwrap(v).orig_url for v in visits} == {
         'http://google.com',
         'http://google.com/',
promnesia/tests/sources/test_shellcmd.py
CHANGED
@@ -8,14 +8,15 @@ from ..common import get_testdata

 @pytest.mark.skipif(_is_windows, reason="no grep on windows")
 def test_via_grep() -> None:
-
-
-
-
-
-
-
-
-
+    visits = list(
+        extract_visits(
+            Source(
+                shellcmd.index,
+                # meh. maybe should deprecate plain string here...
+                r"""grep -Eo -r --no-filename (http|https)://\S+ """ + str(get_testdata('custom')),
+            ),
+            src='whatever',
+        )
+    )
     # TODO I guess filtering of equivalent urls should rather be tested on something having context (e.g. org mode)
     assert len(visits) == 5