promnesia 1.3.20241021__py3-none-any.whl → 1.4.20250909__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- promnesia/__init__.py +4 -1
- promnesia/__main__.py +72 -59
- promnesia/cannon.py +90 -89
- promnesia/common.py +74 -62
- promnesia/compare.py +15 -10
- promnesia/config.py +22 -17
- promnesia/database/dump.py +1 -2
- promnesia/extract.py +6 -6
- promnesia/logging.py +27 -15
- promnesia/misc/install_server.py +25 -19
- promnesia/server.py +69 -53
- promnesia/sources/auto.py +65 -51
- promnesia/sources/browser.py +7 -2
- promnesia/sources/browser_legacy.py +51 -40
- promnesia/sources/demo.py +0 -1
- promnesia/sources/fbmessenger.py +0 -1
- promnesia/sources/filetypes.py +15 -11
- promnesia/sources/github.py +4 -1
- promnesia/sources/guess.py +4 -1
- promnesia/sources/hackernews.py +5 -7
- promnesia/sources/hpi.py +3 -1
- promnesia/sources/html.py +4 -2
- promnesia/sources/instapaper.py +1 -0
- promnesia/sources/markdown.py +4 -4
- promnesia/sources/org.py +17 -8
- promnesia/sources/plaintext.py +14 -11
- promnesia/sources/pocket.py +2 -1
- promnesia/sources/reddit.py +5 -8
- promnesia/sources/roamresearch.py +3 -1
- promnesia/sources/rss.py +4 -5
- promnesia/sources/shellcmd.py +3 -6
- promnesia/sources/signal.py +14 -14
- promnesia/sources/smscalls.py +0 -1
- promnesia/sources/stackexchange.py +2 -2
- promnesia/sources/takeout.py +14 -21
- promnesia/sources/takeout_legacy.py +16 -10
- promnesia/sources/telegram.py +7 -3
- promnesia/sources/telegram_legacy.py +5 -5
- promnesia/sources/twitter.py +1 -1
- promnesia/sources/vcs.py +6 -3
- promnesia/sources/viber.py +2 -2
- promnesia/sources/website.py +4 -3
- promnesia/sqlite.py +10 -7
- promnesia/tests/common.py +2 -0
- promnesia/tests/server_helper.py +2 -2
- promnesia/tests/sources/test_filetypes.py +9 -7
- promnesia/tests/sources/test_hypothesis.py +7 -3
- promnesia/tests/sources/test_org.py +7 -2
- promnesia/tests/sources/test_plaintext.py +9 -7
- promnesia/tests/sources/test_shellcmd.py +10 -9
- promnesia/tests/test_cannon.py +254 -237
- promnesia/tests/test_cli.py +8 -2
- promnesia/tests/test_compare.py +16 -12
- promnesia/tests/test_db_dump.py +4 -3
- promnesia/tests/test_extract.py +7 -4
- promnesia/tests/test_indexer.py +10 -10
- promnesia/tests/test_server.py +10 -10
- promnesia/tests/utils.py +1 -5
- promnesia-1.4.20250909.dist-info/METADATA +66 -0
- promnesia-1.4.20250909.dist-info/RECORD +80 -0
- {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info}/WHEEL +1 -2
- promnesia/kjson.py +0 -122
- promnesia/sources/__init__.pyi +0 -0
- promnesia-1.3.20241021.dist-info/METADATA +0 -55
- promnesia-1.3.20241021.dist-info/RECORD +0 -83
- promnesia-1.3.20241021.dist-info/top_level.txt +0 -1
- {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info}/entry_points.txt +0 -0
- {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info/licenses}/LICENSE +0 -0
promnesia/sources/auto.py
CHANGED
@@ -2,22 +2,23 @@
 - discovers files recursively
 - guesses the format (orgmode/markdown/json/etc) by the extension/MIME type
 - can index most of plaintext files, including source code!
-- autodetects Obsidian vault and adds `obsidian://` app protocol support [[file:../src/promnesia/sources/
-- autodetects Logseq graph and adds `logseq://` app protocol support [[file:../src/promnesia/sources/
+- autodetects Obsidian vault and adds `obsidian://` app protocol support [[file:../src/promnesia/sources/auto_obsidian.py][promnesia.sources.obsidian]]
+- autodetects Logseq graph and adds `logseq://` app protocol support [[file:../src/promnesia/sources/auto_logseq.py][promnesia.sources.logseq]]
 """
+
 from __future__ import annotations
 
 import csv
 import itertools
 import json
 import os
-from collections.abc import Iterable, Iterator, Sequence
+from collections.abc import Callable, Iterable, Iterator, Sequence
 from concurrent.futures import ProcessPoolExecutor as Pool
 from contextlib import nullcontext
 from fnmatch import fnmatch
 from functools import wraps
 from pathlib import Path
-from typing import Any,
+from typing import Any, NamedTuple
 
 from promnesia.common import (
     Loc,
@@ -71,6 +72,7 @@ def collect_from(thing) -> list[EUrl]:
 
 Urls = Iterator[EUrl]
 
+
 def _csv(path: Path) -> Urls:
     # TODO these could also have Loc to be fair..
     with path.open() as fo:
@@ -88,6 +90,7 @@ def _json(path: Path) -> Urls:
 def _plaintext(path: Path) -> Results:
     from . import shellcmd
     from .plaintext import extract_from_path
+
     yield from shellcmd.index(extract_from_path(path))
 
 
@@ -97,6 +100,7 @@ def fallback(ex):
     """Falls back to plaintext in case of issues"""
 
     fallback_active: dict[Any, bool] = {}
+
    @wraps(ex)
     def wrapped(path: Path):
         nonlocal fallback_active
@@ -110,79 +114,83 @@ def fallback(ex):
         except ModuleNotFoundError as me:
             logger = get_logger()
             logger.exception(me)
-            logger.warning(
+            logger.warning(
+                '%s: %s not found, falling back to grep! "pip3 install --user %s" for better support!',
+                path,
+                me.name,
+                me.name,
+            )
             yield me
             fallback_active[ex] = True
             do_fallback = True
         if do_fallback:
             yield from _plaintext(path)
+
     return wrapped
 
 
 @fallback
 def _markdown(path: Path) -> Results:
     from . import markdown
+
     yield from markdown.extract_from_file(path)
 
 
 @fallback
 def _html(path: Path) -> Results:
     from . import html
+
     yield from html.extract_from_file(path)
 
 
 @fallback
 def _org(path: Path) -> Results:
     from . import org
+
     return org.extract_from_file(path)
 
 
 from .filetypes import CODE, IGNORE, TYPE2IDX, type2idx
 
-TYPE2IDX.update(
-    …
-    '.html' : _html,
-    'text/html': _html,
-    'text/xml' : _plaintext,
-
-    'text/x-po': _plaintext, # some translation files
-})
+TYPE2IDX.update(
+    {
+        'application/json': _json,
+        '.json': _json,
+        '.ipynb': _json,
+        '.csv': _csv,
+        'application/csv': _csv,
+        '.org': _org,
+        '.org_archive': _org,
+        '.md': _markdown,
+        '.markdown': _markdown,
+        'text/plain': _plaintext,
+        '.txt': _plaintext,
+        '.page': _plaintext,
+        '.rst': _plaintext,
+        # TODO doesn't work that great; weird stuff like
+        # builtins.ImportError.name|2019-07-10T12:12:35.584510+00:00|names::ImportError::node::names::name::node::fullname
+        # TODO could have stricter url extraction for that; always using http/https?
+        # '.ipynb' : _json,
+        '.html': _html,
+        'text/html': _html,
+        'text/xml': _plaintext,
+        'text/x-po': _plaintext,  # some translation files
+    }
+)
 
 for t in CODE:
     TYPE2IDX[t] = _plaintext
 # TODO ok, mime doesn't really tell between org/markdown/etc anyway
 
 
-Replacer =
+Replacer = Callable[[str, str], str] | None
+
 
 def index(
-    …
+    *paths: PathIsh,
+    ignored: Sequence[str] | str = (),
+    follow: bool = True,
+    replacer: Replacer = None,
 ) -> Results:
     '''
     path : a path or list of paths to recursively index
@@ -215,13 +223,14 @@ def index(
     )
     yield from _index(apath, opts=opts)
 
+
 class Options(NamedTuple):
     ignored: Sequence[str]
     follow: bool
     # TODO option to add ignores? not sure..
     # TODO I don't like this replacer thing... think about removing it
     replacer: Replacer
-    root: Path | None=None
+    root: Path | None = None
 
 
 def _index_file_aux(path: Path, opts: Options) -> Exception | list[Result]:
@@ -237,14 +246,14 @@ def _index(path: Path, opts: Options) -> Results:
     logger = get_logger()
 
     cores = use_cores()
-    if cores is None:
+    if cores is None:  # do not use cores
         # todo use ExitStack instead?
         pool = nullcontext()
-        mapper = map
+        mapper = map  # dummy pool
     else:
         workers = None if cores == 0 else cores
-        pool = Pool(workers)
-        mapper = pool.map
+        pool = Pool(workers)  # type: ignore[assignment]
+        mapper = pool.map  # type: ignore[attr-defined]
 
     # iterate over resolved paths, to avoid duplicates
     def rit() -> Iterable[Path]:
@@ -254,7 +263,7 @@ def _index(path: Path, opts: Options) -> Results:
             # TODO not sure if should log here... might end up with quite a bit of logs
             logger.debug('ignoring %s: user ignore rules', p)
             continue
-        if any(i in p.parts for i in IGNORE):
+        if any(i in p.parts for i in IGNORE):  # meh, not very efficient.. pass to traverse??
             logger.debug('ignoring %s: default ignore rules', p)
             continue
 
@@ -266,6 +275,7 @@ def _index(path: Path, opts: Options) -> Results:
             yield p
 
     from more_itertools import unique_everseen
+
     it = unique_everseen(rit())
 
     with pool:
@@ -302,9 +312,10 @@ def _index_file(pp: Path, opts: Options) -> Results:
     # TODO not even sure if it's used...
     suf = pp.suffix.lower()
 
-    if suf == '.xz':
+    if suf == '.xz':  # TODO zstd?
         import lzma
-        …
+
+        uname = pp.name[: -len('.xz')]  # chop off suffix, so the downstream indexer can handle it
 
     assert pp.is_absolute(), pp
     # make sure to keep hierarchy, otherwise might end up with some name conflicts if filenames clash
@@ -359,7 +370,8 @@ def _index_file(pp: Path, opts: Options) -> Results:
         v = r
 
         loc = v.locator
-        …
+        # FIXME double checke that v.locator indeed can't be none and remove the check?
+        if loc is not None and root is not None:  # type: ignore[redundant-expr]
             # meh. but it works
             # todo potentially, just use dataclasses instead...
             loc = loc._replace(title=loc.title.replace(str(root) + os.sep, ''))
@@ -369,7 +381,9 @@ def _index_file(pp: Path, opts: Options) -> Results:
            upd: dict[str, Any] = {}
            href = v.locator.href
            if href is not None:
-                upd['locator'] = v.locator._replace(
+                upd['locator'] = v.locator._replace(
+                    href=replacer(href, str(root)), title=replacer(v.locator.title, str(root))
+                )
            ctx = v.context
            if ctx is not None:
                # TODO in context, http is unnecessary
promnesia/sources/browser.py
CHANGED
@@ -13,16 +13,17 @@ from promnesia.common import Loc, PathIsh, Results, Second, Visit, is_sqlite_db, logger
 
 
 def index(p: PathIsh | None = None) -> Results:
-    from . import hpi # noqa: F401
+    from . import hpi  # noqa: F401
 
     if p is None:
         from my.browser.all import history
+
         yield from _index_new(history())
         return
 
     warnings.warn(
         f'Passing paths to promnesia.sources.browser is deprecated, you should setup my.browser.export instead. '
-        f'See https://github.com/
+        f'See https://github.com/purarue/browserexport#hpi .'
         f'Will try to hack path to browser databases {p} into HPI config.'
     )
     try:
@@ -50,12 +51,14 @@ def _index_new_with_adhoc_config(*, path: PathIsh) -> Results:
     ## this would result in each subsequent call to my.browser.export.history to invalidate cache every time
     ## so we hack cachew path so it's different for each call
     from my.core.core_config import config as hpi_core_config
+
     hpi_cache_dir = hpi_core_config.get_cache_dir()
     sanitized_path = re.sub(r'\W', '_', str(path))
     cache_override = None if hpi_cache_dir is None else hpi_cache_dir / sanitized_path
     ##
 
     from my.core.common import Paths, classproperty, get_files
+
     class config:
         class core:
             cache_dir = cache_override
@@ -67,8 +70,10 @@ def _index_new_with_adhoc_config(*, path: PathIsh) -> Results:
             return tuple([f for f in get_files(path, glob='**/*') if is_sqlite_db(f)])
 
     from my.core.cfg import tmp_config
+
     with tmp_config(modules='my.browser.export|my.core.core_config', config=config):
         from my.browser.export import history
+
         yield from _index_new(history())
 
 
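The deprecation path shown above still works: a path argument emits the warning and gets hacked into a temporary HPI config via `my.core.cfg.tmp_config`, with a per-path cachew cache dir. Roughly, the two supported call styles look like this (a sketch; the database path is made up):

```python
# Sketch of the two call styles distinguished above; the path is illustrative.
from promnesia.common import Source
from promnesia.sources import browser

SOURCES = [
    # preferred: no argument -- visits come from HPI's my.browser.all.history(),
    # configured via my.browser.export (https://github.com/purarue/browserexport#hpi)
    Source(browser.index),
    # deprecated: a path triggers the warning above, then gets wrapped into an
    # ad-hoc HPI config under a per-path cachew cache dir
    # Source(browser.index, '/backups/browser/databases'),
]
```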
promnesia/sources/browser_legacy.py
CHANGED
@@ -1,12 +1,10 @@
 from __future__ import annotations
 
 import sqlite3
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import Path
 from urllib.parse import unquote
 
-import pytz
-
 from promnesia import config
 from promnesia.common import Loc, PathIsh, Results, Second, Visit, is_sqlite_db, logger
 
@@ -15,6 +13,7 @@ try:
 except ModuleNotFoundError as me:
     if me.name != 'cachew':
         raise me
+
     # this module is legacy anyway, so just make it defensive
     def cachew(*args, **kwargs):  # type: ignore[no-redef]
         return lambda f: f
@@ -22,7 +21,7 @@ except ModuleNotFoundError as me:
 
 def index(p: PathIsh) -> Results:
     pp = Path(p)
-    assert pp.exists(), pp
+    assert pp.exists(), pp  # just in case of broken symlinks
 
     # todo warn if filtered out too many?
     # todo wonder how quickly mimes can be computed?
@@ -31,14 +30,14 @@ def index(p: PathIsh) -> Results:
 
     assert len(dbs) > 0, pp
     logger.info('processing %d databases', len(dbs))
-    cname = str('_'.join(pp.parts[1:]))
+    cname = str('_'.join(pp.parts[1:]))  # meh
     yield from _index_dbs(dbs, cachew_name=cname)
 
 
-
 def _index_dbs(dbs: list[Path], cachew_name: str):
     # TODO right... not ideal, need to think how to handle it properly...
     import sys
+
     sys.setrecursionlimit(5000)
 
     cache_dir = config.get().cache_dir
@@ -49,13 +48,13 @@ def _index_dbs(dbs: list[Path], cachew_name: str):
 
     # todo wow, stack traces are ridiculous here...
     # todo hmm, feels like it should be a class or something?
-    @cachew(lambda cp, dbs, emitted: cp, depends_on=lambda cp, dbs, emitted: dbs)
+    @cachew(lambda cp, dbs, emitted: cp, depends_on=lambda cp, dbs, emitted: dbs)  # , logger=logger)  # noqa: ARG005
     def _index_dbs_aux(cache_path: Path | None, dbs: list[Path], emitted: set) -> Results:
         if len(dbs) == 0:
             return
 
         xs = dbs[:-1]
-        x
+        x = dbs[-1:]
 
         xs_res = _index_dbs_aux(cache_path, xs, emitted)
         xs_was_cached = False
@@ -66,36 +65,38 @@ def _index_dbs_aux(cache_path: Path | None, dbs: list[Path], emitted: set) -> Results:
             logger.debug('seems that %d first items were previously cached', len(xs))
         if xs_was_cached:
             key = str(r) if isinstance(r, Exception) else (r.url, r.dt)
-            assert key not in emitted, key
+            assert key not in emitted, key  # todo not sure if this assert is necessary?
             # hmm ok it might happen if we messed up with indexing individual db?
             # alternatively, could abuse it to avoid messing with 'emitted' in _index_db?
             emitted.add(key)
-        yield r
+        yield r  # todo not sure about exceptions?
 
     for db in x:
         yield from _index_db(db, emitted=emitted)
 
 
 def _index_db(db: Path, emitted: set):
-    logger.info('processing %s', db)
+    logger.info('processing %s', db)  # debug level?
 
     # todo schema check (not so critical for cachew though)
     total = 0
-    new
-    loc = Loc.file(
+    new = 0
+    loc = Loc.file(
+        db
+    )  # todo possibly needs to be optimized -- moving from within the loop considerably speeds everything up
     with sqlite3.connect(f'file:{db}?immutable=1', uri=True) as c:
         browser = None
         for b in [Chrome, Firefox, FirefoxPhone, Safari]:
             try:
                 c.execute(f'SELECT * FROM {b.detector}')
-            except sqlite3.OperationalError:
+            except sqlite3.OperationalError:  # not sure if the right kind?
                 pass
             else:
                 browser = b
                 break
         assert browser is not None
 
-        proj
+        proj = ', '.join(c for c, _ in browser.schema.cols)
         query = browser.query.replace('chunk.', '')
 
         c.row_factory = sqlite3.Row
@@ -123,7 +124,7 @@ ColType = str
 
 
 from collections.abc import Sequence
-from typing import NamedTuple
+from typing import NamedTuple
 
 
 class Schema(NamedTuple):
@@ -131,7 +132,7 @@ class Schema(NamedTuple):
     key: Sequence[str]
 
 
-SchemaCheck = tuple[str,
+SchemaCheck = tuple[str, str | Sequence[str]]  # todo Union: meh
 
 from dataclasses import dataclass
 
@@ -151,14 +152,15 @@ class Extr:
 
 
 class Chrome(Extr):
-    detector='keyword_search_terms'
+    detector = 'keyword_search_terms'
+    # fmt: off
     schema_check=(
         'visits', [
             'visits', "id, url, visit_time, from_visit, transition, segment_id, visit_duration, incremented_omnibox_typed_score",
             'visits', "id, url, visit_time, from_visit, transition, segment_id, visit_duration"
         ]
     )
-    schema=Schema(cols=[
+    schema = Schema(cols=[
         ('U.url' , 'TEXT' ),
 
         # while these two are not very useful, might be good to have just in case for some debugging
@@ -172,16 +174,17 @@ class Chrome(Extr):
         ('V.visit_duration' , 'INTEGER NOT NULL'),
         # V.omnibox thing looks useless
     ], key=('url', 'visit_time', 'vid', 'urlid'))
-    …
+    # fmt: on
+    query = 'FROM chunk.visits as V, chunk.urls as U WHERE V.url = U.id'
 
     @staticmethod
     def row2visit(row: sqlite3.Row, loc: Loc) -> Visit:
-        url
-        ts
+        url = row['url']
+        ts = row['visit_time']
         durs = row['visit_duration']
 
         dt = chrome_time_to_utc(int(ts))
-        url = unquote(url)
+        url = unquote(url)  # chrome urls are all quoted
         dd = int(durs)
         dur: Second | None = None if dd == 0 else dd // 1_000_000
         return Visit(
@@ -196,12 +199,12 @@
 # yep, tested it and looks like utc
 def chrome_time_to_utc(chrome_time: int) -> datetime:
     epoch = (chrome_time / 1_000_000) - 11644473600
-    return datetime.fromtimestamp(epoch,
+    return datetime.fromtimestamp(epoch, timezone.utc)
 
 
 def _row2visit_firefox(row: sqlite3.Row, loc: Loc) -> Visit:
     url = row['url']
-    ts
+    ts = float(row['visit_date'])
     # ok, looks like it's unix epoch
     # https://stackoverflow.com/a/19430099/706389
 
@@ -214,17 +217,19 @@ def _row2visit_firefox(row: sqlite3.Row, loc: Loc) -> Visit:
     else:
         # milliseconds
         ts /= 1_000
-    dt = datetime.fromtimestamp(ts,
-    url = unquote(url)
+    dt = datetime.fromtimestamp(ts, timezone.utc)
+    url = unquote(url)  # firefox urls are all quoted
     return Visit(
         url=url,
         dt=dt,
         locator=loc,
     )
 
+
 # https://web.archive.org/web/20201026130310/http://fileformats.archiveteam.org/wiki/History.db
 class Safari(Extr):
-    detector='history_tombstones'
+    detector = 'history_tombstones'
+    # fmt: off
     schema_check=(
         'history_visits', [
             'history_visits', "id, history_item, visit_time",
@@ -245,13 +250,14 @@ class Safari(Extr):
         # ('V.visit_duration' , 'INTEGER NOT NULL'),
         # V.omnibox thing looks useless
     ], key=('url', 'visit_time', 'vid', 'urlid'))
-    …
+    # fmt: on
+    query = 'FROM chunk.history_visits as V, chunk.history_items as U WHERE V.history_item = U.id'
 
     @staticmethod
     def row2visit(row: sqlite3.Row, loc: Loc) -> Visit:
-        url
-        ts
-        dt = datetime.fromtimestamp(ts,
+        url = row['url']
+        ts = row['visit_time'] + 978307200  # https://stackoverflow.com/a/34546556/16645
+        dt = datetime.fromtimestamp(ts, timezone.utc)
 
         return Visit(
             url=url,
@@ -259,10 +265,12 @@ class Safari(Extr):
             locator=loc,
         )
 
+
 # https://web.archive.org/web/20190730231715/https://www.forensicswiki.org/wiki/Mozilla_Firefox_3_History_File_Format#moz_historyvisits
 class Firefox(Extr):
-    detector='moz_meta'
-    schema_check=('moz_historyvisits', "id, from_visit, place_id, visit_date, visit_type")
+    detector = 'moz_meta'
+    schema_check = ('moz_historyvisits', "id, from_visit, place_id, visit_date, visit_type")
+    # fmt: off
     schema=Schema(cols=[
         ('P.url' , 'TEXT'),
 
@@ -278,14 +286,16 @@ class Firefox(Extr):
         # needs to be defensive
         # ('V.session' , 'INTEGER'),
     ], key=('url', 'visit_date', 'vid', 'pid'))
-    …
+    # fmt: on
+    query = 'FROM chunk.moz_historyvisits as V, chunk.moz_places as P WHERE V.place_id = P.id'
 
-    row2visit = _row2visit_firefox
+    row2visit = _row2visit_firefox  # type: ignore[assignment]
 
 
 class FirefoxPhone(Extr):
-    detector='remote_devices'
-    schema_check=('visits', "_id, history_guid, visit_type, date, is_local")
+    detector = 'remote_devices'
+    schema_check = ('visits', "_id, history_guid, visit_type, date, is_local")
+    # fmt: off
     schema=Schema(cols=[
         ('H.url' , 'TEXT NOT NULL' ),
 
@@ -297,6 +307,7 @@ class FirefoxPhone(Extr):
         ('V.date as visit_date', 'INTEGER NOT NULL'),
         # ('is_local' , 'INTEGER NOT NULL'),
     ], key=('url', 'date', 'vid', 'hid'))
-    …
+    # fmt: on
+    query = 'FROM chunk.visits as V, chunk.history as H WHERE V.history_guid = H.guid'
 
-    row2visit = _row2visit_firefox
+    row2visit = _row2visit_firefox  # type: ignore[assignment]
promnesia/sources/demo.py
CHANGED
@@ -21,7 +21,6 @@ def index(
     base_dt: datetime | IsoFormatDt = datetime.min + timedelta(days=5000),
     delta: timedelta | Seconds = timedelta(hours=1),
 ) -> Results:
-
     base_dt_ = base_dt if isinstance(base_dt, datetime) else datetime.fromisoformat(base_dt)
     delta_ = delta if isinstance(delta, timedelta) else timedelta(seconds=delta)
 
promnesia/sources/fbmessenger.py
CHANGED
promnesia/sources/filetypes.py
CHANGED
@@ -1,23 +1,26 @@
 from __future__ import annotations
 
-from collections.abc import Iterable, Sequence
+from collections.abc import Callable, Iterable, Sequence
 from functools import lru_cache
 from pathlib import Path
-from typing import
+from typing import NamedTuple
 
 from ..common import Results, Url
 
 # TODO doesn't really belong here...
 Ctx = Sequence[str]
 
+
 class EUrl(NamedTuple):
     url: Url
-    ctx: Ctx
+    ctx: Ctx  # TODO ctx here is more like a Loc
+
+
 ###
 
 
 # keys are mime types + extensions
-Ex = Callable[[Path],
+Ex = Callable[[Path], Results | Iterable[EUrl]]
 # None means unhandled
 TYPE2IDX: dict[str, Ex | None] = {}
 # NOTE: there are some types in auto.py at the moment... it's a bit messy
@@ -27,13 +30,13 @@ TYPE2IDX: dict[str, Ex | None] = {}
 @lru_cache(None)
 def type2idx(t: str) -> Ex | None:
     if len(t) == 0:
-        return None
+        return None  # just in case?
     # first try exact match
-    e = TYPE2IDX.get(t
+    e = TYPE2IDX.get(t)
     if e is not None:
         return e
     t = t.strip('.')
-    e = TYPE2IDX.get(t
+    e = TYPE2IDX.get(t)
     if e is not None:
         return e
     # otherwise, try prefixes?
@@ -42,6 +45,7 @@ def type2idx(t: str) -> Ex | None:
         return v
     return None
 
+
 # for now source code just indexed with grep, not sure if it's good enough?
 # if not, some fanceir library could be used...
 # e.g. https://github.com/karlicoss/promnesia/pull/152/commits/c2f00eb4ee4018b02c9bf3966a036db69a43373d
@@ -82,7 +86,7 @@ CODE = {
 
     '.ts', # most likely typescript.. otherwise determined as text/vnd.trolltech.linguist mime
     '.js',
-}
+}  # fmt: skip
 # TODO discover more extensions with mimetypes library?
 
 
@@ -100,6 +104,7 @@ video/
 
 handle_later = lambda *_args, **_kwargs: ()
 
+
 def ignore(*_args, **_kwargs):
     # TODO log (once?)
     yield from ()
@@ -129,7 +134,7 @@ TYPE2IDX.update({
     'application/zip' : handle_later,
     'application/x-tar' : handle_later,
     'application/gzip' : handle_later,
-})
+})  # fmt: skip
 
 
 # TODO use some existing file for initial gitignore..
@@ -148,5 +153,4 @@ IGNORE = [
     # TODO not sure about these:
     '.gitignore',
     '.babelrc',
-]
-
+]  # fmt: skip