promnesia 1.2.20230515__py3-none-any.whl → 1.2.20240810__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
- promnesia/__main__.py +26 -14
- promnesia/cannon.py +4 -4
- promnesia/common.py +39 -28
- promnesia/compare.py +3 -2
- promnesia/config.py +4 -2
- promnesia/database/common.py +66 -0
- promnesia/database/dump.py +187 -0
- promnesia/{read_db.py → database/load.py} +10 -11
- promnesia/extract.py +1 -0
- promnesia/kjson.py +1 -1
- promnesia/logging.py +3 -3
- promnesia/misc/__init__.pyi +0 -0
- promnesia/misc/config_example.py +1 -2
- promnesia/misc/install_server.py +2 -3
- promnesia/server.py +18 -19
- promnesia/sources/__init__.pyi +0 -0
- promnesia/sources/auto.py +9 -7
- promnesia/sources/browser_legacy.py +11 -5
- promnesia/sources/demo.py +18 -2
- promnesia/sources/filetypes.py +7 -0
- promnesia/sources/github.py +2 -2
- promnesia/sources/hypothesis.py +1 -1
- promnesia/sources/markdown.py +15 -15
- promnesia/sources/org.py +7 -3
- promnesia/sources/plaintext.py +3 -1
- promnesia/sources/reddit.py +2 -2
- promnesia/sources/rss.py +1 -1
- promnesia/sources/signal.py +22 -14
- promnesia/sources/stackexchange.py +2 -2
- promnesia/sources/takeout.py +58 -1
- promnesia/sources/takeout_legacy.py +10 -2
- promnesia/tests/__init__.py +0 -0
- promnesia/tests/common.py +137 -0
- promnesia/tests/server_helper.py +64 -0
- promnesia/tests/sources/__init__.py +0 -0
- promnesia/tests/sources/test_auto.py +66 -0
- promnesia/tests/sources/test_filetypes.py +42 -0
- promnesia/tests/sources/test_hypothesis.py +39 -0
- promnesia/tests/sources/test_org.py +65 -0
- promnesia/tests/sources/test_plaintext.py +26 -0
- promnesia/tests/sources/test_shellcmd.py +22 -0
- promnesia/tests/sources/test_takeout.py +58 -0
- promnesia/tests/test_cannon.py +325 -0
- promnesia/tests/test_cli.py +42 -0
- promnesia/tests/test_compare.py +30 -0
- promnesia/tests/test_config.py +290 -0
- promnesia/tests/test_db_dump.py +223 -0
- promnesia/tests/test_extract.py +61 -0
- promnesia/tests/test_extract_urls.py +43 -0
- promnesia/tests/test_indexer.py +245 -0
- promnesia/tests/test_server.py +292 -0
- promnesia/tests/test_traverse.py +41 -0
- promnesia/tests/utils.py +35 -0
- {promnesia-1.2.20230515.dist-info → promnesia-1.2.20240810.dist-info}/METADATA +13 -17
- promnesia-1.2.20240810.dist-info/RECORD +83 -0
- {promnesia-1.2.20230515.dist-info → promnesia-1.2.20240810.dist-info}/WHEEL +1 -1
- {promnesia-1.2.20230515.dist-info → promnesia-1.2.20240810.dist-info}/entry_points.txt +0 -1
- promnesia/dump.py +0 -105
- promnesia-1.2.20230515.dist-info/RECORD +0 -58
- {promnesia-1.2.20230515.dist-info → promnesia-1.2.20240810.dist-info}/LICENSE +0 -0
- {promnesia-1.2.20230515.dist-info → promnesia-1.2.20240810.dist-info}/top_level.txt +0 -0
promnesia/logging.py
CHANGED
```diff
@@ -61,7 +61,7 @@ _init_done = 'lazylogger_init_done'
 def setup_logger(logger: logging.Logger, level: LevelIsh) -> None:
     lvl = mklevel(level)
     try:
-        import logzero  # type: ignore[import]
+        import logzero  # type: ignore[import-not-found]
         formatter = logzero.LogFormatter(
             fmt=FORMAT_COLOR,
             datefmt=DATEFMT,
@@ -75,7 +75,7 @@ def setup_logger(logger: logging.Logger, level: LevelIsh) -> None:
     logger.addFilter(AddExceptionTraceback())
     if use_logzero and not COLLAPSE_DEBUG_LOGS:  # all set, nothing to do
         # 'simple' setup
-        logzero.setup_logger(logger.name, level=lvl, formatter=formatter)
+        logzero.setup_logger(logger.name, level=lvl, formatter=formatter)  # type: ignore[possibly-undefined]
         return

     h = CollapseDebugHandler() if COLLAPSE_DEBUG_LOGS else logging.StreamHandler()
@@ -101,7 +101,7 @@ class LazyLogger(logging.Logger):
         # oh god.. otherwise might go into an inf loop
         if not hasattr(logger, _init_done):
             setattr(logger, _init_done, False)  # will setup on the first call
-            logger.isEnabledFor = isEnabledFor_lazyinit  # type: ignore[assignment]
+            logger.isEnabledFor = isEnabledFor_lazyinit  # type: ignore[method-assign]
         return cast(LazyLogger, logger)
```
promnesia/misc/config_example.py
CHANGED
promnesia/misc/install_server.py
CHANGED
```diff
@@ -7,6 +7,7 @@ import sys
 import time
 from pathlib import Path
 import platform
+import shutil
 from subprocess import check_call, run
 from typing import List

@@ -118,9 +119,7 @@ def install(args: argparse.Namespace) -> None:
     if os.environ.get('DIRTY_RUN') is not None:
         launcher = str(root() / 'scripts/promnesia')
     else:
-
-        import distutils.spawn
-        exe = distutils.spawn.find_executable('promnesia'); assert exe is not None
+        exe = shutil.which('promnesia'); assert exe is not None
         launcher = exe  # older systemd wants absolute paths..

     db = args.db
```
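The `install_server` change swaps `distutils.spawn.find_executable` for `shutil.which`, ahead of distutils' removal in Python 3.12. A minimal sketch of the replacement:

```python
import shutil

# shutil.which is the stdlib replacement for the deprecated
# distutils.spawn.find_executable: it returns the absolute path of the
# first matching executable on PATH, or None if nothing matches.
exe = shutil.which('promnesia')
assert exe is not None, 'promnesia executable not found on PATH'
print(exe)
```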
promnesia/server.py
CHANGED
```diff
@@ -1,12 +1,11 @@
 #!/usr/bin/python3
 from __future__ import annotations

-__package__ = 'promnesia'  # ugh. hacky way to make wsgi runner work properly...
-
 import argparse
 from dataclasses import dataclass
 from datetime import timedelta
 from functools import lru_cache
+import importlib.metadata
 import json
 import logging
 import os
@@ -19,7 +18,7 @@ from pytz import BaseTzInfo

 import fastapi

-from sqlalchemy import
+from sqlalchemy import literal, between, or_, and_, exc, select
 from sqlalchemy import Column, Table, func, types
 from sqlalchemy.sql.elements import ColumnElement
 from sqlalchemy.sql import text
@@ -27,6 +26,7 @@ from sqlalchemy.sql import text

 from .common import PathWithMtime, DbVisit, Url, setup_logger, default_output_dir, get_system_tz
 from .cannon import canonify
+from .database.load import DbStuff, get_db_stuff, row_to_db_visit


 Json = Dict[str, Any]
@@ -51,8 +51,7 @@ def get_logger() -> logging.Logger:


 def get_version() -> str:
-    from pkg_resources import get_distribution
-    return get_distribution(__package__).version
+    return importlib.metadata.version(__package__)


 class ServerConfig(NamedTuple):
@@ -119,8 +118,6 @@ def get_db_path(check: bool=True) -> Path:
     return db


-from .read_db import DbStuff, get_db_stuff
-
 @lru_cache(1)
 # PathWithMtime aids lru_cache in reloading the sqlalchemy binder
 def _get_stuff(db_path: PathWithMtime) -> DbStuff:
@@ -136,7 +133,7 @@ def get_stuff(db_path: Optional[Path]=None) -> DbStuff:  # TODO better name


 def db_stats(db_path: Path) -> Json:
-    engine,
+    engine, table = get_stuff(db_path)
     query = select(func.count()).select_from(table)
     with engine.connect() as conn:
         total = list(conn.execute(query))[0][0]
@@ -151,8 +148,8 @@ class Where(Protocol):

 @dataclass
 class VisitsResponse:
-    original_url:
-    normalised_url:
+    original_url: str
+    normalised_url: str
     visits: Any


@@ -167,7 +164,7 @@ def search_common(url: str, where: Where) -> VisitsResponse:
     url = original_url
     logger.info('normalised url: %s', url)

-    engine,
+    engine, table = get_stuff()

     query = table.select().where(where(table=table, url=url))
     logger.debug('query: %s', query)
@@ -175,7 +172,7 @@ def search_common(url: str, where: Where) -> VisitsResponse:
     with engine.connect() as conn:
         try:
             # TODO make more defensive here
-            visits: List[DbVisit] = [
+            visits: List[DbVisit] = [row_to_db_visit(row) for row in conn.execute(query)]
         except exc.OperationalError as e:
             if getattr(e, 'msg', None) == 'no such table: visits':
                 logger.warn('you may have to run indexer first!')
@@ -232,6 +229,7 @@ def status() -> Json:
     try:
         version = get_version()
     except Exception as e:
+        logger.exception(e)
         version = None

     return {
@@ -241,10 +239,9 @@ def status() -> Json:
     }


-from dataclasses import dataclass
 @dataclass
 class VisitsRequest:
-    url:
+    url: str

 @app.get ('/visits', response_model=VisitsResponse)
 @app.post('/visits', response_model=VisitsResponse)
@@ -255,15 +252,17 @@ def visits(request: VisitsRequest) -> VisitsResponse:
         url=url,
         # odd, doesn't work just with: x or (y and z)
         where=lambda table, url: or_(
-
-
+            # exact match
+            table.c.norm_url == url,
+            # + child visits, but only 'interesting' ones
+            and_(table.c.context != None, table.c.norm_url.startswith(url, autoescape=True))  # noqa: E711
         ),
     )


 @dataclass
 class SearchRequest:
-    url:
+    url: str

 @app.get ('/search', response_model=VisitsResponse)
 @app.post('/search', response_model=VisitsResponse)
@@ -361,7 +360,7 @@ def visited(request: VisitedRequest) -> VisitedResponse:
     if len(snurls) == 0:
         return []

-    engine,
+    engine, table = get_stuff()

     # sqlalchemy doesn't seem to support SELECT FROM (VALUES (...)) in its api
     # also doesn't support array binding...
@@ -389,7 +388,7 @@ SELECT queried, visits.*
     # brings down large queries to 50ms...
     with engine.connect() as conn:
         res = list(conn.execute(query))
-    present: Dict[str, Any] = {row[0]:
+    present: Dict[str, Any] = {row[0]: row_to_db_visit(row[1:]) for row in res}
     results = []
     for nu in nurls:
         r = present.get(nu, None)
```
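`get_version` now reads the installed distribution's metadata via the stdlib `importlib.metadata` (available since Python 3.8) instead of the deprecated `pkg_resources`. A small sketch of the equivalence:

```python
import importlib.metadata

# modern stdlib lookup, as used by the new get_version()
version = importlib.metadata.version('promnesia')

# the pkg_resources spelling it replaces:
#   from pkg_resources import get_distribution
#   version = get_distribution('promnesia').version
print(version)
```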
promnesia/sources/auto.py
CHANGED
```diff
@@ -22,17 +22,18 @@ import warnings
 import pytz

 from ..common import Visit, Url, PathIsh, get_logger, Loc, get_tmpdir, extract_urls, Extraction, Result, Results, mime, traverse, file_mtime, echain, logger
+from ..common import warn_once
 from ..config import use_cores


-from .filetypes import EUrl
+from .filetypes import EUrl, Ctx
 from .auto_obsidian import obsidian_replacer
 from .auto_logseq import logseq_replacer


 def _collect(thing, path: List[str], result: List[EUrl]) -> None:
     if isinstance(thing, str):
-        ctx: Ctx = tuple(path)
+        ctx: Ctx = tuple(path)
         result.extend([EUrl(url=u, ctx=ctx) for u in extract_urls(thing)])
     elif isinstance(thing, list):
         path.append('[]')
@@ -167,7 +168,7 @@ for t in CODE:
 Replacer = Optional[Callable[[str, str], str]]

 def index(
-    *paths:
+    *paths: PathIsh,
     ignored: Union[Sequence[str], str]=(),
     follow: bool=True,
     replacer: Replacer=None,
@@ -282,6 +283,8 @@ def by_path(pp: Path) -> Tuple[Optional[Ex], Optional[Mime]]:

 def _index_file(pp: Path, opts: Options) -> Results:
     logger = get_logger()
+    # TODO need to keep debug logs here...
+    # logger.info(f"indexing {pp}")
     # TODO use kompress?
     # TODO not even sure if it's used...
     suf = pp.suffix.lower()
@@ -307,10 +310,9 @@ def _index_file(pp: Path, opts: Options) -> Results:
     ip, pm = by_path(pp)
     if ip is None:
-        #
-        # TODO only log once? # hmm..
+        # todo not really sure about using warnings vs yielding error here?
         msg = f'No extractor for suffix {suf}, mime {pm}'
-
+        warn_once(msg)
         yield echain(ex, RuntimeError(msg))
         return

@@ -318,7 +320,7 @@ def _index_file(pp: Path, opts: Options) -> Results:

     def indexer() -> Union[Urls, Results]:
         # eh, annoying.. need to make more generic..
-        idx = ip(pp)
+        idx = ip(pp)
         try:
             yield from idx
         except Exception as e:
```
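`warn_once` is imported from `promnesia.common`, which isn't shown in this diff. A plausible sketch of such a helper (hypothetical — the real implementation may differ) deduplicates repeated messages so a directory full of unindexable files warns only once per distinct message:

```python
import warnings
from functools import lru_cache

@lru_cache(maxsize=None)
def warn_once(msg: str) -> None:
    # lru_cache keys on the message, so each distinct warning is
    # emitted at most once per process (hypothetical sketch)
    warnings.warn(msg, stacklevel=2)

warn_once('No extractor for suffix .xyz, mime None')
warn_once('No extractor for suffix .xyz, mime None')  # suppressed by the cache
```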
promnesia/sources/browser_legacy.py
CHANGED
```diff
@@ -2,15 +2,21 @@ from datetime import datetime
 from pathlib import Path
 from urllib.parse import unquote
 import sqlite3
-from typing import List, Set
+from typing import List, Set, Optional

 import pytz

 from ..common import PathIsh, Results, Visit, Loc, logger, Second, is_sqlite_db
 from .. import config

-
-from cachew import cachew
+try:
+    from cachew import cachew
+except ModuleNotFoundError as me:
+    if me.name != 'cachew':
+        raise me
+    # this module is legacy anyway, so just make it defensive
+    def cachew(*args, **kwargs):  # type: ignore[no-redef]
+        return lambda f: f


 def index(p: PathIsh) -> Results:
@@ -43,7 +49,7 @@ def _index_dbs(dbs: List[Path], cachew_name: str):
     # todo wow, stack traces are ridiculous here...
     # todo hmm, feels like it should be a class or something?
     @cachew(lambda cp, dbs, emitted: cp, depends_on=lambda cp, dbs, emitted: dbs)  # , logger=logger)
-    def _index_dbs_aux(cache_path: Path, dbs: List[Path], emitted: Set) -> Results:
+    def _index_dbs_aux(cache_path: Optional[Path], dbs: List[Path], emitted: Set) -> Results:
         if len(dbs) == 0:
             return

@@ -58,7 +64,7 @@ def _index_dbs_aux(cache_path: Path, dbs: List[Path], emitted: Set) -> Results:
                 xs_was_cached = True
                 logger.debug('seems that %d first items were previously cached', len(xs))
             if xs_was_cached:
-                key = (r.url, r.dt)
+                key = str(r) if isinstance(r, Exception) else (r.url, r.dt)
                 assert key not in emitted, key  # todo not sure if this assert is necessary?
                 # hmm ok it might happen if we messed up with indexing individual db?
                 # alternatively, could abuse it to avoid messing with 'emitted' in _index_db?
```
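The hunk above makes the `cachew` dependency optional by falling back to a no-op decorator factory when the module is missing. A self-contained demo of the fallback path (pretending `cachew` is absent):

```python
def cachew(*args, **kwargs):
    # no-op stand-in: accept whatever arguments the real cachew would,
    # then return the decorated function unchanged
    return lambda f: f

@cachew(lambda cp: cp, depends_on=lambda cp: cp)
def _index(cp):
    return [cp]

assert _index('history.sqlite') == ['history.sqlite']  # behaves as if undecorated
```

Note the `me.name != 'cachew'` guard: it re-raises when the `ModuleNotFoundError` comes from one of cachew's own missing dependencies rather than cachew itself, so unrelated breakage isn't silently masked.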
promnesia/sources/demo.py
CHANGED
```diff
@@ -4,17 +4,33 @@ Generates a sequence of fake evenly separated visits
 '''

 from datetime import datetime, timedelta
+from typing import Union

 from ..common import Results, Visit, Loc


-def index(count: int=100) -> Results:
+IsoFormatDt = str
+Seconds = int
+
+
+# TODO allow passing isoformat string as base_dt?
+# and maybe something similar as delta? start with seconds maybe
+def index(
+    count: int=100,
+    *,
+    base_dt: Union[datetime, IsoFormatDt] = datetime.min + timedelta(days=5000),
+    delta: Union[timedelta, Seconds] = timedelta(hours=1),
+) -> Results:
+
+    base_dt_ = base_dt if isinstance(base_dt, datetime) else datetime.fromisoformat(base_dt)
+    delta_ = delta if isinstance(delta, timedelta) else timedelta(seconds=delta)
+
     # todo with some errors too?
     # todo use data generation library suggested for HPI?
     for i in range(count):
         yield Visit(
             url=f'https://demo.com/page{i}.html',
-            dt=
+            dt=base_dt_ + delta_ * i,
             locator=Loc.make('demo'),
         )
         # todo add context?
```
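Per the new signature, `base_dt` accepts either a `datetime` or an ISO-format string, and `delta` either a `timedelta` or a number of seconds. A usage sketch (assuming promnesia is installed):

```python
from promnesia.sources import demo

# three visits, an hour apart, starting at midnight on 2024-01-01
for visit in demo.index(count=3, base_dt='2024-01-01T00:00:00', delta=3600):
    print(visit.url, visit.dt)
```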
promnesia/sources/filetypes.py
CHANGED
```diff
@@ -67,6 +67,7 @@ CODE = {
     'text/vnd.graphviz',
     'text/x-diff',  # patch files
     'text/x-php',
+    'text/x-lilypond',

     # these didn't have a mime type, or were mistyped?
     'css',
@@ -115,6 +116,12 @@ TYPE2IDX.update({
     '.vcf'          : ignore,
     'message/rfc822': ignore,  # ??

+    # todo ignore all fonts?
+    'font/woff2': ignore,
+    'font/woff': ignore,
+    'text/x-Algol68': ignore,  # ugh some license file had this?? maybe always index text/ as text?
+    'text/x-bytecode.python': ignore,  # todo ignore all x-bytecode?
+
     # TODO not sure what to do about these..
     'application/octet-stream': handle_later,
     'application/zip'         : handle_later,
```
promnesia/sources/github.py
CHANGED
```diff
@@ -31,7 +31,7 @@ def index(*, render_markdown: bool = False) -> Results:
         # if enabled, convert the (markdown) body to HTML
         context: Optional[str] = e.body
         if e.body is not None and render_markdown:
-            context = TextParser(e.body)._doc_ashtml()
+            context = TextParser(e.body)._doc_ashtml()  # type: ignore[possibly-undefined]

         # locator should link back to this event
         loc = Loc.make(title=e.summary, href=e.link)
@@ -74,7 +74,7 @@ def index(*, render_markdown: bool = False) -> Results:
         # extract from markdown links like [link text](https://...)
         # incase URLExtract missed any somehow
         if render_markdown:
-            for res in extract_from_text(e.body):
+            for res in extract_from_text(e.body):  # type: ignore[possibly-undefined]
                 if isinstance(res, Exception):
                     yield res
                     continue
```
promnesia/sources/hypothesis.py
CHANGED
promnesia/sources/markdown.py
CHANGED
```diff
@@ -1,13 +1,13 @@
 from pathlib import Path
 from typing import Iterator, NamedTuple, Optional

-from ..common import
+from ..common import Extraction, Url, PathIsh, Res, Visit, Loc, file_mtime, logger


-import mistletoe
-from mistletoe.span_token import AutoLink, Link
-import mistletoe.block_token as BT
-from mistletoe.html_renderer import HTMLRenderer
+import mistletoe  # type: ignore
+from mistletoe.span_token import AutoLink, Link  # type: ignore
+import mistletoe.block_token as BT  # type: ignore
+from mistletoe.html_renderer import HTMLRenderer  # type: ignore


 renderer = HTMLRenderer()
@@ -42,7 +42,7 @@ HTML_MARKER = '!html '
 def _ashtml(block) -> str:
     res = renderer.render(block)
     if res.startswith('<p>') and res.endswith('</p>'):
-        res = res[3
+        res = res[3:-4]  # meh, but for now fine
     return res


@@ -62,7 +62,6 @@ class Parser:
             context = None if last_block is None else HTML_MARKER + _ashtml(last_block)
             yield Parsed(url=url, context=context)

-
     def _walk(self, cur, last_block) -> Iterator[Result]:
         if isinstance(cur, block_tokens):
             last_block = cur
@@ -73,12 +72,14 @@ class Parser:
             logger.exception(e)
             yield e

-
+        # keeping getattr for compatibility in older versions of mistletoe, it was optional
+        children = getattr(cur, 'children', None)
+        if children is None:
+            return
         for c in children:
             yield from self._walk(c, last_block=last_block)

-
-    def walk(self):
+    def walk(self) -> Iterator[Result]:
         yield from self._walk(self.doc, last_block=None)


@@ -94,7 +95,7 @@ def extract_from_file(fname: PathIsh) -> Iterator[Extraction]:
         yield Visit(
             url=r.url,
             dt=fallback_dt,
-            locator=Loc.file(fname),
+            locator=Loc.file(fname),  # TODO line number
             context=r.context,
         )

@@ -105,9 +106,9 @@ class TextParser(Parser):
     Instead of chunking blocks like for files, this returns the entire
     message rendered as the context
     '''
-    def __init__(self, text: str):
-        self.doc = mistletoe.Document(text)

+    def __init__(self, text: str) -> None:
+        self.doc = mistletoe.Document(text)

     def _doc_ashtml(self):
         '''
@@ -117,8 +118,7 @@ class TextParser(Parser):
         self._html = HTML_MARKER + _ashtml(self.doc)
         return self._html

-
-    def _extract(self, cur, last_block = None) -> Iterator[Parsed]:
+    def _extract(self, cur, last_block=None) -> Iterator[Parsed]:
         if not isinstance(cur, (AutoLink, Link)):
             return
```
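The `_walk` change probes for `children` with `getattr` because, per the comment in the hunk, the attribute was optional in older mistletoe versions. The pattern in isolation:

```python
def walk(node):
    # older mistletoe tokens may lack `.children` entirely,
    # so probe with getattr instead of direct attribute access
    yield node
    children = getattr(node, 'children', None)
    if children is None:
        return
    for child in children:
        yield from walk(child)
```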
promnesia/sources/org.py
CHANGED
```diff
@@ -57,8 +57,12 @@ def _parse_node(n: OrgNode) -> Parsed:
     # todo a bit hacky..
     heading = heading.replace(createds + ' ', '')
     if createds is not None:
-        [odt] = OrgDate.list_from_str(createds)
-        dt = odt.start
+        if '<%%' in createds:
+            # sexp date, not supported
+            dt = None
+        else:
+            [odt] = OrgDate.list_from_str(createds)
+            dt = odt.start
     else:
         dt = None
     return Parsed(dt=dt, heading=heading)
@@ -80,7 +84,7 @@ def walk_node(*, node: OrgNode, dt: datetime) -> Iterator[Res[Tuple[Parsed, OrgNode]]]:
         parsed = parsed._replace(dt=dt)
     else:
         dt = parsed.dt
-
+    yield parsed, node

     for c in node.children:
         yield from walk_node(node=c, dt=dt)
```
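Org sexp timestamps (e.g. `<%%(diary-float t 4 2)>`) can't be evaluated by orgparse's `OrgDate`, hence the `<%%` guard. The guard in isolation (assuming the `orgparse` package, which this source already uses):

```python
from orgparse.date import OrgDate

def parse_created(createds: str):
    # sexp (diary-style) timestamps can't be evaluated by orgparse
    if '<%%' in createds:
        return None
    [odt] = OrgDate.list_from_str(createds)  # expect exactly one timestamp
    return odt.start
```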
promnesia/sources/plaintext.py
CHANGED
```diff
@@ -98,8 +98,10 @@ def extract_from_path(path: PathIsh) -> Command:
         '.gz',
         '.zip',
     )):
-        logger.info(f"Extracting from compressed file {path}")
+        # todo should be debug?
+        # or should delete it completely, feels like unpacking archives here is a bit too much
         raise RuntimeError(f"Archives aren't supported yet: {path}")
+        logger.info(f"Extracting from compressed file {path}")
     import lzma
     from tempfile import NamedTemporaryFile
     # TODO hopefully, no collisions
```
promnesia/sources/reddit.py
CHANGED
```diff
@@ -16,7 +16,7 @@ def index(*, render_markdown: bool = False, renderer: Optional[Type['RedditRenderer']] = None) -> Results:
         if "No module named 'my.reddit.all'" in str(e):
             import warnings
             warnings.warn("DEPRECATED/reddit: Using an old version of HPI, please update")
-            from my.reddit import submissions, comments, saved, upvoted
+            from my.reddit import submissions, comments, saved, upvoted
         else:
             raise e

@@ -95,7 +95,7 @@ class RedditRenderer:

     def _from_upvote(self, i: 'Upvote') -> Results:
         locator = Loc.make(
-            title=
+            title='Reddit upvote',
             href=i.url,
         )
         yield from self._from_common(i, locator=locator)
```
promnesia/sources/rss.py
CHANGED
promnesia/sources/signal.py
CHANGED
```diff
@@ -63,6 +63,8 @@ def index(
     logger.debug("Paths to harvest: %s", db_paths)
     if not http_only:
         sql_query = f"{messages_query}\nWHERE body LIKE '%http%'"
+    else:
+        sql_query = messages_query

     for db_path in resolved_db_paths:
         logger.info("Ciphered db to harvest %s", db_path)
@@ -106,12 +108,18 @@ messages_query = dedent(
     SELECT
         id,
         type,
-        coalesce(
+        coalesce(
+            profileFullName,
+            profileName,
+            name,
+            profileFamilyName,
+            e164
+        ) as aname,
         name,
         profileName,
         profileFamilyName,
         e164,
-
+        serviceId
     FROM conversations
     ),
     Msgs AS (
@@ -123,8 +131,8 @@ messages_query = dedent(
             M.received_at,
             M.sent_at
         ) AS timestamp,
-        IIF(M.type =
-
+        IIF(M.type = 'outgoing',
+            'Me (' || C2.aname || ')',
             C2.aname
         ) AS sender,
         M.conversationId AS cid,
@@ -138,7 +146,7 @@ messages_query = dedent(
     INNER JOIN Cons AS C1
         ON M.conversationId = C1.id
     INNER JOIN Cons AS C2
-        ON M.
+        ON M.sourceServiceId = C2.serviceId
 )
 SELECT id, timestamp, sender, cid, chatname, body
 FROM Msgs
@@ -188,8 +196,8 @@ def _expand_path(path_pattern: PathIsh) -> Iterable[Path]:

 def _expand_paths(paths: PathIshes) -> Iterable[Path]:
     if _is_pathish(paths):
-        paths = [paths]  # type: ignore[
-    return [pp.resolve() for p in paths for pp in _expand_path(p)]  # type: ignore[union-attr
+        paths = [paths]  # type: ignore[list-item]
+    return [pp.resolve() for p in paths for pp in _expand_path(p)]  # type: ignore[union-attr]


 def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path]:
@@ -236,7 +244,7 @@ def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path]:
     )

     if db_paths and append:
-        db_paths = [  # type: ignore[
+        db_paths = [  # type: ignore[assignment]
             *([db_paths] if _is_pathish(db_paths) else db_paths),
             plat_paths,
         ]
@@ -310,8 +318,8 @@ def connect_db(
         sql_cmds.extend(
             [
                 f"ATTACH DATABASE '{decrypted_file}' AS plaintext KEY '';",
-
-
+                "SELECT sqlcipher_export('plaintext');",
+                "DETACH DATABASE plaintext;",
             ]
         )
         sql = "\n".join(sql_cmds)
@@ -320,7 +328,7 @@ def connect_db(
             "Decrypting db '%s' with cmd: %s <<<EOF\n%s\nEOF", db_path, cmd, sql
         )
         try:
-            sbp.run(
+            sbp.run(
                 cmd,
                 check=True,
                 input=sql,
@@ -335,7 +343,7 @@ def connect_db(
         ) from None
         db = sqlite3.connect(f"file:{decrypted_file}?mode=ro", uri=True)
     else:
-        from sqlcipher3 import dbapi2  # type: ignore[import]
+        from sqlcipher3 import dbapi2  # type: ignore[import-not-found]

         db = dbapi2.connect(f"file:{db_path}?mode=ro", uri=True)
         # Param-binding doesn't work for pragmas, so use a direct string concat.
@@ -419,9 +427,9 @@ def _harvest_db(

     with connect_db(db_path, key, decrypt_db=decrypt_db, **decryption_pragmas) as db:
         for mid, tstamp, sender, cid, chatname, text in db.execute(messages_query):
+            tstamp = from_epoch(tstamp / 1000.0)
+            row = (mid, tstamp, sender, cid, chatname, text)
             try:
-                tstamp = from_epoch(tstamp / 1000.0)
-                row = (mid, tstamp, sender, cid, chatname, text)
                 yield from _handle_row(row, db_path, locator_schema)
             except Exception as ex:
                 # TODO: also insert errors in db
```
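Signal's database stores timestamps as Unix epoch milliseconds, which is why the hunk above divides by 1000 before calling `from_epoch` (a promnesia helper). A stdlib sketch of the equivalent conversion:

```python
from datetime import datetime, timezone

def from_epoch(seconds: float) -> datetime:
    # build a timezone-aware UTC datetime from Unix seconds
    return datetime.fromtimestamp(seconds, tz=timezone.utc)

# Signal stores milliseconds, so divide by 1000 first
tstamp = from_epoch(1_700_000_000_000 / 1000.0)
print(tstamp.isoformat())  # 2023-11-14T22:13:20+00:00
```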
promnesia/sources/stackexchange.py
CHANGED
```diff
@@ -2,12 +2,12 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for Stackexchange data.
 '''

-from ..common import Results, Visit, Loc
+from ..common import Results, Visit, Loc


 def index() -> Results:
     from . import hpi
-    import my.stackexchange.gdpr as G
+    import my.stackexchange.gdpr as G
     for v in G.votes():
         if isinstance(v, Exception):
             yield v
```