promnesia 1.2.20240810__py3-none-any.whl → 1.3.20241021__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promnesia/__init__.py +14 -3
- promnesia/__main__.py +38 -25
- promnesia/cannon.py +23 -23
- promnesia/common.py +49 -42
- promnesia/compare.py +18 -20
- promnesia/compat.py +10 -10
- promnesia/config.py +20 -22
- promnesia/database/common.py +4 -3
- promnesia/database/dump.py +14 -13
- promnesia/database/load.py +7 -7
- promnesia/extract.py +13 -11
- promnesia/kjson.py +11 -10
- promnesia/logging.py +1 -1
- promnesia/misc/install_server.py +7 -8
- promnesia/server.py +42 -31
- promnesia/sources/auto.py +43 -30
- promnesia/sources/auto_logseq.py +6 -5
- promnesia/sources/auto_obsidian.py +2 -2
- promnesia/sources/browser.py +14 -9
- promnesia/sources/browser_legacy.py +17 -13
- promnesia/sources/demo.py +7 -7
- promnesia/sources/fbmessenger.py +3 -2
- promnesia/sources/filetypes.py +9 -7
- promnesia/sources/github.py +5 -7
- promnesia/sources/guess.py +2 -1
- promnesia/sources/hackernews.py +2 -2
- promnesia/sources/hpi.py +2 -2
- promnesia/sources/html.py +7 -5
- promnesia/sources/hypothesis.py +3 -2
- promnesia/sources/instapaper.py +2 -2
- promnesia/sources/markdown.py +17 -7
- promnesia/sources/org.py +20 -10
- promnesia/sources/plaintext.py +30 -31
- promnesia/sources/pocket.py +3 -2
- promnesia/sources/reddit.py +19 -18
- promnesia/sources/roamresearch.py +2 -1
- promnesia/sources/rss.py +3 -4
- promnesia/sources/shellcmd.py +19 -6
- promnesia/sources/signal.py +14 -13
- promnesia/sources/smscalls.py +2 -2
- promnesia/sources/stackexchange.py +3 -2
- promnesia/sources/takeout.py +23 -13
- promnesia/sources/takeout_legacy.py +15 -11
- promnesia/sources/telegram.py +13 -11
- promnesia/sources/telegram_legacy.py +18 -7
- promnesia/sources/twitter.py +6 -5
- promnesia/sources/vcs.py +5 -3
- promnesia/sources/viber.py +10 -9
- promnesia/sources/website.py +4 -4
- promnesia/sources/zulip.py +3 -2
- promnesia/sqlite.py +7 -4
- promnesia/tests/common.py +8 -5
- promnesia/tests/server_helper.py +11 -8
- promnesia/tests/sources/test_auto.py +2 -3
- promnesia/tests/sources/test_filetypes.py +2 -1
- promnesia/tests/sources/test_hypothesis.py +3 -3
- promnesia/tests/sources/test_org.py +2 -3
- promnesia/tests/sources/test_plaintext.py +0 -1
- promnesia/tests/sources/test_shellcmd.py +3 -4
- promnesia/tests/sources/test_takeout.py +3 -5
- promnesia/tests/test_cannon.py +5 -5
- promnesia/tests/test_cli.py +4 -6
- promnesia/tests/test_compare.py +1 -1
- promnesia/tests/test_config.py +7 -8
- promnesia/tests/test_db_dump.py +11 -12
- promnesia/tests/test_extract.py +10 -6
- promnesia/tests/test_indexer.py +14 -8
- promnesia/tests/test_server.py +2 -3
- promnesia/tests/test_traverse.py +0 -2
- promnesia/tests/utils.py +4 -4
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/METADATA +3 -2
- promnesia-1.3.20241021.dist-info/RECORD +83 -0
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/WHEEL +1 -1
- promnesia-1.2.20240810.dist-info/RECORD +0 -83
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/LICENSE +0 -0
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/entry_points.txt +0 -0
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/top_level.txt +0 -0
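Most of the source changes below follow the same modernization pattern: relative ..common imports become absolute promnesia.common imports, noqa suppressions for the linter are added, and type hints move to the built-in generic and union syntax (list[str], set[str], X | None) enabled by "from __future__ import annotations". A minimal sketch of the typing half of that pattern (the collect helper is invented for illustration; Parsed mirrors the org.py hunk):

    from __future__ import annotations  # annotations become lazy strings, so the new syntax also works on older Pythons

    from datetime import datetime
    from typing import NamedTuple


    class Parsed(NamedTuple):
        # was: dt: Optional[datetime] (with "from typing import Optional")
        dt: datetime | None
        heading: str


    def collect(urls: list[str] | None = None) -> set[str]:
        # was: urls: Optional[List[str]] returning Set[str]
        return set(urls or [])


    print(collect(['https://example.com']))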
promnesia/sources/org.py
CHANGED
@@ -1,16 +1,26 @@
-from
+from __future__ import annotations
+
 import re
-from
+from collections.abc import Iterable, Iterator
+from datetime import datetime
 from pathlib import Path
-
-
-from ..common import Visit, get_logger, Results, Url, Loc, from_epoch, iter_urls, PathIsh, Res, file_mtime
-
+from typing import NamedTuple, Optional, cast

 import orgparse
-from orgparse.date import
+from orgparse.date import OrgDate, gene_timestamp_regex
 from orgparse.node import OrgNode

+from promnesia.common import (
+Loc,
+PathIsh,
+Res,
+Results,
+Url,
+Visit,
+file_mtime,
+get_logger,
+iter_urls,
+)

 UPDATE_ORGPARSE_WARNING = 'WARNING: please update orgparse version to a more recent (pip3 install -U orgparse)'

@@ -36,7 +46,7 @@ CREATED_RGX = re.compile(gene_timestamp_regex(brtype='inactive'), re.VERBOSE)
 """

 class Parsed(NamedTuple):
-dt:
+dt: datetime | None
 heading: str


@@ -74,7 +84,7 @@ def _get_heading(n: OrgNode):
 return '' if n.is_root() else n.get_heading(format='raw')


-def walk_node(*, node: OrgNode, dt: datetime) -> Iterator[Res[
+def walk_node(*, node: OrgNode, dt: datetime) -> Iterator[Res[tuple[Parsed, OrgNode]]]:
 try:
 parsed = _parse_node(node)
 except Exception as e:
@@ -98,7 +108,7 @@ def get_body_compat(node: OrgNode) -> str:
 # get_body was only added to root in 0.2.0
 for x in warn_old_orgparse_once():
 # ugh. really crap, but it will at least only warn once... (becaue it caches)
-raise x
+raise x  # noqa: B904
 return UPDATE_ORGPARSE_WARNING
 else:
 raise e
promnesia/sources/plaintext.py
CHANGED
@@ -1,10 +1,9 @@
-from
-from ..compat import removeprefix
+from __future__ import annotations

 from functools import lru_cache
 from pathlib import Path
-
-from
+
+from promnesia.common import PathIsh, _is_windows, get_logger, get_tmpdir

 # https://linux-and-mac-hacks.blogspot.co.uk/2013/04/use-grep-and-regular-expressions-to.html
 _URL_REGEX = r'\b(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@#/%=~_|]'
@@ -12,16 +11,16 @@ _URL_REGEX = r'\b(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@
 if _is_windows:
 # wtf? for some reason on windows (in cmd.exe specificaly) \b isn't working...
 # this will make the regex a bit less precise, but not end of the world
-_URL_REGEX = removeprefix(
+_URL_REGEX = _URL_REGEX.removeprefix(r'\b')


-@lru_cache
+@lru_cache
 def _has_grep() -> bool:
 import shutil
 return shutil.which('grep') is not None


-Command =
+Command = list[str]


 _GREP_ARGS: Command = [
@@ -39,7 +38,7 @@ if not _is_windows:

 # NOTE: grep/findstr exit with code 1 on no matches...
 # we hack around it in shellcmd module (search 'grep')
-def _grep(*, paths:
+def _grep(*, paths: list[str], recursive: bool) -> Command:
 return [
 'grep',
 *(['-r'] if recursive else []),
@@ -91,26 +90,26 @@ def extract_from_path(path: PathIsh) -> Command:
 logger = get_logger()
 if pp.is_dir(): # TODO handle archives here???
 return _extract_from_dir(str(pp))
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+if any(pp.suffix == ex for ex in (
+'.xz',
+'.bz2',
+'.gz',
+'.zip',
+)):
+# todo should be debug?
+# or should delete it completely, feels like unpacking archives here is a bit too much
+raise RuntimeError(f"Archives aren't supported yet: {path}")
+# logger.info(f"Extracting from compressed file {path}")
+# import lzma
+# from tempfile import NamedTemporaryFile
+# # TODO hopefully, no collisions
+# import os.path
+# fname = os.path.join(tdir.name, os.path.basename(path))
+# with open(fname, 'wb') as fo:
+# with lzma.open(path, 'r') as cf:
+# fo.write(cf.read())
+# return _extract_from_file(fname)
+
+r = _extract_from_file(str(pp))
+return r
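The plaintext.py hunk above drops the removeprefix helper from promnesia.compat in favour of the built-in str.removeprefix (available since Python 3.9); takeout.py further down makes the same swap. A small sketch of the replacement, reusing the URL prefix and regex prefix from these hunks:

    # str.removeprefix returns the string unchanged when the prefix is absent,
    # so no explicit startswith() check is needed.
    url = 'https://www.google.com/url?q=https://en.wikipedia.org/wiki/Clapham'
    print(url.removeprefix('https://www.google.com/url?q='))  # the wrapped URL

    _URL_REGEX = r'\b(https?|ftp|file)://example'  # shortened stand-in for the real pattern
    print(_URL_REGEX.removeprefix(r'\b'))  # same regex without the \b anchor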
promnesia/sources/pocket.py
CHANGED
@@ -1,11 +1,12 @@
 '''
 Uses [[https://github.com/karlicoss/HPI][HPI]] for Pocket highlights & bookmarks
 '''
-
+
+from promnesia.common import Loc, Results, Visit


 def index() -> Results:
-from . import hpi
+from . import hpi  # noqa: F401,I001
 from my.pocket import articles

 # TODO use docstring from my. module? E.g. describing which pocket format is expected
promnesia/sources/reddit.py
CHANGED
@@ -2,21 +2,27 @@
 Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myreddit][reddit]] module
 '''

+from __future__ import annotations
+
+import typing
 from itertools import chain
-from typing import Set, Optional, Type

-from
+from promnesia.common import Loc, Results, Visit, extract_urls, logger
+
+if typing.TYPE_CHECKING:
+from my.reddit.common import Comment, RedditBase, Save, Submission, Upvote
+

+def index(*, render_markdown: bool = False, renderer: type[RedditRenderer] | None = None) -> Results:
+from . import hpi  # noqa: F401

-def index(*, render_markdown: bool = False, renderer: Optional[Type['RedditRenderer']] = None) -> Results:
-from . import hpi
 try:
-from my.reddit.all import
+from my.reddit.all import comments, saved, submissions, upvoted
 except ModuleNotFoundError as e:
 if "No module named 'my.reddit.all'" in str(e):
 import warnings
 warnings.warn("DEPRECATED/reddit: Using an old version of HPI, please update")
-from my.reddit import
+from my.reddit import comments, saved, submissions, upvoted
 else:
 raise e

@@ -58,7 +64,7 @@ def index(*, render_markdown: bool = False, renderer: Optional[Type['RedditRende
 # mostly here so we can keep track of how the user
 # wants to render markdown
 class RedditRenderer:
-def __init__(self, render_markdown: bool = False) -> None:
+def __init__(self, *, render_markdown: bool = False) -> None:
 self._link_extractor = None
 self._parser_cls = None
 try:
@@ -77,7 +83,7 @@ class RedditRenderer:
 self.render_markdown = render_markdown


-def _from_comment(self, i:
+def _from_comment(self, i: Comment) -> Results:
 locator = Loc.make(
 title='Reddit comment',
 href=i.url,
@@ -85,7 +91,7 @@ class RedditRenderer:
 yield from self._from_common(i, locator=locator)


-def _from_submission(self, i:
+def _from_submission(self, i: Submission) -> Results:
 locator = Loc.make(
 title=f'Reddit submission: {i.title}',
 href=i.url,
@@ -93,7 +99,7 @@ class RedditRenderer:
 yield from self._from_common(i, locator=locator)


-def _from_upvote(self, i:
+def _from_upvote(self, i: Upvote) -> Results:
 locator = Loc.make(
 title='Reddit upvote',
 href=i.url,
@@ -101,7 +107,7 @@ class RedditRenderer:
 yield from self._from_common(i, locator=locator)


-def _from_save(self, i:
+def _from_save(self, i: Save) -> Results:
 locator = Loc.make(
 title='Reddit save',
 href=i.url,
@@ -117,7 +123,7 @@ class RedditRenderer:
 return text


-def _from_common(self, i:
+def _from_common(self, i: RedditBase, locator: Loc) -> Results:
 urls = [i.url]
 # TODO this should belong to HPI.. fix permalink handling I guess
 # ok, it's not present for all of them..
@@ -130,7 +136,7 @@ class RedditRenderer:

 context = self._render_body(i.text)

-emitted:
+emitted: set[str] = set()

 for url in chain(urls, extract_urls(i.text)):
 if url in emitted:
@@ -165,8 +171,3 @@ class RedditRenderer:
 )
 emitted.add(res.url)

-
-import typing
-if typing.TYPE_CHECKING:
-from my.reddit.common import Submission, Comment, Save, Upvote, RedditBase
-
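The reddit.py change above moves the my.reddit.common imports from the bottom of the module into a typing.TYPE_CHECKING block at the top, so the HPI types are available to the annotations without being imported at runtime. A minimal sketch of that idiom (the render function is hypothetical):

    from __future__ import annotations

    import typing

    if typing.TYPE_CHECKING:
        # evaluated only by type checkers, never at runtime
        from my.reddit.common import Comment


    def render(comment: Comment) -> str:
        # with postponed evaluation the annotation stays a string at runtime,
        # so the guarded import is never actually needed to run this
        return getattr(comment, 'text', '')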
promnesia/sources/roamresearch.py
CHANGED
@@ -2,7 +2,7 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for Roam Research data
 '''

-from
+from promnesia.common import Loc, Results, Visit, extract_urls


 def index() -> Results:
@@ -43,6 +43,7 @@ def _collect(node: 'RoamNode') -> Results:


 import typing
+
 if typing.TYPE_CHECKING:
 import my.roamresearch as RR
 RoamNode = RR.Node
promnesia/sources/rss.py
CHANGED
@@ -2,14 +2,12 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for RSS data.
 '''

-from itertools import chain
-
-from ..common import Visit, Loc, extract_urls, Results, get_logger
-
 from datetime import datetime

 import pytz

+from promnesia.common import Loc, Results, Visit
+
 # arbitrary, 2011-11-04 00:05:23.283+00:00
 default_datetime = datetime.fromtimestamp(1320365123, tz=pytz.utc)
 # TODO FIXME allow for visit not to have datetime?
@@ -17,6 +15,7 @@ default_datetime = datetime.fromtimestamp(1320365123, tz=pytz.utc)

 def index() -> Results:
 from my.rss.all import subscriptions
+
 for feed in subscriptions():
 # TODO locator should be optional too? although could use direct link in the rss reader interface
 locator = Loc.make(title='my.rss')
promnesia/sources/shellcmd.py
CHANGED
@@ -2,18 +2,31 @@
 Greps out URLs from an arbitrary shell command results.
 """

-from
+from __future__ import annotations
+
 import os
 import re
-from subprocess import run, PIPE
-from typing import Union, Sequence
 import warnings
+from collections.abc import Sequence
+from datetime import datetime
+from subprocess import PIPE, run
+
+from promnesia.common import (
+Loc,
+PathIsh,
+Results,
+Visit,
+_is_windows,
+extract_urls,
+file_mtime,
+get_system_tz,
+now_tz,
+)

-from ..common import Visit, Loc, Results, extract_urls, file_mtime, get_system_tz, now_tz, _is_windows, PathIsh
 from .plaintext import _has_grep


-def index(command:
+def index(command: str | Sequence[PathIsh]) -> Results:
 cmd: Sequence[PathIsh]
 cmds: str
 if isinstance(command, str):
@@ -71,7 +84,7 @@ def index(command: Union[str, Sequence[PathIsh]]) -> Results:
 context=context,
 )

-r = run(cmd, stdout=PIPE)
+r = run(cmd, stdout=PIPE, check=False)
 if r.returncode > 0:
 if not (cmd[0] in {'grep', 'findstr'} and r.returncode == 1): # ugh. grep returns 1 on no matches...
 r.check_returncode()
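shellcmd.py now passes check=False to subprocess.run and inspects the return code by hand, since grep (and findstr) exit with status 1 when there are simply no matches. A rough standalone sketch of that handling, not the module's actual function:

    import subprocess


    def run_grep(args: list[str]) -> str:
        # check=False: don't raise on a nonzero exit; grep uses 1 for "no matches"
        r = subprocess.run(args, stdout=subprocess.PIPE, check=False)
        if r.returncode > 0 and not (args[0] == 'grep' and r.returncode == 1):
            r.check_returncode()  # real failures (status 2 and above) still raise
        return r.stdout.decode(errors='replace')


    # e.g. run_grep(['grep', '-r', 'http', '.'])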
promnesia/sources/signal.py
CHANGED
@@ -1,23 +1,23 @@
 """
 Collects visits from Signal Desktop's encrypted SQLIite db(s).
 """
+from __future__ import annotations

 # Functions get their defaults from module-data.
 #
 # * Open-ciphered-db adapted from:
 # https://github.com/carderne/signal-export/commit/2284c8f4
 # * Copyright (c) 2019 Chris Arderne, 2020 Kostis Anagnostopoulos
-
-
 import json
 import logging
 import platform
 import sqlite3
 import subprocess as sbp
+from collections.abc import Iterable, Iterator, Mapping
 from contextlib import contextmanager
 from pathlib import Path
 from textwrap import dedent, indent
-from typing import Any,
+from typing import Any, Union

 from ..common import Loc, PathIsh, Results, Visit, extract_urls, from_epoch

@@ -29,7 +29,7 @@ def index(
 http_only: bool = False,
 locator_schema: str="editor",
 append_platform_path: bool = False,
-override_key:
+override_key: str | None = None,
 ) -> Results:
 """
 :param db_paths:
@@ -109,10 +109,10 @@ messages_query = dedent(
 id,
 type,
 coalesce(
-profileFullName,
-profileName,
+profileFullName,
+profileName,
 name,
-profileFamilyName,
+profileFamilyName,
 e164
 ) as aname,
 name,
@@ -237,11 +237,11 @@ def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path]
 platform_name = platform.system()
 try:
 plat_paths = platform_db_paths[platform_name]
-except LookupError:
+except LookupError as le:
 raise ValueError(
 f"Unknown platform({platform_name}!"
 f"\n Expected one of {list(platform_db_paths.keys())}."
-)
+) from le

 if db_paths and append:
 db_paths = [ # type: ignore[assignment]
@@ -261,7 +261,7 @@ def _config_for_dbfile(db_path: Path, default_key=None) -> Path:


 def _key_from_config(signal_desktop_config_path: PathIsh) -> str:
-with
+with Path(signal_desktop_config_path).open() as conf:
 return json.load(conf)["key"]


@@ -269,6 +269,7 @@ def _key_from_config(signal_desktop_config_path: PathIsh) -> str:
 def connect_db(
 db_path: Path,
 key,
+*,
 decrypt_db: bool = False,
 sqlcipher_exe: PathIsh = "sqlcipher",
 **decryption_pragmas: Mapping[str, Any],
@@ -333,7 +334,7 @@ def connect_db(
 check=True,
 input=sql,
 capture_output=True,
-
+text=True,
 )
 except sbp.CalledProcessError as ex:
 prefix = " " * 4
@@ -380,7 +381,7 @@ def _handle_row(row: tuple, db_path: PathIsh, locator_schema: str) -> Results:
 if not urls:
 return

-assert (
+assert (  # noqa: PT018
 text and mid and sender and chatname
 ), f"should have eliminated messages without 'http' or missing ids: {row}"

@@ -400,7 +401,7 @@ def _harvest_db(
 db_path: Path,
 messages_query: str,
 *,
-override_key:
+override_key: str | None = None,
 locator_schema: str = "editor",
 decrypt_db: bool = False,
 **decryption_pragmas,
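The signal.py hunk binds the caught LookupError and re-raises with "from le", while org.py keeps its bare re-raise and silences the same rule with "# noqa: B904"; both address lint rule B904, which wants exceptions raised inside an except block to be chained explicitly. A tiny sketch of the chained form (the path table is invented for illustration):

    platform_db_paths = {'Linux': '~/.config/Signal', 'Darwin': '~/Library/Application Support/Signal'}


    def db_dir_for(platform_name: str) -> str:
        try:
            return platform_db_paths[platform_name]
        except LookupError as le:
            # "from le" keeps the original KeyError attached as __cause__
            raise ValueError(f'Unknown platform {platform_name!r}') from le


    print(db_dir_for('Linux'))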
promnesia/sources/smscalls.py
CHANGED
@@ -2,11 +2,11 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] smscalls module
 '''

-from promnesia.common import
+from promnesia.common import Loc, Results, Visit, extract_urls


 def index() -> Results:
-from . import hpi
+from . import hpi  # noqa: F401,I001
 from my.smscalls import messages

 for m in messages():
promnesia/sources/stackexchange.py
CHANGED
@@ -2,12 +2,13 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for Stackexchange data.
 '''

-from
+from promnesia.common import Loc, Results, Visit


 def index() -> Results:
-from . import hpi
+from . import hpi  # noqa: F401,I001
 import my.stackexchange.gdpr as G
+
 for v in G.votes():
 if isinstance(v, Exception):
 yield v
promnesia/sources/takeout.py
CHANGED
@@ -1,11 +1,14 @@
 '''
 Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#mygoogletakeoutpaths][google.takeout]] module
 '''
-
+
+from __future__ import annotations
+
 import warnings
+from collections.abc import Iterable
+from typing import Any, NamedTuple

-from
-from ..compat import removeprefix
+from promnesia.common import Loc, Results, Visit, logger


 # incase user is using an old version of google_takeout_parser
@@ -14,13 +17,20 @@ class YoutubeCSVStub(NamedTuple):


 def index() -> Results:
-from . import hpi
-import json
+from . import hpi  # noqa: F401

 try:
+from google_takeout_parser.models import (
+Activity,
+ChromeHistory,
+LikedYoutubeVideo,
+YoutubeComment,
+)
+from google_takeout_parser.parse_csv import (
+extract_comment_links,
+reconstruct_comment_content,
+)
 from my.google.takeout.parser import events
-from google_takeout_parser.models import Activity, YoutubeComment, LikedYoutubeVideo, ChromeHistory
-from google_takeout_parser.parse_csv import reconstruct_comment_content, extract_comment_links
 except ModuleNotFoundError as ex:
 logger.exception(ex)
 yield ex
@@ -32,7 +42,7 @@ def index() -> Results:
 return


-_seen:
+_seen: set[str] = {
 # these are definitely not useful for promnesia
 'Location',
 'PlaceVisit',
@@ -54,7 +64,7 @@ def index() -> Results:
 if et_name in _seen:
 return
 _seen.add(et_name)
-yield RuntimeError(f"Unhandled event {
+yield RuntimeError(f"Unhandled event {type(e)!r}: {e}")

 for e in events():
 if isinstance(e, Exception):
@@ -67,13 +77,13 @@ def index() -> Results:
 # when you follow something from search the actual url goes after this
 # e.g. https://www.google.com/url?q=https://en.wikipedia.org/wiki/Clapham
 # note: also title usually starts with 'Visited ', in such case but perhaps fine to keep it
-url = removeprefix(
+url = url.removeprefix("https://www.google.com/url?q=")
 title = e.title

 if e.header == 'Chrome':
 # title contains 'Visited <page title>' in this case
 context = None
-title = removeprefix(
+title = title.removeprefix('Visited ')
 elif e.header in _CLEAR_CONTEXT_FOR_HEADERS:
 # todo perhaps could add to some sort of metadata?
 # only useful for debugging really
@@ -131,7 +141,7 @@ def index() -> Results:
 url=url, dt=e.dt, context=e.content, locator=Loc(title=e.content, href=url)
 )
 elif imported_yt_csv_models and isinstance(e, CSVYoutubeComment):
-contentJSON =
+contentJSON = e.contentJSON
 content = reconstruct_comment_content(contentJSON, format='text')
 if isinstance(content, Exception):
 yield content
@@ -149,7 +159,7 @@ def index() -> Results:
 url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url)
 )
 elif imported_yt_csv_models and isinstance(e, CSVYoutubeLiveChat):
-contentJSON =
+contentJSON = e.contentJSON
 content = reconstruct_comment_content(contentJSON, format='text')
 if isinstance(content, Exception):
 yield content
promnesia/sources/takeout_legacy.py
CHANGED
@@ -1,9 +1,13 @@
-from
+from __future__ import annotations
+
+from promnesia.common import Loc, Results, Visit, logger
+

 # TODO make an iterator, insert in db as we go? handle errors gracefully?
 def index() -> Results:
-from . import hpi
+from . import hpi  # noqa: F401,I001
 from my.google.takeout.paths import get_takeouts
+
 takeouts = list(get_takeouts())
 # TODO if no takeouts, raise?
 # although could raise a warning on top level, when source emitted no takeouts
@@ -22,19 +26,17 @@ def index() -> Results:



-import
-from
+import json
+from collections.abc import Iterable
 from datetime import datetime
-from
+from itertools import chain
 from pathlib import Path
-import json
-
-
-from .. import config
-

+import pytz
 from more_itertools import unique_everseen

+from promnesia import config
+
 try:
 from cachew import cachew
 except ModuleNotFoundError as me:
@@ -50,7 +52,9 @@ TakeoutPath = Path


 def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
+# FIXME switch to actual kompress? and use CPath?
 from my.core.kompress import kexists
+
 # TODO glob
 # TODO not sure about windows path separators??
 spath = 'Takeout/My Activity/' + kind
@@ -61,7 +65,7 @@ def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:

 locator = Loc.file(spath)
 from my.google.takeout.html import read_html
-for dt, url,
+for dt, url, _title in read_html(takeout, spath):
 yield Visit(
 url=url,
 dt=dt,