promnesia 1.3.20241021__py3-none-any.whl → 1.4.20250909__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in the public registries.
- promnesia/__init__.py +4 -1
- promnesia/__main__.py +72 -59
- promnesia/cannon.py +90 -89
- promnesia/common.py +74 -62
- promnesia/compare.py +15 -10
- promnesia/config.py +22 -17
- promnesia/database/dump.py +1 -2
- promnesia/extract.py +6 -6
- promnesia/logging.py +27 -15
- promnesia/misc/install_server.py +25 -19
- promnesia/server.py +69 -53
- promnesia/sources/auto.py +65 -51
- promnesia/sources/browser.py +7 -2
- promnesia/sources/browser_legacy.py +51 -40
- promnesia/sources/demo.py +0 -1
- promnesia/sources/fbmessenger.py +0 -1
- promnesia/sources/filetypes.py +15 -11
- promnesia/sources/github.py +4 -1
- promnesia/sources/guess.py +4 -1
- promnesia/sources/hackernews.py +5 -7
- promnesia/sources/hpi.py +3 -1
- promnesia/sources/html.py +4 -2
- promnesia/sources/instapaper.py +1 -0
- promnesia/sources/markdown.py +4 -4
- promnesia/sources/org.py +17 -8
- promnesia/sources/plaintext.py +14 -11
- promnesia/sources/pocket.py +2 -1
- promnesia/sources/reddit.py +5 -8
- promnesia/sources/roamresearch.py +3 -1
- promnesia/sources/rss.py +4 -5
- promnesia/sources/shellcmd.py +3 -6
- promnesia/sources/signal.py +14 -14
- promnesia/sources/smscalls.py +0 -1
- promnesia/sources/stackexchange.py +2 -2
- promnesia/sources/takeout.py +14 -21
- promnesia/sources/takeout_legacy.py +16 -10
- promnesia/sources/telegram.py +7 -3
- promnesia/sources/telegram_legacy.py +5 -5
- promnesia/sources/twitter.py +1 -1
- promnesia/sources/vcs.py +6 -3
- promnesia/sources/viber.py +2 -2
- promnesia/sources/website.py +4 -3
- promnesia/sqlite.py +10 -7
- promnesia/tests/common.py +2 -0
- promnesia/tests/server_helper.py +2 -2
- promnesia/tests/sources/test_filetypes.py +9 -7
- promnesia/tests/sources/test_hypothesis.py +7 -3
- promnesia/tests/sources/test_org.py +7 -2
- promnesia/tests/sources/test_plaintext.py +9 -7
- promnesia/tests/sources/test_shellcmd.py +10 -9
- promnesia/tests/test_cannon.py +254 -237
- promnesia/tests/test_cli.py +8 -2
- promnesia/tests/test_compare.py +16 -12
- promnesia/tests/test_db_dump.py +4 -3
- promnesia/tests/test_extract.py +7 -4
- promnesia/tests/test_indexer.py +10 -10
- promnesia/tests/test_server.py +10 -10
- promnesia/tests/utils.py +1 -5
- promnesia-1.4.20250909.dist-info/METADATA +66 -0
- promnesia-1.4.20250909.dist-info/RECORD +80 -0
- {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info}/WHEEL +1 -2
- promnesia/kjson.py +0 -122
- promnesia/sources/__init__.pyi +0 -0
- promnesia-1.3.20241021.dist-info/METADATA +0 -55
- promnesia-1.3.20241021.dist-info/RECORD +0 -83
- promnesia-1.3.20241021.dist-info/top_level.txt +0 -1
- {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info}/entry_points.txt +0 -0
- {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info/licenses}/LICENSE +0 -0
promnesia/sources/github.py
CHANGED
@@ -1,6 +1,7 @@
 '''
 Uses [[https://github.com/karlicoss/HPI][HPI]] github module
 '''
+
 from __future__ import annotations
 
 # Note: requires the 'mistletoe' module if you enable render_markdown
@@ -16,7 +17,9 @@ def index(*, render_markdown: bool = False) -> Results:
         from .markdown import TextParser, extract_from_text
     except ImportError as import_err:
         logger.exception(import_err)
-        logger.critical("Could not import markdown module to render github body markdown. Try 'python3 -m pip install mistletoe'")
+        logger.critical(
+            "Could not import markdown module to render github body markdown. Try 'python3 -m pip install mistletoe'"
+        )
         render_markdown = False
 
     for e in events():
promnesia/sources/guess.py
CHANGED
@@ -22,14 +22,17 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
     ps = str(path)
     # TODO better url detection
 
-    index_: Any
+    index_: Any  # meh
     if is_git_repo(ps):
         from . import vcs
+
         index_ = vcs.index
     elif is_website(ps):
         from . import website
+
         index_ = website.index
     else:
         from . import auto
+
         index_ = auto.index
     yield from index_(path, *args, **kwargs)
promnesia/sources/hackernews.py
CHANGED
@@ -21,9 +21,7 @@ def index() -> Results:
             title = item.title
         elif item.text_html:
             title = item.text_html
-        title = textwrap.shorten(
-            title, width=79, placeholder="…",
-            break_long_words=True)
+        title = textwrap.shorten(title, width=79, placeholder="…", break_long_words=True)
         # The locator is always the HN story. If the story is a link (as
         # opposed to a text post), we insert a visit such that the link
         # will point back to the corresponding HN story.
@@ -33,8 +31,8 @@ def index() -> Results:
            urls.append(item.url)
        for url in urls:
            yield Visit(
-                    url=url,
-                    dt=item.created,
-                    locator=loc,
-                    context=title,
+                url=url,
+                dt=item.created,
+                locator=loc,
+                context=title,
            )
promnesia/sources/hpi.py
CHANGED
@@ -8,4 +8,6 @@ try:
     import my  # noqa: F401
 except ImportError as e:
     logger.exception(e)
-    logger.critical("Failed during 'import my'. You probably need to install & configure HPI package first (see 'https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org')")
+    logger.critical(
+        "Failed during 'import my'. You probably need to install & configure HPI package first (see 'https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org')"
+    )
promnesia/sources/html.py
CHANGED
@@ -7,7 +7,7 @@ from __future__ import annotations
 from collections.abc import Iterator
 from pathlib import Path
 
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, Tag
 
 from promnesia.common import Loc, PathIsh, Results, Visit, file_mtime
 
@@ -25,11 +25,13 @@ def extract_urls_from_html(s: str) -> Iterator[Url]:
     """
     soup = BeautifulSoup(s, 'lxml')
     for a in soup.find_all('a'):
+        assert isinstance(a, Tag), a  # make mypy happy
         href = a.attrs.get('href')
         if href is None or ('://' not in href):
             # second condition means relative link
             continue
-        text = a.text
+        assert isinstance(href, str), href  # make mypy happy
+        text: str = a.text
         yield (href, text)
 
 
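The two added asserts narrow bs4's loosely typed values (`find_all` can yield non-`Tag` nodes, and attribute values are not always `str`), which is what the "make mypy happy" comments refer to. A standalone sketch of the same narrowing pattern, not taken from the package:

```python
from bs4 import BeautifulSoup, Tag


def extract_links(html: str) -> list[tuple[str, str]]:
    soup = BeautifulSoup(html, 'html.parser')  # the module itself uses the 'lxml' parser
    results: list[tuple[str, str]] = []
    for a in soup.find_all('a'):
        if not isinstance(a, Tag):  # narrow the element type for the type checker
            continue
        href = a.attrs.get('href')
        if not isinstance(href, str) or '://' not in href:  # skip missing or relative links
            continue
        results.append((href, a.text))
    return results


print(extract_links('<a href="https://example.com">example</a>'))
```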
promnesia/sources/instapaper.py
CHANGED
promnesia/sources/markdown.py
CHANGED
@@ -4,10 +4,10 @@ from collections.abc import Iterator
 from pathlib import Path
 from typing import NamedTuple
 
-import mistletoe  # type: ignore
-import mistletoe.block_token as BT  # type: ignore
-from mistletoe.html_renderer import HTMLRenderer  # type: ignore
-from mistletoe.span_token import AutoLink, Link  # type: ignore
+import mistletoe  # type: ignore[import-untyped]
+import mistletoe.block_token as BT  # type: ignore[import-untyped]
+from mistletoe.html_renderer import HTMLRenderer  # type: ignore[import-untyped]
+from mistletoe.span_token import AutoLink, Link  # type: ignore[import-untyped]
 
 from promnesia.common import (
     Extraction,
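The only change in this hunk is scoping the ignores to a mypy error code: a bare `# type: ignore` suppresses any error mypy reports on that line, while `# type: ignore[import-untyped]` only suppresses the "library has no type stubs" diagnostic, so unrelated mistakes on the same line still get flagged. For instance:

```python
# old style: silences *any* mypy error reported on this line
import mistletoe  # type: ignore

# new style: silences only the "module is installed, but missing library stubs or py.typed" error
import mistletoe.block_token as BT  # type: ignore[import-untyped]
```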
promnesia/sources/org.py
CHANGED
@@ -4,7 +4,7 @@ import re
 from collections.abc import Iterable, Iterator
 from datetime import datetime
 from pathlib import Path
-from typing import NamedTuple, Optional, cast
+from typing import NamedTuple, cast
 
 import orgparse
 from orgparse.date import OrgDate, gene_timestamp_regex
@@ -25,6 +25,8 @@ from promnesia.common import (
 UPDATE_ORGPARSE_WARNING = 'WARNING: please update orgparse version to a more recent (pip3 install -U orgparse)'
 
 _warned = False
+
+
 def warn_old_orgparse_once() -> Iterable[Exception]:
     global _warned
     if _warned:
@@ -45,6 +47,7 @@ CREATED_RGX = re.compile(gene_timestamp_regex(brtype='inactive'), re.VERBOSE)
 ** subnote
 """
 
+
 class Parsed(NamedTuple):
     dt: datetime | None
     heading: str
@@ -56,14 +59,14 @@ def _parse_node(n: OrgNode) -> Parsed:
 
     heading = n.get_heading('raw')
     pp = n.properties
-    createds = cast(Optional[str], pp.get('CREATED', None))
+    createds = cast(str | None, pp.get('CREATED', None))
     if createds is None:
         # TODO replace with 'match', but need to strip off priority etc first?
         # see _parse_heading in orgparse
         # todo maybe use n.get_timestamps(inactive=True, point=True)? only concern is that it's searching in the body as well?
         m = CREATED_RGX.search(heading)
         if m is not None:
-            createds = m.group(0)
+            createds = m.group(0)  # could be None
             # todo a bit hacky..
             heading = heading.replace(createds + ' ', '')
     if createds is not None:
@@ -72,7 +75,11 @@ def _parse_node(n: OrgNode) -> Parsed:
             dt = None
         else:
             [odt] = OrgDate.list_from_str(createds)
-            dt = odt.start
+            start = odt.start
+            if not isinstance(start, datetime):  # could be date
+                dt = datetime.combine(start, datetime.min.time())  # meh, but the best we can do?
+            else:
+                dt = start
     else:
         dt = None
     return Parsed(dt=dt, heading=heading)
@@ -160,7 +167,7 @@ def extract_from_file(fname: PathIsh) -> Results:
 
         (parsed, node) = wr
         dt = parsed.dt
-        assert dt is not None
+        assert dt is not None  # shouldn't be because of fallback
         for r in iter_org_urls(node):
             # TODO get body recursively? not sure
             try:
@@ -170,7 +177,7 @@ def extract_from_file(fname: PathIsh) -> Results:
                 ctx = parsed.heading + tagss + '\n' + get_body_compat(node)
             except Exception as e:
                 yield e
-                ctx = 'ERROR'
+                ctx = 'ERROR'  # TODO more context?
 
             if isinstance(r, Url):
                 yield Visit(
@@ -178,9 +185,11 @@ def extract_from_file(fname: PathIsh) -> Results:
                     dt=dt,
                     locator=Loc.file(
                         fname,
-                        line=getattr(node, 'linenumber', None),  # make it defensive so it works against older orgparse (pre 0.2)
+                        line=getattr(
+                            node, 'linenumber', None
+                        ),  # make it defensive so it works against older orgparse (pre 0.2)
                     ),
                     context=ctx,
                 )
-            else:
+            else:  # error
                 yield r
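The new branch in `_parse_node` covers `CREATED` properties that carry a date but no time of day: orgparse's `OrgDate.start` may return either a `date` or a `datetime`, and `datetime.combine` with `datetime.min.time()` pads the former to midnight. A minimal standalone illustration of that fallback (the helper name is made up):

```python
from __future__ import annotations

from datetime import date, datetime


def as_datetime(start: date | datetime) -> datetime:
    # datetime is a subclass of date, so test the more specific type first
    if isinstance(start, datetime):
        return start
    return datetime.combine(start, datetime.min.time())  # pad a bare date to 00:00:00


print(as_datetime(date(2011, 11, 4)))         # 2011-11-04 00:00:00
print(as_datetime(datetime(2011, 11, 4, 9)))  # 2011-11-04 09:00:00
```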
promnesia/sources/plaintext.py
CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
 from functools import lru_cache
 from pathlib import Path
 
-from promnesia.common import PathIsh, get_logger, _is_windows
+from promnesia.common import PathIsh, _is_windows
 
 # https://linux-and-mac-hacks.blogspot.co.uk/2013/04/use-grep-and-regular-expressions-to.html
 _URL_REGEX = r'\b(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@#/%=~_|]'
@@ -17,6 +17,7 @@ if _is_windows:
 @lru_cache
 def _has_grep() -> bool:
     import shutil
+
     return shutil.which('grep') is not None
 
 
@@ -25,9 +26,9 @@ Command = list[str]
 
 _GREP_ARGS: Command = [
     '--color=never',
-    '-H',
-    '-n',
-    '-I',
+    '-H',  # always show filename TODO not sure if works on osx
+    '-n',  # print line numbers (to restore context)
+    '-I',  # ignore binaries
 ]
 
 if not _is_windows:
@@ -36,6 +37,7 @@ if not _is_windows:
         '--exclude-dir=".git"',
     ]
 
+
 # NOTE: grep/findstr exit with code 1 on no matches...
 # we hack around it in shellcmd module (search 'grep')
 def _grep(*, paths: list[str], recursive: bool) -> Command:
@@ -43,11 +45,12 @@ def _grep(*, paths: list[str], recursive: bool) -> Command:
         'grep',
         *(['-r'] if recursive else []),
         *_GREP_ARGS,
-        '-E',
+        '-E',  # 'extended' syntax
         _URL_REGEX,
         *paths,
     ]
 
+
 def _findstr(*, path: str, recursive: bool) -> Command:
     return [
         'findstr',
@@ -85,18 +88,18 @@ def _extract_from_file(path: str) -> Command:
 def extract_from_path(path: PathIsh) -> Command:
     pp = Path(path)
 
-
-
-    logger = get_logger()
-    if pp.is_dir(): # TODO handle archives here???
+    if pp.is_dir():  # TODO handle archives here???
         return _extract_from_dir(str(pp))
 
-    if any(pp.suffix == ex for ex in (
+    if any(
+        pp.suffix == ex
+        for ex in (
             '.xz',
             '.bz2',
             '.gz',
             '.zip',
-    )):
+        )
+    ):
         # todo should be debug?
         # or should delete it completely, feels like unpacking archives here is a bit too much
         raise RuntimeError(f"Archives aren't supported yet: {path}")
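The flag comments added above only document behaviour the module already relied on; put together, the command built for a directory looks roughly like the sketch below (the regex is the module's `_URL_REGEX` shown in the context lines, the path is illustrative):

```python
URL_REGEX = r'\b(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@#/%=~_|]'


def grep_command(paths: list[str], *, recursive: bool) -> list[str]:
    return [
        'grep',
        *(['-r'] if recursive else []),
        '--color=never',
        '-H',                    # always print the file name
        '-n',                    # print line numbers, later used to build locators
        '-I',                    # skip binary files
        '--exclude-dir=".git"',  # appended on non-Windows platforms
        '-E',                    # extended regex syntax
        URL_REGEX,
        *paths,
    ]


print(' '.join(grep_command(['/path/to/notes'], recursive=True)))
```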
promnesia/sources/pocket.py
CHANGED
@@ -16,7 +16,8 @@ def index() -> Results:
         loc = Loc.make(title=title, href=a.pocket_link)
         # Add a reverse locator so that the Promnesia browser extension shows a
         # link on the Pocket page back to the original URL.
-        loc_rev = Loc.make(title=title, href=a.url)
+        # FIXME need to actually use it
+        _loc_rev = Loc.make(title=title, href=a.url)
         hls = a.highlights
         excerpt = a.json.get('excerpt', None)
         if len(hls) == 0:
promnesia/sources/reddit.py
CHANGED
@@ -21,6 +21,7 @@ def index(*, render_markdown: bool = False, renderer: type[RedditRenderer] | None = None) -> Results:
     except ModuleNotFoundError as e:
         if "No module named 'my.reddit.all'" in str(e):
             import warnings
+
             warnings.warn("DEPRECATED/reddit: Using an old version of HPI, please update")
             from my.reddit import comments, saved, submissions, upvoted
         else:
@@ -69,6 +70,7 @@ class RedditRenderer:
         self._parser_cls = None
         try:
             from .markdown import TextParser, extract_from_text
+
             self._link_extractor = extract_from_text
             self._parser_cls = TextParser
         except ImportError as import_err:
@@ -78,11 +80,12 @@ class RedditRenderer:
             # only send error if the user is trying to enable this feature
             if render_markdown:
                 logger.exception(import_err)
-                logger.critical("Could not import markdown module to render reddit markdown. Try 'python3 -m pip install mistletoe'")
+                logger.critical(
+                    "Could not import markdown module to render reddit markdown. Try 'python3 -m pip install mistletoe'"
+                )
                 render_markdown = False  # force to be false, couldn't import
         self.render_markdown = render_markdown
 
-
     def _from_comment(self, i: Comment) -> Results:
         locator = Loc.make(
             title='Reddit comment',
@@ -90,7 +93,6 @@ class RedditRenderer:
         )
         yield from self._from_common(i, locator=locator)
 
-
     def _from_submission(self, i: Submission) -> Results:
         locator = Loc.make(
             title=f'Reddit submission: {i.title}',
@@ -98,7 +100,6 @@ class RedditRenderer:
         )
         yield from self._from_common(i, locator=locator)
 
-
     def _from_upvote(self, i: Upvote) -> Results:
         locator = Loc.make(
             title='Reddit upvote',
@@ -106,7 +107,6 @@ class RedditRenderer:
         )
         yield from self._from_common(i, locator=locator)
 
-
     def _from_save(self, i: Save) -> Results:
         locator = Loc.make(
             title='Reddit save',
@@ -114,7 +114,6 @@ class RedditRenderer:
         )
         yield from self._from_common(i, locator=locator)
 
-
     # to allow for possible subclassing by the user?
     def _render_body(self, text: str) -> str:
         if self.render_markdown and self._parser_cls is not None:
@@ -122,7 +121,6 @@ class RedditRenderer:
         else:
             return text
 
-
     def _from_common(self, i: RedditBase, locator: Loc) -> Results:
         urls = [i.url]
         # TODO this should belong to HPI.. fix permalink handling I guess
@@ -170,4 +168,3 @@ class RedditRenderer:
                 locator=locator,
             )
             emitted.add(res.url)
-
promnesia/sources/roamresearch.py
CHANGED
@@ -7,6 +7,7 @@ from promnesia.common import Loc, Results, Visit, extract_urls
 
 def index() -> Results:
     import my.roamresearch as RR
+
     roam = RR.roam()
     for node in roam.traverse():
         yield from _collect(node)
@@ -14,7 +15,7 @@ def index() -> Results:
 
 def _collect(node: 'RoamNode') -> Results:
     title = node.title
-    body = node.body
+    body = node.body or ''
     if title is None:
         # most notes don't have title, so we just take the first line instead..
         lines = body.splitlines(keepends=True)
@@ -46,4 +47,5 @@ import typing
 
 if typing.TYPE_CHECKING:
     import my.roamresearch as RR
+
     RoamNode = RR.Node
promnesia/sources/rss.py
CHANGED
@@ -2,17 +2,16 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for RSS data.
 '''
 
-from datetime import datetime
-
-import pytz
+from datetime import datetime, timezone
 
 from promnesia.common import Loc, Results, Visit
 
 # arbitrary, 2011-11-04 00:05:23.283+00:00
-default_datetime = datetime.fromtimestamp(1320365123, tz=pytz.utc)
+default_datetime = datetime.fromtimestamp(1320365123, tz=timezone.utc)
 # TODO FIXME allow for visit not to have datetime?
 # I.e. even having context is pretty good!
 
+
 def index() -> Results:
     from my.rss.all import subscriptions
 
@@ -22,6 +21,6 @@ def index() -> Results:
         yield Visit(
             url=feed.url,
             dt=feed.created_at or default_datetime,
-            context='RSS subscription',
+            context='RSS subscription',  # TODO use 'provider', etc?
             locator=locator,
         )
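Dropping pytz here is safe because a fixed UTC offset is all the module needs; the stdlib spelling below produces the same aware datetime the old pytz-based call did:

```python
from datetime import datetime, timezone

default_datetime = datetime.fromtimestamp(1320365123, tz=timezone.utc)
print(default_datetime)  # 2011-11-04 00:05:23+00:00, matching the comment in the source
```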
promnesia/sources/shellcmd.py
CHANGED
@@ -19,7 +19,6 @@ from promnesia.common import (
     _is_windows,
     extract_urls,
     file_mtime,
-    get_system_tz,
     now_tz,
 )
 
@@ -37,8 +36,6 @@ def index(command: str | Sequence[PathIsh]) -> Results:
         cmds = ' '.join(map(str, command))
         cmd = command
 
-    tz = get_system_tz()
-
     # ugh... on windows grep does something nasty? e.g:
     # grep --color=never -r -H -n -I -E http 'D:\\a\\promnesia\\promnesia\\tests\\testdata\\custom'
     # D:\a\promnesia\promnesia\tests\testdata\custom/file1.txt:1:Right, so this points at http://google.com
@@ -55,9 +52,9 @@ def index(command: str | Sequence[PathIsh]) -> Results:
             fname = None
             lineno = None
         else:
-            fname  = m.group(1)
+            fname = m.group(1)
             lineno = int(m.group(2))
-            line   = m.group(3)
+            line = m.group(3)
 
         if fname is not None and needs_windows_grep_patching:
             fname = fname.replace('/', os.sep)
@@ -86,7 +83,7 @@ def index(command: str | Sequence[PathIsh]) -> Results:
 
     r = run(cmd, stdout=PIPE, check=False)
     if r.returncode > 0:
-        if not (cmd[0] in {'grep', 'findstr'} and r.returncode == 1):
+        if not (cmd[0] in {'grep', 'findstr'} and r.returncode == 1):  # ugh. grep returns 1 on no matches...
             r.check_returncode()
     output = r.stdout
     assert output is not None
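The clarified comment is about grep's exit-status convention: 0 means matches were found, 1 means no matches, and anything above 1 is a real error, so only the last case should raise. A small sketch of that handling with an illustrative pattern (the exact regex the module uses to split `file:line:text` output isn't shown in this hunk):

```python
import re
from subprocess import PIPE, run

r = run(['grep', '-r', '-H', '-n', 'http', '/path/to/notes'], stdout=PIPE, check=False)
if r.returncode > 1:  # 1 just means "no matches", so only >1 is an actual failure
    r.check_returncode()

# grep -Hn prints lines like: path/to/file.txt:12:see https://example.com
line_re = re.compile(r'(.*?):(\d+?):(.*)')
for line in r.stdout.decode(errors='replace').splitlines():
    m = line_re.match(line)
    if m is None:
        continue
    fname, lineno, text = m.group(1), int(m.group(2)), m.group(3)
    print(fname, lineno, text)
```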
promnesia/sources/signal.py
CHANGED
@@ -1,6 +1,7 @@
 """
 Collects visits from Signal Desktop's encrypted SQLIite db(s).
 """
+
 from __future__ import annotations
 
 # Functions get their defaults from module-data.
@@ -17,17 +18,15 @@ from collections.abc import Iterable, Iterator, Mapping
 from contextlib import contextmanager
 from pathlib import Path
 from textwrap import dedent, indent
-from typing import Any, Union
+from typing import Any
 
 from ..common import Loc, PathIsh, Results, Visit, extract_urls, from_epoch
 
-PathIshes = Union[PathIsh, Iterable[PathIsh]]
-
 
 def index(
     *db_paths: PathIsh,
     http_only: bool = False,
-    locator_schema: str="editor",
+    locator_schema: str = "editor",
     append_platform_path: bool = False,
     override_key: str | None = None,
 ) -> Results:
@@ -51,8 +50,7 @@ def index(
         otherwise, this same key is used for harvesting all db-files.
     """
     logger.debug(
-        "http_only?(%s), locator_schema?(%s), append_platform_path?(%s), "
-        "overide_key given?(%s), db_paths: %s",
+        "http_only?(%s), locator_schema?(%s), append_platform_path?(%s), overide_key given?(%s), db_paths: %s",
         http_only,
         locator_schema,
         append_platform_path,
@@ -171,7 +169,10 @@ def _expand_path(path_pattern: PathIsh) -> Iterable[Path]:
 
     Expansion code adapted from https://stackoverflow.com/a/51108375/548792
     to handle also degenerate cases (``'', '.', '/'``):
+    """
 
+    # NOTE: suppressing doctest from github actions
+    """
     >>> str(next(iter(_get_files('/'))))
     '/'
 
@@ -194,7 +195,7 @@ def _expand_path(path_pattern: PathIsh) -> Iterable[Path]:
     return path.glob(str(Path(*parts))) if parts else [path]
 
 
-def _expand_paths(paths: PathIshes) -> Iterable[Path]:
+def _expand_paths(paths: PathIsh | Iterable[PathIsh]) -> Iterable[Path]:
     if _is_pathish(paths):
         paths = [paths]  # type: ignore[list-item]
     return [pp.resolve() for p in paths for pp in _expand_path(p)]  # type: ignore[union-attr]
@@ -214,7 +215,10 @@ def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path]:
         one or more pathish
 
     Note: needed `append` here, to resolve paths.
+    """
 
+    # NOTE: suppressing doctest from running on Github actions
+    """
     >>> bool(collect_db_paths())  # my home-path
     True
     >>> collect_db_paths(None)
@@ -239,8 +243,7 @@ def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path]:
         plat_paths = platform_db_paths[platform_name]
     except LookupError as le:
         raise ValueError(
-            f"Unknown platform({platform_name}!"
-            f"\n Expected one of {list(platform_db_paths.keys())}."
+            f"Unknown platform({platform_name}!\n Expected one of {list(platform_db_paths.keys())}."
         ) from le
 
     if db_paths and append:
@@ -325,9 +328,7 @@ def connect_db(
         )
         sql = "\n".join(sql_cmds)
         cmd = [sqlcipher_exe, str(db_path)]
-        logger.debug(
-            "Decrypting db '%s' with cmd: %s <<<EOF\n%s\nEOF", db_path, cmd, sql
-        )
+        logger.debug("Decrypting db '%s' with cmd: %s <<<EOF\n%s\nEOF", db_path, cmd, sql)
         try:
             sbp.run(
                 cmd,
@@ -358,12 +359,11 @@ def connect_db(
         yield db
     finally:
         try:
-            if db:
+            if db is not None:
                 db.close()
         finally:
             if decrypted_file and decrypted_file.exists():
                 try:
-
                     logger.debug("Deleting temporary decrypted db: %s", decrypted_file)
                     decrypted_file.unlink()
                 except Exception as ex:
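The docstring edits here use a small trick to keep the `>>>` examples visible in the source while hiding them from doctest: the real docstring is closed early, and the examples live in a following bare string literal, which the doctest collector never inspects. A minimal illustration, assuming tests are run via `python -m doctest` or pytest's doctest mode:

```python
import doctest


def double(x: int) -> int:
    """Return x * 2."""

    # kept for readers, but invisible to doctest: a plain string expression
    # after the docstring is not the function's __doc__
    """
    >>> double(2)
    4
    """
    return x * 2


print(doctest.testmod())  # TestResults(failed=0, attempted=0) -- the example above is skipped
```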
promnesia/sources/smscalls.py
CHANGED
promnesia/sources/stackexchange.py
CHANGED
@@ -16,7 +16,7 @@ def index() -> Results:
         yield Visit(
             url=v.link,
             dt=v.when,
-            context='voted',
+            context='voted',  # todo use the votetype? although maybe worth ignoring downvotes
             # or, downvotes could have 'negative' ranking or something
-            locator=Loc.make(title='voted', href=v.link)
+            locator=Loc.make(title='voted', href=v.link),
         )
promnesia/sources/takeout.py
CHANGED
@@ -35,13 +35,15 @@ def index() -> Results:
         logger.exception(ex)
         yield ex
 
-        warnings.warn("Please set up my.google.takeout.parser module for better takeout support. Falling back to legacy implementation.")
+        warnings.warn(
+            "Please set up my.google.takeout.parser module for better takeout support. Falling back to legacy implementation."
+        )
 
         from . import takeout_legacy
+
         yield from takeout_legacy.index()
         return
 
-
     _seen: set[str] = {
         # these are definitely not useful for promnesia
         'Location',
@@ -52,10 +54,13 @@ def index() -> Results:
     imported_yt_csv_models = False
     try:
         from google_takeout_parser.models import CSVYoutubeComment, CSVYoutubeLiveChat
+
        imported_yt_csv_models = True
     except ImportError:
         # warn user to upgrade google_takeout_parser
-        warnings.warn("Please upgrade google_takeout_parser (`pip install -U google_takeout_parser`) to support the new format for youtube comments")
+        warnings.warn(
+            "Please upgrade google_takeout_parser (`pip install -U google_takeout_parser`) to support the new format for youtube comments"
+        )
         CSVYoutubeComment = YoutubeCSVStub  # type: ignore[misc,assignment]
         CSVYoutubeLiveChat = YoutubeCSVStub  # type: ignore[misc,assignment]
 
@@ -130,16 +135,12 @@ def index() -> Results:
         elif isinstance(e, LikedYoutubeVideo):
             # TODO not sure if desc makes sense here since it's not user produced data
             # it's just a part of video meta?
-            yield Visit(
-                url=e.link, dt=e.dt, context=e.desc, locator=Loc(title=e.title, href=e.link)
-            )
+            yield Visit(url=e.link, dt=e.dt, context=e.desc, locator=Loc(title=e.title, href=e.link))
         elif isinstance(e, YoutubeComment):
             for url in e.urls:
                 # todo: use url_metadata to improve locator?
                 # or maybe just extract first sentence?
-                yield Visit(
-                    url=url, dt=e.dt, context=e.content, locator=Loc(title=e.content, href=url)
-                )
+                yield Visit(url=url, dt=e.dt, context=e.content, locator=Loc(title=e.content, href=url))
         elif imported_yt_csv_models and isinstance(e, CSVYoutubeComment):
             contentJSON = e.contentJSON
             content = reconstruct_comment_content(contentJSON, format='text')
@@ -152,12 +153,8 @@ def index() -> Results:
                 continue
             context = f"Commented on {e.video_url}"
             for url in links:
-                yield Visit(
-                    url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url)
-                )
-            yield Visit(
-                url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url)
-            )
+                yield Visit(url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url))
+            yield Visit(url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url))
         elif imported_yt_csv_models and isinstance(e, CSVYoutubeLiveChat):
             contentJSON = e.contentJSON
             content = reconstruct_comment_content(contentJSON, format='text')
@@ -170,12 +167,8 @@ def index() -> Results:
                 continue
             context = f"Commented on livestream {e.video_url}"
             for url in links:
-                yield Visit(
-                    url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url)
-                )
-            yield Visit(
-                url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url)
-            )
+                yield Visit(url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url))
+            yield Visit(url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url))
         else:
             yield from warn_once_if_not_seen(e)
 