promnesia 1.2.20240810__py3-none-any.whl → 1.4.20250909__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promnesia/__init__.py +18 -4
- promnesia/__main__.py +104 -78
- promnesia/cannon.py +108 -107
- promnesia/common.py +107 -88
- promnesia/compare.py +33 -30
- promnesia/compat.py +10 -10
- promnesia/config.py +37 -34
- promnesia/database/common.py +4 -3
- promnesia/database/dump.py +13 -13
- promnesia/database/load.py +7 -7
- promnesia/extract.py +19 -17
- promnesia/logging.py +27 -15
- promnesia/misc/install_server.py +32 -27
- promnesia/server.py +106 -79
- promnesia/sources/auto.py +104 -77
- promnesia/sources/auto_logseq.py +6 -5
- promnesia/sources/auto_obsidian.py +2 -2
- promnesia/sources/browser.py +20 -10
- promnesia/sources/browser_legacy.py +65 -50
- promnesia/sources/demo.py +7 -8
- promnesia/sources/fbmessenger.py +3 -3
- promnesia/sources/filetypes.py +22 -16
- promnesia/sources/github.py +9 -8
- promnesia/sources/guess.py +6 -2
- promnesia/sources/hackernews.py +7 -9
- promnesia/sources/hpi.py +5 -3
- promnesia/sources/html.py +11 -7
- promnesia/sources/hypothesis.py +3 -2
- promnesia/sources/instapaper.py +3 -2
- promnesia/sources/markdown.py +22 -12
- promnesia/sources/org.py +36 -17
- promnesia/sources/plaintext.py +41 -39
- promnesia/sources/pocket.py +5 -3
- promnesia/sources/reddit.py +24 -26
- promnesia/sources/roamresearch.py +5 -2
- promnesia/sources/rss.py +6 -8
- promnesia/sources/shellcmd.py +21 -11
- promnesia/sources/signal.py +27 -26
- promnesia/sources/smscalls.py +2 -3
- promnesia/sources/stackexchange.py +5 -4
- promnesia/sources/takeout.py +37 -34
- promnesia/sources/takeout_legacy.py +29 -19
- promnesia/sources/telegram.py +18 -12
- promnesia/sources/telegram_legacy.py +22 -11
- promnesia/sources/twitter.py +7 -6
- promnesia/sources/vcs.py +11 -6
- promnesia/sources/viber.py +11 -10
- promnesia/sources/website.py +8 -7
- promnesia/sources/zulip.py +3 -2
- promnesia/sqlite.py +13 -7
- promnesia/tests/common.py +10 -5
- promnesia/tests/server_helper.py +13 -10
- promnesia/tests/sources/test_auto.py +2 -3
- promnesia/tests/sources/test_filetypes.py +11 -8
- promnesia/tests/sources/test_hypothesis.py +10 -6
- promnesia/tests/sources/test_org.py +9 -5
- promnesia/tests/sources/test_plaintext.py +9 -8
- promnesia/tests/sources/test_shellcmd.py +13 -13
- promnesia/tests/sources/test_takeout.py +3 -5
- promnesia/tests/test_cannon.py +256 -239
- promnesia/tests/test_cli.py +12 -8
- promnesia/tests/test_compare.py +17 -13
- promnesia/tests/test_config.py +7 -8
- promnesia/tests/test_db_dump.py +15 -15
- promnesia/tests/test_extract.py +17 -10
- promnesia/tests/test_indexer.py +24 -18
- promnesia/tests/test_server.py +12 -13
- promnesia/tests/test_traverse.py +0 -2
- promnesia/tests/utils.py +3 -7
- promnesia-1.4.20250909.dist-info/METADATA +66 -0
- promnesia-1.4.20250909.dist-info/RECORD +80 -0
- {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/WHEEL +1 -2
- promnesia/kjson.py +0 -121
- promnesia/sources/__init__.pyi +0 -0
- promnesia-1.2.20240810.dist-info/METADATA +0 -54
- promnesia-1.2.20240810.dist-info/RECORD +0 -83
- promnesia-1.2.20240810.dist-info/top_level.txt +0 -1
- {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/entry_points.txt +0 -0
- {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info/licenses}/LICENSE +0 -0
promnesia/sources/markdown.py
CHANGED
@@ -1,14 +1,24 @@
-from pathlib import Path
-from typing import Iterator, NamedTuple, Optional
-
-from ..common import Extraction, Url, PathIsh, Res, Visit, Loc, file_mtime, logger
-
-
-import mistletoe # type: ignore
-from mistletoe.span_token import AutoLink, Link # type: ignore
-import mistletoe.block_token as BT # type: ignore
-from mistletoe.html_renderer import HTMLRenderer # type: ignore
+from __future__ import annotations

+from collections.abc import Iterator
+from pathlib import Path
+from typing import NamedTuple
+
+import mistletoe  # type: ignore[import-untyped]
+import mistletoe.block_token as BT  # type: ignore[import-untyped]
+from mistletoe.html_renderer import HTMLRenderer  # type: ignore[import-untyped]
+from mistletoe.span_token import AutoLink, Link  # type: ignore[import-untyped]
+
+from promnesia.common import (
+    Extraction,
+    Loc,
+    PathIsh,
+    Res,
+    Url,
+    Visit,
+    file_mtime,
+    logger,
+)

 renderer = HTMLRenderer()

@@ -18,7 +28,7 @@ block_tokens = tuple(getattr(BT, name) for name in BT.__all__)

 class Parsed(NamedTuple):
     url: Url
-    context: Optional[str]
+    context: str | None


 Result = Res[Parsed]
@@ -118,7 +128,7 @@ class TextParser(Parser):
         self._html = HTML_MARKER + _ashtml(self.doc)
         return self._html

-    def _extract(self, cur, last_block=None) -> Iterator[Parsed]:
+    def _extract(self, cur, last_block=None) -> Iterator[Parsed]:  # noqa: ARG002
         if not isinstance(cur, (AutoLink, Link)):
             return

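The pattern in this file recurs across the whole release: `from __future__ import annotations` plus PEP 604 unions (`str | None`) replacing `typing.Optional`. A minimal standalone sketch (not taken from the package) of why this is safe even before Python 3.10:

from __future__ import annotations

from typing import NamedTuple


class Parsed(NamedTuple):
    url: str
    context: str | None  # previously spelled Optional[str]


# With the __future__ import, annotations are stored as strings and never
# evaluated at runtime, so the `|` union syntax costs nothing on older 3.x.
p = Parsed(url='https://example.com', context=None)
print(p.context is None)  # True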
promnesia/sources/org.py
CHANGED
@@ -1,20 +1,32 @@
-from datetime import datetime
+from __future__ import annotations
+
 import re
-from typing import Iterable, Iterator, NamedTuple, Optional, Tuple, cast
+from collections.abc import Iterable, Iterator
+from datetime import datetime
 from pathlib import Path
-
-
-from ..common import Visit, get_logger, Results, Url, Loc, from_epoch, iter_urls, PathIsh, Res, file_mtime
-
+from typing import NamedTuple, cast

 import orgparse
-from orgparse.date import gene_timestamp_regex, OrgDate
+from orgparse.date import OrgDate, gene_timestamp_regex
 from orgparse.node import OrgNode

+from promnesia.common import (
+    Loc,
+    PathIsh,
+    Res,
+    Results,
+    Url,
+    Visit,
+    file_mtime,
+    get_logger,
+    iter_urls,
+)

 UPDATE_ORGPARSE_WARNING = 'WARNING: please update orgparse version to a more recent (pip3 install -U orgparse)'

 _warned = False
+
+
 def warn_old_orgparse_once() -> Iterable[Exception]:
     global _warned
     if _warned:
@@ -35,8 +47,9 @@ CREATED_RGX = re.compile(gene_timestamp_regex(brtype='inactive'), re.VERBOSE)
 ** subnote
 """

+
 class Parsed(NamedTuple):
-    dt: Optional[datetime]
+    dt: datetime | None
     heading: str


@@ -46,14 +59,14 @@ def _parse_node(n: OrgNode) -> Parsed:

     heading = n.get_heading('raw')
     pp = n.properties
-    createds = cast(Optional[str], pp.get('CREATED', None))
+    createds = cast(str | None, pp.get('CREATED', None))
     if createds is None:
         # TODO replace with 'match', but need to strip off priority etc first?
         # see _parse_heading in orgparse
         # todo maybe use n.get_timestamps(inactive=True, point=True)? only concern is that it's searching in the body as well?
         m = CREATED_RGX.search(heading)
         if m is not None:
-            createds = m.group(0)
+            createds = m.group(0)  # could be None
             # todo a bit hacky..
             heading = heading.replace(createds + ' ', '')
     if createds is not None:
@@ -62,7 +75,11 @@ def _parse_node(n: OrgNode) -> Parsed:
             dt = None
         else:
             [odt] = OrgDate.list_from_str(createds)
-            dt = odt.start
+            start = odt.start
+            if not isinstance(start, datetime):  # could be date
+                dt = datetime.combine(start, datetime.min.time())  # meh, but the best we can do?
+            else:
+                dt = start
     else:
         dt = None
     return Parsed(dt=dt, heading=heading)
@@ -74,7 +91,7 @@ def _get_heading(n: OrgNode):
     return '' if n.is_root() else n.get_heading(format='raw')


-def walk_node(*, node: OrgNode, dt: datetime) -> Iterator[Res[Tuple[Parsed, OrgNode]]]:
+def walk_node(*, node: OrgNode, dt: datetime) -> Iterator[Res[tuple[Parsed, OrgNode]]]:
     try:
         parsed = _parse_node(node)
     except Exception as e:
@@ -98,7 +115,7 @@ def get_body_compat(node: OrgNode) -> str:
             # get_body was only added to root in 0.2.0
             for x in warn_old_orgparse_once():
                 # ugh. really crap, but it will at least only warn once... (becaue it caches)
-                raise x
+                raise x  # noqa: B904
             return UPDATE_ORGPARSE_WARNING
         else:
             raise e
@@ -150,7 +167,7 @@ def extract_from_file(fname: PathIsh) -> Results:

         (parsed, node) = wr
         dt = parsed.dt
-        assert dt is not None
+        assert dt is not None  # shouldn't be because of fallback
         for r in iter_org_urls(node):
             # TODO get body recursively? not sure
             try:
@@ -160,7 +177,7 @@ def extract_from_file(fname: PathIsh) -> Results:
                 ctx = parsed.heading + tagss + '\n' + get_body_compat(node)
             except Exception as e:
                 yield e
-                ctx = 'ERROR'
+                ctx = 'ERROR'  # TODO more context?

             if isinstance(r, Url):
                 yield Visit(
@@ -168,9 +185,11 @@ def extract_from_file(fname: PathIsh) -> Results:
                     dt=dt,
                     locator=Loc.file(
                         fname,
-                        line=getattr(node, 'linenumber', None),  # make it defensive so it works against older orgparse (pre 0.2)
+                        line=getattr(
+                            node, 'linenumber', None
+                        ),  # make it defensive so it works against older orgparse (pre 0.2)
                     ),
                     context=ctx,
                 )
-            else:
+            else:  # error
                 yield r
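The one behavioral fix here: `OrgDate.list_from_str(...).start` can be a plain `date` when the org timestamp has no time component, and the new code promotes it to midnight. A standalone sketch of that normalization (no orgparse required; `normalize` is an illustrative name, not from the package):

from datetime import date, datetime


def normalize(start):
    # datetime is a subclass of date, so the isinstance check must target
    # datetime specifically to catch date-only values like [2020-01-01]
    if not isinstance(start, datetime):
        return datetime.combine(start, datetime.min.time())
    return start


print(normalize(date(2020, 1, 1)))              # 2020-01-01 00:00:00
print(normalize(datetime(2020, 1, 1, 12, 30)))  # 2020-01-01 12:30:00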
promnesia/sources/plaintext.py
CHANGED
@@ -1,10 +1,9 @@
-from ..common import get_logger, PathIsh, _is_windows
-from ..compat import removeprefix
+from __future__ import annotations

 from functools import lru_cache
 from pathlib import Path
-
-from typing import List
+
+from promnesia.common import PathIsh, _is_windows

 # https://linux-and-mac-hacks.blogspot.co.uk/2013/04/use-grep-and-regular-expressions-to.html
 _URL_REGEX = r'\b(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@#/%=~_|]'
@@ -12,23 +11,24 @@ _URL_REGEX = r'\b(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@
 if _is_windows:
     # wtf? for some reason on windows (in cmd.exe specificaly) \b isn't working...
     # this will make the regex a bit less precise, but not end of the world
-    _URL_REGEX = removeprefix(_URL_REGEX, r'\b')
+    _URL_REGEX = _URL_REGEX.removeprefix(r'\b')


-@lru_cache()
+@lru_cache
 def _has_grep() -> bool:
     import shutil
+
     return shutil.which('grep') is not None


-Command = List[str]
+Command = list[str]


 _GREP_ARGS: Command = [
     '--color=never',
-    '-H',
-    '-n',
-    '-I',
+    '-H',  # always show filename TODO not sure if works on osx
+    '-n',  # print line numbers (to restore context)
+    '-I',  # ignore binaries
 ]

 if not _is_windows:
@@ -37,18 +37,20 @@ if not _is_windows:
         '--exclude-dir=".git"',
     ]

+
 # NOTE: grep/findstr exit with code 1 on no matches...
 # we hack around it in shellcmd module (search 'grep')
-def _grep(*, paths: List[str], recursive: bool) -> Command:
+def _grep(*, paths: list[str], recursive: bool) -> Command:
     return [
         'grep',
         *(['-r'] if recursive else []),
         *_GREP_ARGS,
-        '-E',
+        '-E',  # 'extended' syntax
         _URL_REGEX,
         *paths,
     ]

+
 def _findstr(*, path: str, recursive: bool) -> Command:
     return [
         'findstr',
@@ -86,31 +88,31 @@ def _extract_from_file(path: str) -> Command:
 def extract_from_path(path: PathIsh) -> Command:
     pp = Path(path)

-
-
-    logger = get_logger()
-    if pp.is_dir(): # TODO handle archives here???
+    if pp.is_dir():  # TODO handle archives here???
         return _extract_from_dir(str(pp))
-    if any(pp.suffix == ex for ex in (
-        '.xz',
-        '.bz2',
-        '.gz',
-        '.zip',
-    )):
-        # todo should be debug?
-        # or should delete it completely, feels like unpacking archives here is a bit too much
-        raise RuntimeError(f"Archives aren't supported yet: {path}")
-        logger.info(f"Extracting from compressed file {path}")
-        import lzma
-        from tempfile import NamedTemporaryFile
-        # TODO hopefully, no collisions
-        import os.path
-        fname = os.path.join(tdir.name, os.path.basename(path))
-        with open(fname, 'wb') as fo:
-            with lzma.open(path, 'r') as cf:
-                fo.write(cf.read())
-        return _extract_from_file(fname)
-
-
-    r = _extract_from_file(str(pp))
-    return r
+
+    if any(
+        pp.suffix == ex
+        for ex in (
+            '.xz',
+            '.bz2',
+            '.gz',
+            '.zip',
+        )
+    ):
+        # todo should be debug?
+        # or should delete it completely, feels like unpacking archives here is a bit too much
+        raise RuntimeError(f"Archives aren't supported yet: {path}")
+        # logger.info(f"Extracting from compressed file {path}")
+        # import lzma
+        # from tempfile import NamedTemporaryFile
+        # # TODO hopefully, no collisions
+        # import os.path
+        # fname = os.path.join(tdir.name, os.path.basename(path))
+        # with open(fname, 'wb') as fo:
+        #     with lzma.open(path, 'r') as cf:
+        #         fo.write(cf.read())
+        # return _extract_from_file(fname)
+
+    r = _extract_from_file(str(pp))
+    return r
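Dropping the `promnesia.compat.removeprefix` shim works because `str.removeprefix` has been a builtin since Python 3.9, and it returns the string unchanged when the prefix is absent. A quick standalone illustration with a simplified stand-in pattern (not the real regex above):

URL_REGEX = r'\b(https?|ftp|file)://\S+'  # simplified stand-in

print(URL_REGEX.removeprefix(r'\b'))   # (https?|ftp|file)://\S+
print(URL_REGEX.removeprefix('zzz'))   # unchanged: \b(https?|ftp|file)://\S+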
promnesia/sources/pocket.py
CHANGED
@@ -1,11 +1,12 @@
 '''
 Uses [[https://github.com/karlicoss/HPI][HPI]] for Pocket highlights & bookmarks
 '''
-from ..common import Visit, Loc, Results
+
+from promnesia.common import Loc, Results, Visit


 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     from my.pocket import articles

     # TODO use docstring from my. module? E.g. describing which pocket format is expected
@@ -15,7 +16,8 @@ def index() -> Results:
         loc = Loc.make(title=title, href=a.pocket_link)
         # Add a reverse locator so that the Promnesia browser extension shows a
         # link on the Pocket page back to the original URL.
-        loc_rev = Loc.make(title=title, href=a.url)
+        # FIXME need to actually use it
+        _loc_rev = Loc.make(title=title, href=a.url)
         hls = a.highlights
         excerpt = a.json.get('excerpt', None)
         if len(hls) == 0:
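`from . import hpi  # noqa: F401` is a side-effect import: the sources import the sibling hpi module for what it does on import (presumably a friendlier error when HPI itself is missing), so the linter's unused-import warning is silenced rather than the import removed. A self-contained stdlib example of the same idiom:

import this  # noqa: F401  (imported purely for its side effect: prints the Zen of Python once)

Likewise, renaming `loc_rev` to `_loc_rev` is the conventional way to mark a deliberately unused value so linters (e.g. ruff's F841) stop flagging it.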
promnesia/sources/reddit.py
CHANGED
@@ -2,21 +2,28 @@
 Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myreddit][reddit]] module
 '''

+from __future__ import annotations
+
+import typing
 from itertools import chain
-from typing import Set, Optional, Type

-from ..common import Visit, Loc, extract_urls, Results, logger
+from promnesia.common import Loc, Results, Visit, extract_urls, logger
+
+if typing.TYPE_CHECKING:
+    from my.reddit.common import Comment, RedditBase, Save, Submission, Upvote
+

+def index(*, render_markdown: bool = False, renderer: type[RedditRenderer] | None = None) -> Results:
+    from . import hpi  # noqa: F401

-def index(*, render_markdown: bool = False, renderer: Optional[Type['RedditRenderer']] = None) -> Results:
-    from . import hpi
     try:
-        from my.reddit.all import submissions, comments, saved, upvoted
+        from my.reddit.all import comments, saved, submissions, upvoted
     except ModuleNotFoundError as e:
         if "No module named 'my.reddit.all'" in str(e):
             import warnings
+
             warnings.warn("DEPRECATED/reddit: Using an old version of HPI, please update")
-            from my.reddit import submissions, comments, saved, upvoted
+            from my.reddit import comments, saved, submissions, upvoted
         else:
             raise e

@@ -58,11 +65,12 @@ def index(*, render_markdown: bool = False, renderer: Optional[Type['RedditRenderer']] = None) -> Results:
 # mostly here so we can keep track of how the user
 # wants to render markdown
 class RedditRenderer:
-    def __init__(self, render_markdown: bool = False) -> None:
+    def __init__(self, *, render_markdown: bool = False) -> None:
         self._link_extractor = None
         self._parser_cls = None
         try:
             from .markdown import TextParser, extract_from_text
+
             self._link_extractor = extract_from_text
             self._parser_cls = TextParser
         except ImportError as import_err:
@@ -72,43 +80,40 @@ class RedditRenderer:
             # only send error if the user is trying to enable this feature
             if render_markdown:
                 logger.exception(import_err)
-                logger.critical("Could not import markdown module to render reddit markdown. Try 'python3 -m pip install mistletoe'")
+                logger.critical(
+                    "Could not import markdown module to render reddit markdown. Try 'python3 -m pip install mistletoe'"
+                )
             render_markdown = False  # force to be false, couldn't import
         self.render_markdown = render_markdown

-
-    def _from_comment(self, i: 'Comment') -> Results:
+    def _from_comment(self, i: Comment) -> Results:
         locator = Loc.make(
             title='Reddit comment',
             href=i.url,
         )
         yield from self._from_common(i, locator=locator)

-
-    def _from_submission(self, i: 'Submission') -> Results:
+    def _from_submission(self, i: Submission) -> Results:
         locator = Loc.make(
             title=f'Reddit submission: {i.title}',
             href=i.url,
         )
         yield from self._from_common(i, locator=locator)

-
-    def _from_upvote(self, i: 'Upvote') -> Results:
+    def _from_upvote(self, i: Upvote) -> Results:
         locator = Loc.make(
             title='Reddit upvote',
             href=i.url,
         )
         yield from self._from_common(i, locator=locator)

-
-    def _from_save(self, i: 'Save') -> Results:
+    def _from_save(self, i: Save) -> Results:
         locator = Loc.make(
             title='Reddit save',
             href=i.url,
         )
         yield from self._from_common(i, locator=locator)

-
     # to allow for possible subclassing by the user?
     def _render_body(self, text: str) -> str:
         if self.render_markdown and self._parser_cls is not None:
@@ -116,8 +121,7 @@ class RedditRenderer:
         else:
             return text

-
-    def _from_common(self, i: 'RedditBase', locator: Loc) -> Results:
+    def _from_common(self, i: RedditBase, locator: Loc) -> Results:
         urls = [i.url]
         # TODO this should belong to HPI.. fix permalink handling I guess
         # ok, it's not present for all of them..
@@ -130,7 +134,7 @@ class RedditRenderer:

         context = self._render_body(i.text)

-        emitted: Set[str] = set()
+        emitted: set[str] = set()

         for url in chain(urls, extract_urls(i.text)):
             if url in emitted:
@@ -164,9 +168,3 @@ class RedditRenderer:
                 locator=locator,
             )
             emitted.add(res.url)
-
-
-import typing
-if typing.TYPE_CHECKING:
-    from my.reddit.common import Submission, Comment, Save, Upvote, RedditBase
-
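Moving the `my.reddit.common` imports under `typing.TYPE_CHECKING` means they only run for type checkers; combined with `from __future__ import annotations`, the annotations are never evaluated at runtime, so the (possibly absent) HPI types cost nothing. A standalone sketch using a stdlib stand-in for the HPI types:

from __future__ import annotations

import typing

if typing.TYPE_CHECKING:
    from decimal import Decimal  # stand-in for my.reddit.common's types


def show(x: Decimal) -> str:  # annotation is stored as a string, never evaluated
    return str(x)


print(show(3))  # runs fine even though Decimal was never imported at runtime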
promnesia/sources/roamresearch.py
CHANGED
@@ -2,11 +2,12 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for Roam Research data
 '''

-from ..common import Visit, Loc, extract_urls, Results
+from promnesia.common import Loc, Results, Visit, extract_urls


 def index() -> Results:
     import my.roamresearch as RR
+
     roam = RR.roam()
     for node in roam.traverse():
         yield from _collect(node)
@@ -14,7 +15,7 @@ def index() -> Results:

 def _collect(node: 'RoamNode') -> Results:
     title = node.title
-    body  = node.body or ''
+    body = node.body or ''
     if title is None:
         # most notes don't have title, so we just take the first line instead..
         lines = body.splitlines(keepends=True)
@@ -43,6 +44,8 @@ def _collect(node: 'RoamNode') -> Results:


 import typing
+
 if typing.TYPE_CHECKING:
     import my.roamresearch as RR
+
     RoamNode = RR.Node
promnesia/sources/rss.py
CHANGED
@@ -2,27 +2,25 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for RSS data.
 '''

-from …
+from datetime import datetime, timezone

-from ..common import Visit, Loc, Results
-
-from datetime import datetime
-
-import pytz
+from promnesia.common import Loc, Results, Visit

 # arbitrary, 2011-11-04 00:05:23.283+00:00
-default_datetime = datetime.fromtimestamp(1320365123, tz=pytz.utc)
+default_datetime = datetime.fromtimestamp(1320365123, tz=timezone.utc)
 # TODO FIXME allow for visit not to have datetime?
 # I.e. even having context is pretty good!

+
 def index() -> Results:
     from my.rss.all import subscriptions
+
     for feed in subscriptions():
         # TODO locator should be optional too? although could use direct link in the rss reader interface
         locator = Loc.make(title='my.rss')
         yield Visit(
             url=feed.url,
             dt=feed.created_at or default_datetime,
-            context='RSS subscription',
+            context='RSS subscription',  # TODO use 'provider', etc?
             locator=locator,
         )
promnesia/sources/shellcmd.py
CHANGED
@@ -2,18 +2,30 @@
 Greps out URLs from an arbitrary shell command results.
 """

-from datetime import datetime
+from __future__ import annotations
+
 import os
 import re
-from subprocess import run, PIPE
-from typing import Union, Sequence
 import warnings
+from collections.abc import Sequence
+from datetime import datetime
+from subprocess import PIPE, run
+
+from promnesia.common import (
+    Loc,
+    PathIsh,
+    Results,
+    Visit,
+    _is_windows,
+    extract_urls,
+    file_mtime,
+    now_tz,
+)

-from ..common import Visit, Loc, Results, extract_urls, file_mtime, get_system_tz, now_tz, _is_windows, PathIsh
 from .plaintext import _has_grep


-def index(command: Union[str, Sequence[PathIsh]]) -> Results:
+def index(command: str | Sequence[PathIsh]) -> Results:
     cmd: Sequence[PathIsh]
     cmds: str
     if isinstance(command, str):
@@ -24,8 +36,6 @@ def index(command: Union[str, Sequence[PathIsh]]) -> Results:
         cmds = ' '.join(map(str, command))
         cmd = command

-    tz = get_system_tz()
-
     # ugh... on windows grep does something nasty? e.g:
     # grep --color=never -r -H -n -I -E http 'D:\\a\\promnesia\\promnesia\\tests\\testdata\\custom'
     # D:\a\promnesia\promnesia\tests\testdata\custom/file1.txt:1:Right, so this points at http://google.com
@@ -42,9 +52,9 @@ def index(command: Union[str, Sequence[PathIsh]]) -> Results:
             fname = None
             lineno = None
         else:
-            fname  = m.group(1)
+            fname = m.group(1)
             lineno = int(m.group(2))
-            line   = m.group(3)
+            line = m.group(3)

         if fname is not None and needs_windows_grep_patching:
             fname = fname.replace('/', os.sep)
@@ -71,9 +81,9 @@ def index(command: Union[str, Sequence[PathIsh]]) -> Results:
             context=context,
         )

-    r = run(cmd, stdout=PIPE)
+    r = run(cmd, stdout=PIPE, check=False)
     if r.returncode > 0:
-        if not (cmd[0] in {'grep', 'findstr'} and r.returncode == 1):
+        if not (cmd[0] in {'grep', 'findstr'} and r.returncode == 1):  # ugh. grep returns 1 on no matches...
             r.check_returncode()
     output = r.stdout
     assert output is not None
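The returncode dance exists because grep (and findstr) exit with 1 when they merely find nothing; `check=False` makes that explicit, while `check_returncode()` still escalates real failures. A standalone sketch, assuming grep is available on PATH (i.e. not plain Windows):

from subprocess import PIPE, run

r = run(['grep', 'no-such-pattern'], input=b'haystack', stdout=PIPE, check=False)
if r.returncode > 0:
    if r.returncode != 1:  # for grep, 1 just means "no matches"
        r.check_returncode()  # anything else is a genuine error
print('no matches is not an error:', r.returncode)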