promnesia 1.2.20230515-py3-none-any.whl → 1.3.20241021-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promnesia/__init__.py +14 -3
- promnesia/__main__.py +60 -35
- promnesia/cannon.py +27 -27
- promnesia/common.py +85 -67
- promnesia/compare.py +21 -22
- promnesia/compat.py +10 -10
- promnesia/config.py +23 -23
- promnesia/database/common.py +67 -0
- promnesia/database/dump.py +188 -0
- promnesia/{read_db.py → database/load.py} +16 -17
- promnesia/extract.py +14 -11
- promnesia/kjson.py +12 -11
- promnesia/logging.py +4 -4
- promnesia/misc/__init__.pyi +0 -0
- promnesia/misc/config_example.py +1 -2
- promnesia/misc/install_server.py +7 -9
- promnesia/server.py +57 -47
- promnesia/sources/__init__.pyi +0 -0
- promnesia/sources/auto.py +50 -35
- promnesia/sources/auto_logseq.py +6 -5
- promnesia/sources/auto_obsidian.py +2 -2
- promnesia/sources/browser.py +14 -9
- promnesia/sources/browser_legacy.py +26 -16
- promnesia/sources/demo.py +19 -3
- promnesia/sources/fbmessenger.py +3 -2
- promnesia/sources/filetypes.py +16 -7
- promnesia/sources/github.py +7 -9
- promnesia/sources/guess.py +2 -1
- promnesia/sources/hackernews.py +2 -2
- promnesia/sources/hpi.py +2 -2
- promnesia/sources/html.py +7 -5
- promnesia/sources/hypothesis.py +4 -3
- promnesia/sources/instapaper.py +2 -2
- promnesia/sources/markdown.py +31 -21
- promnesia/sources/org.py +27 -13
- promnesia/sources/plaintext.py +30 -29
- promnesia/sources/pocket.py +3 -2
- promnesia/sources/reddit.py +20 -19
- promnesia/sources/roamresearch.py +2 -1
- promnesia/sources/rss.py +4 -5
- promnesia/sources/shellcmd.py +19 -6
- promnesia/sources/signal.py +33 -24
- promnesia/sources/smscalls.py +2 -2
- promnesia/sources/stackexchange.py +4 -3
- promnesia/sources/takeout.py +76 -9
- promnesia/sources/takeout_legacy.py +24 -12
- promnesia/sources/telegram.py +13 -11
- promnesia/sources/telegram_legacy.py +18 -7
- promnesia/sources/twitter.py +6 -5
- promnesia/sources/vcs.py +5 -3
- promnesia/sources/viber.py +10 -9
- promnesia/sources/website.py +4 -4
- promnesia/sources/zulip.py +3 -2
- promnesia/sqlite.py +7 -4
- promnesia/tests/__init__.py +0 -0
- promnesia/tests/common.py +140 -0
- promnesia/tests/server_helper.py +67 -0
- promnesia/tests/sources/__init__.py +0 -0
- promnesia/tests/sources/test_auto.py +65 -0
- promnesia/tests/sources/test_filetypes.py +43 -0
- promnesia/tests/sources/test_hypothesis.py +39 -0
- promnesia/tests/sources/test_org.py +64 -0
- promnesia/tests/sources/test_plaintext.py +25 -0
- promnesia/tests/sources/test_shellcmd.py +21 -0
- promnesia/tests/sources/test_takeout.py +56 -0
- promnesia/tests/test_cannon.py +325 -0
- promnesia/tests/test_cli.py +40 -0
- promnesia/tests/test_compare.py +30 -0
- promnesia/tests/test_config.py +289 -0
- promnesia/tests/test_db_dump.py +222 -0
- promnesia/tests/test_extract.py +65 -0
- promnesia/tests/test_extract_urls.py +43 -0
- promnesia/tests/test_indexer.py +251 -0
- promnesia/tests/test_server.py +291 -0
- promnesia/tests/test_traverse.py +39 -0
- promnesia/tests/utils.py +35 -0
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/METADATA +15 -18
- promnesia-1.3.20241021.dist-info/RECORD +83 -0
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/WHEEL +1 -1
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/entry_points.txt +0 -1
- promnesia/dump.py +0 -105
- promnesia-1.2.20230515.dist-info/RECORD +0 -58
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/LICENSE +0 -0
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/top_level.txt +0 -0
promnesia/sources/demo.py
CHANGED
```diff
@@ -3,18 +3,34 @@ A dummy source, used for testing
 Generates a sequence of fake evenly separated visits
 '''
 
+from __future__ import annotations
+
 from datetime import datetime, timedelta
 
-from …
+from promnesia.common import Loc, Results, Visit
+
+IsoFormatDt = str
+Seconds = int
+
+
+# TODO allow passing isoformat string as base_dt?
+# and maybe something similar as delta? start with seconds maybe
+def index(
+    count: int = 100,
+    *,
+    base_dt: datetime | IsoFormatDt = datetime.min + timedelta(days=5000),
+    delta: timedelta | Seconds = timedelta(hours=1),
+) -> Results:
 
+    base_dt_ = base_dt if isinstance(base_dt, datetime) else datetime.fromisoformat(base_dt)
+    delta_ = delta if isinstance(delta, timedelta) else timedelta(seconds=delta)
 
-def index(count: int=100, *, base_dt: datetime=datetime.min + timedelta(days=5000), delta: timedelta=timedelta(hours=1)) -> Results:
     # todo with some errors too?
     # todo use data generation library suggested for HPI?
     for i in range(count):
         yield Visit(
             url=f'https://demo.com/page{i}.html',
-            dt=…
+            dt=base_dt_ + delta_ * i,
             locator=Loc.make('demo'),
         )
     # todo add context?
```
promnesia/sources/fbmessenger.py
CHANGED
```diff
@@ -2,12 +2,13 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for the messages data.
 '''
 
-from …
+from promnesia.common import Loc, Results, Visit, extract_urls
 
 
 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     from my.fbmessenger import messages
+
     for m in messages():
         if isinstance(m, Exception):
             yield m
```
promnesia/sources/filetypes.py
CHANGED
```diff
@@ -1,11 +1,12 @@
-…
+from __future__ import annotations
+
+from collections.abc import Iterable, Sequence
 from functools import lru_cache
 from pathlib import Path
-from typing import …
+from typing import Callable, NamedTuple, Union
 
 from ..common import Results, Url
 
-
 # TODO doesn't really belong here...
 Ctx = Sequence[str]
 
@@ -18,13 +19,13 @@ class EUrl(NamedTuple):
 # keys are mime types + extensions
 Ex = Callable[[Path], Union[Results, Iterable[EUrl]]]
 # None means unhandled
-TYPE2IDX: …
+TYPE2IDX: dict[str, Ex | None] = {}
 # NOTE: there are some types in auto.py at the moment... it's a bit messy
 
 
 # TYPE2IDX only contains the 'prefixes', to speed up the lookup we are using cache..
 @lru_cache(None)
-def type2idx(t: str) -> …
+def type2idx(t: str) -> Ex | None:
     if len(t) == 0:
         return None  # just in case?
     # first try exact match
@@ -67,6 +68,7 @@ CODE = {
     'text/vnd.graphviz',
     'text/x-diff', # patch files
     'text/x-php',
+    'text/x-lilypond',
 
     # these didn't have a mime type, or were mistyped?
     'css',
@@ -96,9 +98,9 @@ audio/
 video/
 '''
 
-handle_later = lambda *…
+handle_later = lambda *_args, **_kwargs: ()
 
-def ignore(*…
+def ignore(*_args, **_kwargs):
     # TODO log (once?)
     yield from ()
 
@@ -115,6 +117,13 @@ TYPE2IDX.update({
     '.vcf' : ignore,
     'message/rfc822': ignore, # ??
 
+    # todo ignore all fonts?
+    'font/woff2': ignore,
+    'font/woff': ignore,
+    'text/x-Algol68': ignore, # ugh some license file had this?? maybe always index text/ as text?
+    'text/x-bytecode.python': ignore, # todo ignore all x-bytecode?
+    'text/calendar': ignore,
+
     # TODO not sure what to do about these..
     'application/octet-stream': handle_later,
     'application/zip' : handle_later,
```
promnesia/sources/github.py
CHANGED
```diff
@@ -1,16 +1,14 @@
 '''
 Uses [[https://github.com/karlicoss/HPI][HPI]] github module
 '''
+from __future__ import annotations
 
 # Note: requires the 'mistletoe' module if you enable render_markdown
-
-from typing import Optional, Set
-
-from ..common import Results, Visit, Loc, iter_urls, logger
+from promnesia.common import Loc, Results, Visit, iter_urls, logger
 
 
 def index(*, render_markdown: bool = False) -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     from my.github.all import events
 
     if render_markdown:
@@ -29,9 +27,9 @@ def index(*, render_markdown: bool = False) -> Results:
             continue
 
         # if enabled, convert the (markdown) body to HTML
-        context: …
+        context: str | None = e.body
         if e.body is not None and render_markdown:
-            context = TextParser(e.body)._doc_ashtml()
+            context = TextParser(e.body)._doc_ashtml()  # type: ignore[possibly-undefined]
 
         # locator should link back to this event
         loc = Loc.make(title=e.summary, href=e.link)
@@ -59,7 +57,7 @@ def index(*, render_markdown: bool = False) -> Results:
         #
         # Note: this set gets reset every event, is here to
         # prevent duplicates between URLExtract and the markdown parser
-        emitted: …
+        emitted: set[str] = set()
         for url in iter_urls(e.body):
             if url in emitted:
                 continue
@@ -74,7 +72,7 @@ def index(*, render_markdown: bool = False) -> Results:
         # extract from markdown links like [link text](https://...)
         # incase URLExtract missed any somehow
         if render_markdown:
-            for res in extract_from_text(e.body):
+            for res in extract_from_text(e.body):  # type: ignore[possibly-undefined]
                 if isinstance(res, Exception):
                     yield res
                     continue
```
promnesia/sources/guess.py
CHANGED
promnesia/sources/hackernews.py
CHANGED
```diff
@@ -4,11 +4,11 @@ Uses [[https://github.com/karlicoss/HPI][HPI]] dogsheep module to import HackerN
 
 import textwrap
 
-from promnesia.common import …
+from promnesia.common import Loc, Results, Visit
 
 
 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     from my.hackernews import dogsheep
 
     for item in dogsheep.items():
```
promnesia/sources/hpi.py
CHANGED
```diff
@@ -2,10 +2,10 @@
 Just a helper for a more humane error message when importing my.* dependencies
 '''
 
-from …
+from promnesia.common import logger
 
 try:
-    import my
+    import my  # noqa: F401
 except ImportError as e:
     logger.exception(e)
     logger.critical("Failed during 'import my'. You probably need to install & configure HPI package first (see 'https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org')")
```
promnesia/sources/html.py
CHANGED
```diff
@@ -2,19 +2,21 @@
 Extracts links from HTML files
 '''
 
-from …
-from typing import Iterator, Tuple
+from __future__ import annotations
 
-from …
+from collections.abc import Iterator
+from pathlib import Path
 
 from bs4 import BeautifulSoup
 
+from promnesia.common import Loc, PathIsh, Results, Visit, file_mtime
 
-# TODO present error summary in the very end; import errors -- makes sense to show
+# TODO present error summary in the very end; import errors -- makes sense to show
 # TODO on some exceptions, request a fallback to text?
 
 
-Url = …
+Url = tuple[str, str]
+
 
 def extract_urls_from_html(s: str) -> Iterator[Url]:
     """
```
promnesia/sources/hypothesis.py
CHANGED
```diff
@@ -1,14 +1,15 @@
 """
 Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myhypothesis][hypothesis]] module
 """
-
+
+from promnesia.common import Loc, Results, Visit, extract_urls, join_tags
 
 
 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     import my.hypothesis as hyp
 
-    for h in hyp.…
+    for h in hyp.highlights():
         if isinstance(h, Exception):
             yield h
             continue
```
promnesia/sources/instapaper.py
CHANGED
```diff
@@ -1,11 +1,11 @@
 '''
 Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myinstapaper][instapaper]] module
 '''
-from …
+from promnesia.common import Loc, Results, Visit
 
 
 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     import my.instapaper as ip
 
     for p in ip.pages():
```
promnesia/sources/markdown.py
CHANGED
```diff
@@ -1,14 +1,24 @@
-from …
-from typing import Iterator, NamedTuple, Optional
-
-from ..common import get_logger, Extraction, Url, PathIsh, Res, Visit, Loc, file_mtime, logger
-
-
-import mistletoe  # type: ignore
-from mistletoe.span_token import AutoLink, Link  # type: ignore
-import mistletoe.block_token as BT  # type: ignore
-from mistletoe.html_renderer import HTMLRenderer  # type: ignore
+from __future__ import annotations
 
+from collections.abc import Iterator
+from pathlib import Path
+from typing import NamedTuple
+
+import mistletoe  # type: ignore
+import mistletoe.block_token as BT  # type: ignore
+from mistletoe.html_renderer import HTMLRenderer  # type: ignore
+from mistletoe.span_token import AutoLink, Link  # type: ignore
+
+from promnesia.common import (
+    Extraction,
+    Loc,
+    PathIsh,
+    Res,
+    Url,
+    Visit,
+    file_mtime,
+    logger,
+)
 
 renderer = HTMLRenderer()
 
@@ -18,7 +28,7 @@ block_tokens = tuple(getattr(BT, name) for name in BT.__all__)
 
 class Parsed(NamedTuple):
     url: Url
-    context: …
+    context: str | None
 
 
 Result = Res[Parsed]
@@ -42,7 +52,7 @@ HTML_MARKER = '!html '
 def _ashtml(block) -> str:
     res = renderer.render(block)
     if res.startswith('<p>') and res.endswith('</p>'):
-        res = res[3…
+        res = res[3:-4]  # meh, but for now fine
     return res
 
 
@@ -62,7 +72,6 @@ class Parser:
         context = None if last_block is None else HTML_MARKER + _ashtml(last_block)
         yield Parsed(url=url, context=context)
 
-
     def _walk(self, cur, last_block) -> Iterator[Result]:
         if isinstance(cur, block_tokens):
             last_block = cur
@@ -73,12 +82,14 @@ class Parser:
             logger.exception(e)
             yield e
 
-
+        # keeping getattr for compatibility in older versions of mistletoe, it was optional
+        children = getattr(cur, 'children', None)
+        if children is None:
+            return
         for c in children:
             yield from self._walk(c, last_block=last_block)
 
-
-    def walk(self):
+    def walk(self) -> Iterator[Result]:
         yield from self._walk(self.doc, last_block=None)
 
 
@@ -94,7 +105,7 @@ def extract_from_file(fname: PathIsh) -> Iterator[Extraction]:
         yield Visit(
             url=r.url,
             dt=fallback_dt,
-            locator=Loc.file(fname),
+            locator=Loc.file(fname),  # TODO line number
             context=r.context,
         )
 
@@ -105,9 +116,9 @@ class TextParser(Parser):
     Instead of chunking blocks like for files, this returns the entire
     message rendered as the context
     '''
-    def __init__(self, text: str):
-        self.doc = mistletoe.Document(text)
 
+    def __init__(self, text: str) -> None:
+        self.doc = mistletoe.Document(text)
 
     def _doc_ashtml(self):
         '''
@@ -117,8 +128,7 @@ class TextParser(Parser):
         self._html = HTML_MARKER + _ashtml(self.doc)
         return self._html
 
-
-    def _extract(self, cur, last_block = None) -> Iterator[Parsed]:
+    def _extract(self, cur, last_block=None) -> Iterator[Parsed]:  # noqa: ARG002
         if not isinstance(cur, (AutoLink, Link)):
             return
 
```
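The two entry points touched above: `extract_from_file` walks a markdown file block by block and yields `Visit`s (or `Exception`s), while `TextParser` renders an entire message as HTML context — it is what github.py calls as `TextParser(e.body)._doc_ashtml()`. A sketch, with a hypothetical file path:

```python
from promnesia.sources.markdown import TextParser, extract_from_file

for res in extract_from_file('notes.md'):  # hypothetical path
    if isinstance(res, Exception):
        continue
    print(res.url, res.context)

parser = TextParser('check [this](https://example.com) out')
print(parser._doc_ashtml())  # an '!html <p>...</p>' string
```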
promnesia/sources/org.py
CHANGED
```diff
@@ -1,16 +1,26 @@
-from …
+from __future__ import annotations
+
 import re
-from …
+from collections.abc import Iterable, Iterator
+from datetime import datetime
 from pathlib import Path
-
-
-from ..common import Visit, get_logger, Results, Url, Loc, from_epoch, iter_urls, PathIsh, Res, file_mtime
-
+from typing import NamedTuple, Optional, cast
 
 import orgparse
-from orgparse.date import …
+from orgparse.date import OrgDate, gene_timestamp_regex
 from orgparse.node import OrgNode
 
+from promnesia.common import (
+    Loc,
+    PathIsh,
+    Res,
+    Results,
+    Url,
+    Visit,
+    file_mtime,
+    get_logger,
+    iter_urls,
+)
 
 UPDATE_ORGPARSE_WARNING = 'WARNING: please update orgparse version to a more recent (pip3 install -U orgparse)'
 
@@ -36,7 +46,7 @@ CREATED_RGX = re.compile(gene_timestamp_regex(brtype='inactive'), re.VERBOSE)
 """
 
 class Parsed(NamedTuple):
-    dt: …
+    dt: datetime | None
     heading: str
 
 
@@ -57,8 +67,12 @@ def _parse_node(n: OrgNode) -> Parsed:
     # todo a bit hacky..
     heading = heading.replace(createds + ' ', '')
     if createds is not None:
-        …
-        …
+        if '<%%' in createds:
+            # sexp date, not supported
+            dt = None
+        else:
+            [odt] = OrgDate.list_from_str(createds)
+            dt = odt.start
     else:
         dt = None
     return Parsed(dt=dt, heading=heading)
@@ -70,7 +84,7 @@ def _get_heading(n: OrgNode):
     return '' if n.is_root() else n.get_heading(format='raw')
 
 
-def walk_node(*, node: OrgNode, dt: datetime) -> Iterator[Res[…
+def walk_node(*, node: OrgNode, dt: datetime) -> Iterator[Res[tuple[Parsed, OrgNode]]]:
     try:
         parsed = _parse_node(node)
     except Exception as e:
@@ -80,7 +94,7 @@ def walk_node(*, node: OrgNode, dt: datetime) -> Iterator[Res[Tuple[Parsed, OrgN
         parsed = parsed._replace(dt=dt)
     else:
         dt = parsed.dt
-    …
+    yield parsed, node
 
     for c in node.children:
         yield from walk_node(node=c, dt=dt)
@@ -94,7 +108,7 @@ def get_body_compat(node: OrgNode) -> str:
         # get_body was only added to root in 0.2.0
         for x in warn_old_orgparse_once():
             # ugh. really crap, but it will at least only warn once... (becaue it caches)
-            raise x
+            raise x  # noqa: B904
         return UPDATE_ORGPARSE_WARNING
     else:
         raise e
```
promnesia/sources/plaintext.py
CHANGED
```diff
@@ -1,10 +1,9 @@
-from …
-from ..compat import removeprefix
+from __future__ import annotations
 
 from functools import lru_cache
 from pathlib import Path
-
-from …
+
+from promnesia.common import PathIsh, _is_windows, get_logger, get_tmpdir
 
 # https://linux-and-mac-hacks.blogspot.co.uk/2013/04/use-grep-and-regular-expressions-to.html
 _URL_REGEX = r'\b(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@#/%=~_|]'
@@ -12,16 +11,16 @@ _URL_REGEX = r'\b(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@
 if _is_windows:
     # wtf? for some reason on windows (in cmd.exe specificaly) \b isn't working...
     # this will make the regex a bit less precise, but not end of the world
-    _URL_REGEX = removeprefix(…
+    _URL_REGEX = _URL_REGEX.removeprefix(r'\b')
 
 
-@lru_cache
+@lru_cache
 def _has_grep() -> bool:
     import shutil
     return shutil.which('grep') is not None
 
 
-Command = …
+Command = list[str]
 
 
 _GREP_ARGS: Command = [
@@ -39,7 +38,7 @@ if not _is_windows:
 
 # NOTE: grep/findstr exit with code 1 on no matches...
 # we hack around it in shellcmd module (search 'grep')
-def _grep(*, paths: …
+def _grep(*, paths: list[str], recursive: bool) -> Command:
     return [
         'grep',
         *(['-r'] if recursive else []),
@@ -91,24 +90,26 @@ def extract_from_path(path: PathIsh) -> Command:
     logger = get_logger()
     if pp.is_dir(): # TODO handle archives here???
         return _extract_from_dir(str(pp))
-… (old lines 94–114 truncated in this diff view)
+
+    if any(pp.suffix == ex for ex in (
+        '.xz',
+        '.bz2',
+        '.gz',
+        '.zip',
+    )):
+        # todo should be debug?
+        # or should delete it completely, feels like unpacking archives here is a bit too much
+        raise RuntimeError(f"Archives aren't supported yet: {path}")
+        # logger.info(f"Extracting from compressed file {path}")
+        # import lzma
+        # from tempfile import NamedTemporaryFile
+        # # TODO hopefully, no collisions
+        # import os.path
+        # fname = os.path.join(tdir.name, os.path.basename(path))
+        # with open(fname, 'wb') as fo:
+        #     with lzma.open(path, 'r') as cf:
+        #         fo.write(cf.read())
+        # return _extract_from_file(fname)
+
+    r = _extract_from_file(str(pp))
+    return r
```
promnesia/sources/pocket.py
CHANGED
```diff
@@ -1,11 +1,12 @@
 '''
 Uses [[https://github.com/karlicoss/HPI][HPI]] for Pocket highlights & bookmarks
 '''
-
+
+from promnesia.common import Loc, Results, Visit
 
 
 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     from my.pocket import articles
 
     # TODO use docstring from my. module? E.g. describing which pocket format is expected
```
promnesia/sources/reddit.py
CHANGED
```diff
@@ -2,21 +2,27 @@
 Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myreddit][reddit]] module
 '''
 
+from __future__ import annotations
+
+import typing
 from itertools import chain
-from typing import Set, Optional, Type
 
-from …
+from promnesia.common import Loc, Results, Visit, extract_urls, logger
+
+if typing.TYPE_CHECKING:
+    from my.reddit.common import Comment, RedditBase, Save, Submission, Upvote
+
 
+def index(*, render_markdown: bool = False, renderer: type[RedditRenderer] | None = None) -> Results:
+    from . import hpi  # noqa: F401
 
-def index(*, render_markdown: bool = False, renderer: Optional[Type['RedditRenderer']] = None) -> Results:
-    from . import hpi
     try:
-        from my.reddit.all import …
+        from my.reddit.all import comments, saved, submissions, upvoted
     except ModuleNotFoundError as e:
         if "No module named 'my.reddit.all'" in str(e):
             import warnings
             warnings.warn("DEPRECATED/reddit: Using an old version of HPI, please update")
-            from my.reddit import …
+            from my.reddit import comments, saved, submissions, upvoted
         else:
             raise e
 
@@ -58,7 +64,7 @@ def index(*, render_markdown: bool = False, renderer: Optional[Type['RedditRende
 # mostly here so we can keep track of how the user
 # wants to render markdown
 class RedditRenderer:
-    def __init__(self, render_markdown: bool = False) -> None:
+    def __init__(self, *, render_markdown: bool = False) -> None:
         self._link_extractor = None
         self._parser_cls = None
         try:
@@ -77,7 +83,7 @@ class RedditRenderer:
         self.render_markdown = render_markdown
 
 
-    def _from_comment(self, i: …
+    def _from_comment(self, i: Comment) -> Results:
         locator = Loc.make(
             title='Reddit comment',
             href=i.url,
@@ -85,7 +91,7 @@ class RedditRenderer:
         yield from self._from_common(i, locator=locator)
 
 
-    def _from_submission(self, i: …
+    def _from_submission(self, i: Submission) -> Results:
         locator = Loc.make(
             title=f'Reddit submission: {i.title}',
             href=i.url,
@@ -93,15 +99,15 @@ class RedditRenderer:
         yield from self._from_common(i, locator=locator)
 
 
-    def _from_upvote(self, i: …
+    def _from_upvote(self, i: Upvote) -> Results:
         locator = Loc.make(
-            title=…
+            title='Reddit upvote',
             href=i.url,
         )
         yield from self._from_common(i, locator=locator)
 
 
-    def _from_save(self, i: …
+    def _from_save(self, i: Save) -> Results:
         locator = Loc.make(
             title='Reddit save',
             href=i.url,
@@ -117,7 +123,7 @@ class RedditRenderer:
         return text
 
 
-    def _from_common(self, i: …
+    def _from_common(self, i: RedditBase, locator: Loc) -> Results:
         urls = [i.url]
         # TODO this should belong to HPI.. fix permalink handling I guess
         # ok, it's not present for all of them..
@@ -130,7 +136,7 @@ class RedditRenderer:
 
         context = self._render_body(i.text)
 
-        emitted: …
+        emitted: set[str] = set()
 
         for url in chain(urls, extract_urls(i.text)):
             if url in emitted:
@@ -165,8 +171,3 @@ class RedditRenderer:
         )
         emitted.add(res.url)
 
-
-import typing
-if typing.TYPE_CHECKING:
-    from my.reddit.common import Submission, Comment, Save, Upvote, RedditBase
-
```
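The `renderer` parameter above takes a `RedditRenderer` subclass (`type[RedditRenderer]`), not an instance, so custom rendering is done by overriding methods such as `_render_body` (visible in the diff). A hedged sketch; the subclass name is illustrative:

```python
from promnesia.sources import reddit

class PlainRenderer(reddit.RedditRenderer):
    def _render_body(self, text):
        return text  # skip markdown rendering regardless of settings

visits = reddit.index(renderer=PlainRenderer)
```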