promnesia 1.2.20240810__py3-none-any.whl → 1.3.20241021__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promnesia/__init__.py +14 -3
- promnesia/__main__.py +38 -25
- promnesia/cannon.py +23 -23
- promnesia/common.py +49 -42
- promnesia/compare.py +18 -20
- promnesia/compat.py +10 -10
- promnesia/config.py +20 -22
- promnesia/database/common.py +4 -3
- promnesia/database/dump.py +14 -13
- promnesia/database/load.py +7 -7
- promnesia/extract.py +13 -11
- promnesia/kjson.py +11 -10
- promnesia/logging.py +1 -1
- promnesia/misc/install_server.py +7 -8
- promnesia/server.py +42 -31
- promnesia/sources/auto.py +43 -30
- promnesia/sources/auto_logseq.py +6 -5
- promnesia/sources/auto_obsidian.py +2 -2
- promnesia/sources/browser.py +14 -9
- promnesia/sources/browser_legacy.py +17 -13
- promnesia/sources/demo.py +7 -7
- promnesia/sources/fbmessenger.py +3 -2
- promnesia/sources/filetypes.py +9 -7
- promnesia/sources/github.py +5 -7
- promnesia/sources/guess.py +2 -1
- promnesia/sources/hackernews.py +2 -2
- promnesia/sources/hpi.py +2 -2
- promnesia/sources/html.py +7 -5
- promnesia/sources/hypothesis.py +3 -2
- promnesia/sources/instapaper.py +2 -2
- promnesia/sources/markdown.py +17 -7
- promnesia/sources/org.py +20 -10
- promnesia/sources/plaintext.py +30 -31
- promnesia/sources/pocket.py +3 -2
- promnesia/sources/reddit.py +19 -18
- promnesia/sources/roamresearch.py +2 -1
- promnesia/sources/rss.py +3 -4
- promnesia/sources/shellcmd.py +19 -6
- promnesia/sources/signal.py +14 -13
- promnesia/sources/smscalls.py +2 -2
- promnesia/sources/stackexchange.py +3 -2
- promnesia/sources/takeout.py +23 -13
- promnesia/sources/takeout_legacy.py +15 -11
- promnesia/sources/telegram.py +13 -11
- promnesia/sources/telegram_legacy.py +18 -7
- promnesia/sources/twitter.py +6 -5
- promnesia/sources/vcs.py +5 -3
- promnesia/sources/viber.py +10 -9
- promnesia/sources/website.py +4 -4
- promnesia/sources/zulip.py +3 -2
- promnesia/sqlite.py +7 -4
- promnesia/tests/common.py +8 -5
- promnesia/tests/server_helper.py +11 -8
- promnesia/tests/sources/test_auto.py +2 -3
- promnesia/tests/sources/test_filetypes.py +2 -1
- promnesia/tests/sources/test_hypothesis.py +3 -3
- promnesia/tests/sources/test_org.py +2 -3
- promnesia/tests/sources/test_plaintext.py +0 -1
- promnesia/tests/sources/test_shellcmd.py +3 -4
- promnesia/tests/sources/test_takeout.py +3 -5
- promnesia/tests/test_cannon.py +5 -5
- promnesia/tests/test_cli.py +4 -6
- promnesia/tests/test_compare.py +1 -1
- promnesia/tests/test_config.py +7 -8
- promnesia/tests/test_db_dump.py +11 -12
- promnesia/tests/test_extract.py +10 -6
- promnesia/tests/test_indexer.py +14 -8
- promnesia/tests/test_server.py +2 -3
- promnesia/tests/test_traverse.py +0 -2
- promnesia/tests/utils.py +4 -4
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/METADATA +3 -2
- promnesia-1.3.20241021.dist-info/RECORD +83 -0
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/WHEEL +1 -1
- promnesia-1.2.20240810.dist-info/RECORD +0 -83
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/LICENSE +0 -0
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/entry_points.txt +0 -0
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/top_level.txt +0 -0
promnesia/sources/auto.py
CHANGED
@@ -5,33 +5,44 @@
  - autodetects Obsidian vault and adds `obsidian://` app protocol support [[file:../src/promnesia/sources/obsidian.py][promnesia.sources.obsidian]]
  - autodetects Logseq graph and adds `logseq://` app protocol support [[file:../src/promnesia/sources/logseq.py][promnesia.sources.logseq]]
 """
+from __future__ import annotations
 
 import csv
-from concurrent.futures import ProcessPoolExecutor as Pool
-from contextlib import nullcontext
-from datetime import datetime
 import itertools
 import json
 import os
-from …
+from collections.abc import Iterable, Iterator, Sequence
+from concurrent.futures import ProcessPoolExecutor as Pool
+from contextlib import nullcontext
 from fnmatch import fnmatch
+from functools import wraps
 from pathlib import Path
-from …
-…
+from typing import Any, Callable, NamedTuple, Optional
+
+from promnesia.common import (
+    Loc,
+    PathIsh,
+    Result,
+    Results,
+    Visit,
+    echain,
+    extract_urls,
+    file_mtime,
+    get_logger,
+    get_tmpdir,
+    logger,
+    mime,
+    traverse,
+    warn_once,
+)
+from promnesia.config import use_cores
 
-
-from .filetypes import EUrl, Ctx
-from .auto_obsidian import obsidian_replacer
 from .auto_logseq import logseq_replacer
+from .auto_obsidian import obsidian_replacer
+from .filetypes import Ctx, EUrl
 
 
-def _collect(thing, path: List[str], result: List[EUrl]) -> None:
+def _collect(thing, path: list[str], result: list[EUrl]) -> None:
     if isinstance(thing, str):
         ctx: Ctx = tuple(path)
         result.extend([EUrl(url=u, ctx=ctx) for u in extract_urls(thing)])
@@ -51,9 +62,9 @@ def _collect(thing, path: List[str], result: List[EUrl]) -> None:
 
 
 # TODO mm. okay, I suppose could use kython consuming thingy?..
-def collect_from(thing) -> List[EUrl]:
-    uuu: List[EUrl] = []
-    path: List[str] = []
+def collect_from(thing) -> list[EUrl]:
+    uuu: list[EUrl] = []
+    path: list[str] = []
     _collect(thing, path, uuu)
     return uuu
 
@@ -85,7 +96,7 @@ def _plaintext(path: Path) -> Results:
 def fallback(ex):
     """Falls back to plaintext in case of issues"""
 
-    fallback_active: Dict[Any, bool] = {}
+    fallback_active: dict[Any, bool] = {}
     @wraps(ex)
     def wrapped(path: Path):
         nonlocal fallback_active
@@ -99,7 +110,7 @@ def fallback(ex):
         except ModuleNotFoundError as me:
             logger = get_logger()
             logger.exception(me)
-            logger.…
+            logger.warning('%s: %s not found, falling back to grep! "pip3 install --user %s" for better support!', path, me.name, me.name)
             yield me
             fallback_active[ex] = True
             do_fallback = True
@@ -126,7 +137,7 @@ def _org(path: Path) -> Results:
     return org.extract_from_file(path)
 
 
-from .filetypes import …
+from .filetypes import CODE, IGNORE, TYPE2IDX, type2idx
 
 TYPE2IDX.update({
     'application/json': _json,
@@ -169,7 +180,7 @@ Replacer = Optional[Callable[[str, str], str]]
 
 def index(
     *paths: PathIsh,
-    ignored: Union[Sequence[str], str]=(),
+    ignored: Sequence[str] | str=(),
     follow: bool=True,
     replacer: Replacer=None,
 ) -> Results:
@@ -210,10 +221,10 @@ class Options(NamedTuple):
     # TODO option to add ignores? not sure..
     # TODO I don't like this replacer thing... think about removing it
     replacer: Replacer
-    root: Optional[Path]=None
+    root: Path | None=None
 
 
-def _index_file_aux(path: Path, opts: Options) -> Union[Exception, List[Result]]:
+def _index_file_aux(path: Path, opts: Options) -> Exception | list[Result]:
     # just a helper for the concurrent version (the generator isn't picklable)
     try:
         return list(_index_file(path, opts=opts))
@@ -248,7 +259,7 @@ def _index(path: Path, opts: Options) -> Results:
             continue
 
         p = p.resolve()
-        if not os.path.exists(p):
+        if not os.path.exists(p):  # noqa: PTH110
            logger.debug('ignoring %s: broken symlink?', p)
            continue
 
@@ -266,8 +277,10 @@ def _index(path: Path, opts: Options) -> Results:
 
 
 Mime = str
-from .filetypes import Ex
-def by_path(pp: Path) -> Tuple[Optional[Ex], Optional[Mime]]:
+from .filetypes import Ex  # meh
+
+
+def by_path(pp: Path) -> tuple[Ex | None, Mime | None]:
     suf = pp.suffix.lower()
     # firt check suffixes, it's faster
     s = type2idx(suf)
@@ -318,7 +331,7 @@ def _index_file(pp: Path, opts: Options) -> Results:
 
     logger.debug('indexing via %s: %s', ip.__name__, pp)
 
-    def indexer() -> Union[Urls, Results]:
+    def indexer() -> Urls | Results:
         # eh, annoying.. need to make more generic..
         idx = ip(pp)
         try:
@@ -353,7 +366,7 @@ def _index_file(pp: Path, opts: Options) -> Results:
             v = v._replace(locator=loc)
 
         if replacer is not None and root is not None:
-            upd: Dict[str, Any] = {}
+            upd: dict[str, Any] = {}
             href = v.locator.href
             if href is not None:
                 upd['locator'] = v.locator._replace(href=replacer(href, str(root)), title=replacer(v.locator.title, str(root)))
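The `fallback` changes above are spread over two hunks; read together, the decorator is a generator wrapper that yields the `ModuleNotFoundError` once and then permanently switches that extractor to plaintext. A condensed sketch of the pattern (bookkeeping simplified; `_plaintext` stubbed in place of the module's grep-based extractor):

```python
from functools import wraps
from pathlib import Path

def _plaintext(path: Path):  # stand-in for the module's grep-based extractor
    yield from ()

def fallback(ex):
    """Falls back to plaintext in case of issues"""
    fallback_active: dict = {}

    @wraps(ex)
    def wrapped(path: Path):
        do_fallback = fallback_active.get(ex, False)
        if not do_fallback:
            try:
                yield from ex(path)
                return
            except ModuleNotFoundError as me:
                # the real code logs '%s not found, falling back to grep!' here
                yield me  # surface the error as a result, don't crash the indexer
                fallback_active[ex] = True
                do_fallback = True
        if do_fallback:
            yield from _plaintext(path)

    return wrapped
```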
promnesia/sources/auto_logseq.py
CHANGED
@@ -1,14 +1,15 @@
 import os.path
 import urllib.parse
 
+
 def logseq_replacer(path: str, root: str) -> str:
-    if not path.startswith("editor://") or not (path.endswith(".md") or path.endswith(".org")):
+    if not path.startswith("editor://") or not (path.endswith((".md", ".org"))):
         return path
-
-    graph = os.path.basename(root)
-    page_name = os.path.basename(path).rsplit('.', 1)[0]
+
+    graph = os.path.basename(root)  # noqa: PTH119
+    page_name = os.path.basename(path).rsplit('.', 1)[0]  # noqa: PTH119
     encoded_page_name = urllib.parse.quote(page_name)
-
+
     uri = f"logseq://graph/{graph}?page={encoded_page_name}"
 
     return uri
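The rewritten replacer is small enough to check by hand. With hypothetical paths:

```python
from promnesia.sources.auto_logseq import logseq_replacer

# hypothetical vault layout; shapes follow the function above
logseq_replacer("editor:///home/u/notes/pages/my page.md", "/home/u/notes")
# -> 'logseq://graph/notes?page=my%20page'
logseq_replacer("https://example.com/page.html", "/home/u/notes")
# -> 'https://example.com/page.html' (non-editor:// links pass through unchanged)
```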
promnesia/sources/browser.py
CHANGED
@@ -2,15 +2,18 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for visits from web browsers.
 '''
 
+from __future__ import annotations
+
 import re
-from typing import Optional, Iterator, Any, TYPE_CHECKING
 import warnings
+from collections.abc import Iterator
+from typing import TYPE_CHECKING, Any
 
-from promnesia.common import …
+from promnesia.common import Loc, PathIsh, Results, Second, Visit, is_sqlite_db, logger
 
 
-def index(p: Optional[PathIsh]=None) -> Results:
-    from . import hpi
+def index(p: PathIsh | None = None) -> Results:
+    from . import hpi  # noqa: F401,I001
 
     if p is None:
         from my.browser.all import history
@@ -24,10 +27,10 @@ def index(p: Optional[PathIsh]=None) -> Results:
     )
     try:
         yield from _index_new_with_adhoc_config(path=p)
-        return
     except Exception as e:
         logger.exception(e)
         warnings.warn("Hacking my.config.browser.export didn't work. You probably need to update HPI.")
+    else:
+        return
 
     logger.warning("Falling back onto legacy promnesia.sources.browser_legacy module")
     yield from _index_old(path=p)
@@ -35,11 +39,12 @@ def index(p: Optional[PathIsh]=None) -> Results:
 
 def _index_old(*, path: PathIsh) -> Results:
     from . import browser_legacy
+
     yield from browser_legacy.index(path)
 
 
 def _index_new_with_adhoc_config(*, path: PathIsh) -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
 
     ## previously, it was possible to index be called with multiple different db search paths
     ## this would result in each subsequent call to my.browser.export.history to invalidate cache every time
@@ -50,7 +55,7 @@ def _index_new_with_adhoc_config(*, path: PathIsh) -> Results:
     cache_override = None if hpi_cache_dir is None else hpi_cache_dir / sanitized_path
     ##
 
-    from my.core.common import …
+    from my.core.common import Paths, classproperty, get_files
     class config:
         class core:
             cache_dir = cache_override
@@ -75,8 +80,8 @@ else:
 
 def _index_new(history: Iterator[BrowserMergeVisit]) -> Results:
     for v in history:
-        desc: Optional[str] = None
-        duration: Optional[Second] = None
+        desc: str | None = None
+        duration: Second | None = None
         metadata = v.metadata
         if metadata is not None:
             desc = metadata.title
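Moving the `return` out of the `try` body into an `else` clause does not change behavior: `else` runs only when the `try` block raised nothing, so the legacy fallback below it is still skipped on success. The control flow in isolation, with illustrative names:

```python
def index_with_fallback(primary, legacy):
    # generic shape of the flow above; names are illustrative
    try:
        result = primary()
    except Exception:
        pass  # logged in the real code, then falls through to legacy
    else:
        return result  # primary succeeded: never reach the fallback
    return legacy()
```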
promnesia/sources/browser_legacy.py
CHANGED
@@ -1,13 +1,14 @@
+from __future__ import annotations
+
+import sqlite3
 from datetime import datetime
 from pathlib import Path
 from urllib.parse import unquote
-import sqlite3
-from typing import List, Set, Optional
 
 import pytz
 
-from …
-from …
+from promnesia import config
+from promnesia.common import Loc, PathIsh, Results, Second, Visit, is_sqlite_db, logger
 
 try:
     from cachew import cachew
@@ -35,21 +36,21 @@ def index(p: PathIsh) -> Results:
 
 
 
-def _index_dbs(dbs: List[Path], cachew_name: str):
+def _index_dbs(dbs: list[Path], cachew_name: str):
     # TODO right... not ideal, need to think how to handle it properly...
     import sys
     sys.setrecursionlimit(5000)
 
     cache_dir = config.get().cache_dir
     cpath = None if cache_dir is None else cache_dir / cachew_name
-    emitted: Set = set()
+    emitted: set = set()
     yield from _index_dbs_aux(cpath, dbs, emitted=emitted)
 
 
 # todo wow, stack traces are ridiculous here...
 # todo hmm, feels like it should be a class or something?
-@cachew(lambda cp, dbs, emitted: cp, depends_on=lambda cp, dbs, emitted: dbs)  # , logger=logger)
-def _index_dbs_aux(cache_path: Optional[Path], dbs: List[Path], emitted: Set) -> Results:
+@cachew(lambda cp, dbs, emitted: cp, depends_on=lambda cp, dbs, emitted: dbs)  # , logger=logger) # noqa: ARG005
+def _index_dbs_aux(cache_path: Path | None, dbs: list[Path], emitted: set) -> Results:
     if len(dbs) == 0:
         return
 
@@ -75,7 +76,7 @@ def _index_dbs_aux(cache_path: Optional[Path], dbs: List[Path], emitted: Set) ->
     yield from _index_db(db, emitted=emitted)
 
 
-def _index_db(db: Path, emitted: Set):
+def _index_db(db: Path, emitted: set):
     logger.info('processing %s', db)  # debug level?
 
     # todo schema check (not so critical for cachew though)
@@ -121,17 +122,20 @@ Col = str
 ColType = str
 
 
-from typing import …
+from collections.abc import Sequence
+from typing import NamedTuple, Union
+
 
 class Schema(NamedTuple):
-    cols: Sequence[Tuple[Col, ColType]]
+    cols: Sequence[tuple[Col, ColType]]
     key: Sequence[str]
 
 
-SchemaCheck = Tuple[str, Union[str, Sequence[str]]]  # todo Union: meh
+SchemaCheck = tuple[str, Union[str, Sequence[str]]]  # todo Union: meh
 
 from dataclasses import dataclass
 
+
 # todo protocol?
 @dataclass
 class Extr:
@@ -179,7 +183,7 @@ class Chrome(Extr):
         dt = chrome_time_to_utc(int(ts))
         url = unquote(url)  # chrome urls are all quoted
         dd = int(durs)
-        dur: Optional[Second] = None if dd == 0 else dd // 1_000_000
+        dur: Second | None = None if dd == 0 else dd // 1_000_000
         return Visit(
             url=url,
             dt=dt,
promnesia/sources/demo.py
CHANGED
@@ -3,11 +3,11 @@ A dummy source, used for testing
 Generates a sequence of fake evenly separated visits
 '''
 
-from …
-from typing import Union
+from __future__ import annotations
 
-from …
+from datetime import datetime, timedelta
 
+from promnesia.common import Loc, Results, Visit
 
 IsoFormatDt = str
 Seconds = int
@@ -16,10 +16,10 @@ Seconds = int
 # TODO allow passing isoformat string as base_dt?
 # and maybe something similar as delta? start with seconds maybe
 def index(
-    …
+    count: int = 100,
+    *,
+    base_dt: datetime | IsoFormatDt = datetime.min + timedelta(days=5000),
+    delta: timedelta | Seconds = timedelta(hours=1),
 ) -> Results:
 
     base_dt_ = base_dt if isinstance(base_dt, datetime) else datetime.fromisoformat(base_dt)
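With the reshuffled signature, `count` stays positional while `base_dt` and `delta` become keyword-only, and `base_dt` accepts either a datetime or an ISO string (converted via the `fromisoformat` call above). A usage sketch:

```python
from promnesia.sources.demo import index

# three fake visits, one hour apart (the default delta), from an ISO timestamp
visits = list(index(3, base_dt="2000-01-01T00:00:00"))
```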
promnesia/sources/fbmessenger.py
CHANGED
@@ -2,12 +2,13 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for the messages data.
 '''
 
-from …
+from promnesia.common import Loc, Results, Visit, extract_urls
 
 
 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     from my.fbmessenger import messages
+
     for m in messages():
         if isinstance(m, Exception):
             yield m
promnesia/sources/filetypes.py
CHANGED
@@ -1,11 +1,12 @@
-…
+from __future__ import annotations
+
+from collections.abc import Iterable, Sequence
 from functools import lru_cache
 from pathlib import Path
-from typing import …
+from typing import Callable, NamedTuple, Union
 
 from ..common import Results, Url
 
-
 # TODO doesn't really belong here...
 Ctx = Sequence[str]
 
@@ -18,13 +19,13 @@ class EUrl(NamedTuple):
 # keys are mime types + extensions
 Ex = Callable[[Path], Union[Results, Iterable[EUrl]]]
 # None means unhandled
-TYPE2IDX: Dict[str, Optional[Ex]] = {}
+TYPE2IDX: dict[str, Ex | None] = {}
 # NOTE: there are some types in auto.py at the moment... it's a bit messy
 
 
 # TYPE2IDX only contains the 'prefixes', to speed up the lookup we are using cache..
 @lru_cache(None)
-def type2idx(t: str) -> Optional[Ex]:
+def type2idx(t: str) -> Ex | None:
     if len(t) == 0:
         return None  # just in case?
     # first try exact match
@@ -97,9 +98,9 @@ audio/
 video/
 '''
 
-handle_later = lambda *args, **kwargs: ()
+handle_later = lambda *_args, **_kwargs: ()
 
-def ignore(*args, **kwargs):
+def ignore(*_args, **_kwargs):
     # TODO log (once?)
     yield from ()
 
@@ -121,6 +122,7 @@ TYPE2IDX.update({
     'font/woff': ignore,
     'text/x-Algol68': ignore,  # ugh some license file had this?? maybe always index text/ as text?
     'text/x-bytecode.python': ignore,  # todo ignore all x-bytecode?
+    'text/calendar': ignore,
 
     # TODO not sure what to do about these..
     'application/octet-stream': handle_later,
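Only the first two lines of `type2idx` appear in the hunk; per the surrounding comments, `TYPE2IDX` keys may be exact mime types/extensions or prefixes such as `text/`, with lookups memoized by `lru_cache`. A sketch of that contract (the prefix-scan details are an assumption, and the toy table is not the real one):

```python
from functools import lru_cache

TYPE2IDX = {'application/json': 'json-handler', 'text/': 'text-handler'}  # toy table

@lru_cache(None)
def type2idx(t: str):
    if len(t) == 0:
        return None  # just in case?
    handler = TYPE2IDX.get(t)  # first try exact match
    if handler is not None:
        return handler
    for prefix, h in TYPE2IDX.items():  # then fall back to prefix match
        if t.startswith(prefix):
            return h
    return None

assert type2idx('text/x-anything') == 'text-handler'
```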
promnesia/sources/github.py
CHANGED
@@ -1,16 +1,14 @@
 '''
 Uses [[https://github.com/karlicoss/HPI][HPI]] github module
 '''
+from __future__ import annotations
 
 # Note: requires the 'mistletoe' module if you enable render_markdown
-
-from typing import Optional, Set
-
-from ..common import Results, Visit, Loc, iter_urls, logger
+from promnesia.common import Loc, Results, Visit, iter_urls, logger
 
 
 def index(*, render_markdown: bool = False) -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     from my.github.all import events
 
     if render_markdown:
@@ -29,7 +27,7 @@ def index(*, render_markdown: bool = False) -> Results:
             continue
 
         # if enabled, convert the (markdown) body to HTML
-        context: Optional[str] = e.body
+        context: str | None = e.body
         if e.body is not None and render_markdown:
            context = TextParser(e.body)._doc_ashtml()  # type: ignore[possibly-undefined]
 
@@ -59,7 +57,7 @@ def index(*, render_markdown: bool = False) -> Results:
        #
        # Note: this set gets reset every event, is here to
        # prevent duplicates between URLExtract and the markdown parser
-        emitted: Set[str] = set()
+        emitted: set[str] = set()
        for url in iter_urls(e.body):
            if url in emitted:
                continue
promnesia/sources/guess.py
CHANGED
promnesia/sources/hackernews.py
CHANGED
@@ -4,11 +4,11 @@ Uses [[https://github.com/karlicoss/HPI][HPI]] dogsheep module to import HackerN
 
 import textwrap
 
-from promnesia.common import …
+from promnesia.common import Loc, Results, Visit
 
 
 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     from my.hackernews import dogsheep
 
     for item in dogsheep.items():
promnesia/sources/hpi.py
CHANGED
@@ -2,10 +2,10 @@
 Just a helper for a more humane error message when importing my.* dependencies
 '''
 
-from …
+from promnesia.common import logger
 
 try:
-    import my
+    import my  # noqa: F401
 except ImportError as e:
     logger.exception(e)
     logger.critical("Failed during 'import my'. You probably need to install & configure HPI package first (see 'https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org')")
promnesia/sources/html.py
CHANGED
@@ -2,19 +2,21 @@
 Extracts links from HTML files
 '''
 
-from …
-from typing import Iterator, Tuple
+from __future__ import annotations
 
-from …
+from collections.abc import Iterator
+from pathlib import Path
 
 from bs4 import BeautifulSoup
 
+from promnesia.common import Loc, PathIsh, Results, Visit, file_mtime
 
-# TODO present error summary in the very end; import errors -- makes sense to show
+# TODO present error summary in the very end; import errors -- makes sense to show
 # TODO on some exceptions, request a fallback to text?
 
 
-Url = Tuple[str, str]
+Url = tuple[str, str]
+
 
 def extract_urls_from_html(s: str) -> Iterator[Url]:
     """
promnesia/sources/hypothesis.py
CHANGED
@@ -1,11 +1,12 @@
 """
 Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myhypothesis][hypothesis]] module
 """
-…
+
+from promnesia.common import Loc, Results, Visit, extract_urls, join_tags
 
 
 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     import my.hypothesis as hyp
 
     for h in hyp.highlights():
promnesia/sources/instapaper.py
CHANGED
@@ -1,11 +1,11 @@
 '''
 Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myinstapaper][instapaper]] module
 '''
-from …
+from promnesia.common import Loc, Results, Visit
 
 
 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     import my.instapaper as ip
 
     for p in ip.pages():
promnesia/sources/markdown.py
CHANGED
@@ -1,14 +1,24 @@
-from …
-from typing import Iterator, NamedTuple, Optional
-
-from ..common import Extraction, Url, PathIsh, Res, Visit, Loc, file_mtime, logger
+from __future__ import annotations
 
+from collections.abc import Iterator
+from pathlib import Path
+from typing import NamedTuple
 
 import mistletoe  # type: ignore
-from mistletoe.span_token import AutoLink, Link  # type: ignore
 import mistletoe.block_token as BT  # type: ignore
 from mistletoe.html_renderer import HTMLRenderer  # type: ignore
+from mistletoe.span_token import AutoLink, Link  # type: ignore
 
+from promnesia.common import (
+    Extraction,
+    Loc,
+    PathIsh,
+    Res,
+    Url,
+    Visit,
+    file_mtime,
+    logger,
+)
 
 renderer = HTMLRenderer()
 
@@ -18,7 +28,7 @@ block_tokens = tuple(getattr(BT, name) for name in BT.__all__)
 
 class Parsed(NamedTuple):
     url: Url
-    context: Optional[str]
+    context: str | None
 
 
 Result = Res[Parsed]
@@ -118,7 +128,7 @@ class TextParser(Parser):
         self._html = HTML_MARKER + _ashtml(self.doc)
         return self._html
 
-    def _extract(self, cur, last_block=None) -> Iterator[Parsed]:
+    def _extract(self, cur, last_block=None) -> Iterator[Parsed]:  # noqa: ARG002
         if not isinstance(cur, (AutoLink, Link)):
             return
 
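The module keeps one module-level `HTMLRenderer` and walks mistletoe's token tree for `AutoLink`/`Link` spans, recording each as a `Parsed(url, context)`. For reference, mistletoe's one-shot API shows what the renderer produces from a link:

```python
import mistletoe

html = mistletoe.markdown('see [promnesia](https://github.com/karlicoss/promnesia)')
# roughly: '<p>see <a href="https://github.com/karlicoss/promnesia">promnesia</a></p>\n'
```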