promnesia 1.3.20241021__py3-none-any.whl → 1.4.20250909__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promnesia/__init__.py +4 -1
- promnesia/__main__.py +72 -59
- promnesia/cannon.py +90 -89
- promnesia/common.py +74 -62
- promnesia/compare.py +15 -10
- promnesia/config.py +22 -17
- promnesia/database/dump.py +1 -2
- promnesia/extract.py +6 -6
- promnesia/logging.py +27 -15
- promnesia/misc/install_server.py +25 -19
- promnesia/server.py +69 -53
- promnesia/sources/auto.py +65 -51
- promnesia/sources/browser.py +7 -2
- promnesia/sources/browser_legacy.py +51 -40
- promnesia/sources/demo.py +0 -1
- promnesia/sources/fbmessenger.py +0 -1
- promnesia/sources/filetypes.py +15 -11
- promnesia/sources/github.py +4 -1
- promnesia/sources/guess.py +4 -1
- promnesia/sources/hackernews.py +5 -7
- promnesia/sources/hpi.py +3 -1
- promnesia/sources/html.py +4 -2
- promnesia/sources/instapaper.py +1 -0
- promnesia/sources/markdown.py +4 -4
- promnesia/sources/org.py +17 -8
- promnesia/sources/plaintext.py +14 -11
- promnesia/sources/pocket.py +2 -1
- promnesia/sources/reddit.py +5 -8
- promnesia/sources/roamresearch.py +3 -1
- promnesia/sources/rss.py +4 -5
- promnesia/sources/shellcmd.py +3 -6
- promnesia/sources/signal.py +14 -14
- promnesia/sources/smscalls.py +0 -1
- promnesia/sources/stackexchange.py +2 -2
- promnesia/sources/takeout.py +14 -21
- promnesia/sources/takeout_legacy.py +16 -10
- promnesia/sources/telegram.py +7 -3
- promnesia/sources/telegram_legacy.py +5 -5
- promnesia/sources/twitter.py +1 -1
- promnesia/sources/vcs.py +6 -3
- promnesia/sources/viber.py +2 -2
- promnesia/sources/website.py +4 -3
- promnesia/sqlite.py +10 -7
- promnesia/tests/common.py +2 -0
- promnesia/tests/server_helper.py +2 -2
- promnesia/tests/sources/test_filetypes.py +9 -7
- promnesia/tests/sources/test_hypothesis.py +7 -3
- promnesia/tests/sources/test_org.py +7 -2
- promnesia/tests/sources/test_plaintext.py +9 -7
- promnesia/tests/sources/test_shellcmd.py +10 -9
- promnesia/tests/test_cannon.py +254 -237
- promnesia/tests/test_cli.py +8 -2
- promnesia/tests/test_compare.py +16 -12
- promnesia/tests/test_db_dump.py +4 -3
- promnesia/tests/test_extract.py +7 -4
- promnesia/tests/test_indexer.py +10 -10
- promnesia/tests/test_server.py +10 -10
- promnesia/tests/utils.py +1 -5
- promnesia-1.4.20250909.dist-info/METADATA +66 -0
- promnesia-1.4.20250909.dist-info/RECORD +80 -0
- {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info}/WHEEL +1 -2
- promnesia/kjson.py +0 -122
- promnesia/sources/__init__.pyi +0 -0
- promnesia-1.3.20241021.dist-info/METADATA +0 -55
- promnesia-1.3.20241021.dist-info/RECORD +0 -83
- promnesia-1.3.20241021.dist-info/top_level.txt +0 -1
- {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info}/entry_points.txt +0 -0
- {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info/licenses}/LICENSE +0 -0
promnesia/sources/takeout_legacy.py
CHANGED
@@ -13,9 +13,11 @@ def index() -> Results:
     # although could raise a warning on top level, when source emitted no takeouts

     # TODO youtube?
+    # fmt: off
     google_activities = [read_google_activity(t) for t in takeouts]
     search_activities = [read_search_activity(t) for t in takeouts]
     browser_histories = [read_browser_history_json(t) for t in takeouts]
+    # fmt: on

     key = lambda v: (v.dt, v.url)
     return chain(
@@ -25,14 +27,12 @@ def index() -> Results:
     )


-
 import json
 from collections.abc import Iterable
-from datetime import datetime
+from datetime import datetime, timezone
 from itertools import chain
 from pathlib import Path

-import pytz
 from more_itertools import unique_everseen

 from promnesia import config
@@ -42,6 +42,7 @@ try:
 except ModuleNotFoundError as me:
     if me.name != 'cachew':
         raise me
+
     # this module is legacy anyway, so just make it defensive
     def cachew(*args, **kwargs):  # type: ignore[no-redef]
         return lambda f: f
@@ -53,7 +54,7 @@ TakeoutPath = Path

 def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
     # FIXME switch to actual kompress? and use CPath?
-    from my.core.kompress import kexists
+    from my.core.kompress import kexists  # type: ignore[attr-defined]

     # TODO glob
     # TODO not sure about windows path separators??
@@ -65,6 +66,7 @@ def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:

     locator = Loc.file(spath)
     from my.google.takeout.html import read_html
+
     for dt, url, _title in read_html(takeout, spath):
         yield Visit(
             url=url,
@@ -73,6 +75,7 @@ def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
             debug=kind,
         )

+
 def _cpath(suffix: str):
     def fun(takeout: TakeoutPath):
         cache_dir = config.get().cache_dir
@@ -80,23 +83,27 @@ def _cpath(suffix: str):
             return None
         # doesn't need a nontrivial hash function, timestsamp is encoded in name
         return cache_dir / (takeout.name + '_' + suffix + '.cache')
+
     return fun


 # todo caching should this be HPI responsibility?
 # todo set global cachew logging on init?
-@cachew(cache_path=_cpath('google_activity')
+@cachew(cache_path=_cpath('google_activity'), logger=logger)
 def read_google_activity(takeout: TakeoutPath) -> Iterable[Visit]:
     return _read_myactivity_html(takeout, 'Chrome/MyActivity.html')

-
+
+@cachew(cache_path=_cpath('search_activity'), logger=logger)
 def read_search_activity(takeout: TakeoutPath) -> Iterable[Visit]:
     return _read_myactivity_html(takeout, 'Search/MyActivity.html')

+
 # TODO add this to tests?
 @cachew(cache_path=_cpath('browser_activity'), logger=logger)
 def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
-    from my.core.kompress import kexists, kopen
+    from my.core.kompress import kexists, kopen  # type: ignore[attr-defined]
+
     # not sure if this deserves moving to HPI? it's pretty trivial for now
     spath = 'Takeout/Chrome/BrowserHistory.json'

@@ -111,13 +118,13 @@ def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
     # TODO this should be supported by HPI now?

     j = None
-    with kopen(takeout, spath) as fo:
+    with kopen(takeout, spath) as fo:  # TODO iterative parser?
         j = json.load(fo)

     hist = j['Browser History']
     for item in hist:
         url = item['url']
-        time = datetime.fromtimestamp(item['time_usec'] / 10
+        time = datetime.fromtimestamp(item['time_usec'] / 10**6, tz=timezone.utc)
         # TODO any more interesitng info?
         yield Visit(
             url=url,
@@ -125,4 +132,3 @@ def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
             locator=locator,
             debug='Chrome/BrowserHistory.json',
         )
-
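Note on the change above: the pytz dependency is dropped in favour of the standard library's timezone-aware conversion. A minimal sketch of the new conversion, using a hypothetical microsecond timestamp:

    from datetime import datetime, timezone

    # Takeout stores visit times as microseconds since the epoch; dividing by 10**6
    # and passing tz=timezone.utc yields an aware datetime (previously pytz.utc was used).
    time_usec = 1595000000000000  # hypothetical example value, not from the diff
    dt = datetime.fromtimestamp(time_usec / 10**6, tz=timezone.utc)
    print(dt.isoformat())  # 2020-07-17T15:33:20+00:00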
promnesia/sources/telegram.py
CHANGED
@@ -6,7 +6,7 @@ from urllib.parse import unquote  # TODO mm, make it easier to rememember to use
 from promnesia.common import Loc, PathIsh, Results, Visit, extract_urls, logger


-def index(database: PathIsh | None=None, *, http_only: bool=False, with_extra_media_info: bool=False)
+def index(database: PathIsh | None = None, *, http_only: bool = False, with_extra_media_info: bool = False) -> Results:
     if database is None:
         # fully relying on HPI
         yield from _index_new(http_only=http_only, with_extra_media_info=with_extra_media_info)
@@ -17,7 +17,9 @@ def index(database: PathIsh | None=None, *, http_only: bool=False, with_extra_me
        f'Will try to hack database path {database} into HPI config.'
    )
    try:
-        yield from _index_new_with_adhoc_config(
+        yield from _index_new_with_adhoc_config(
+            database=database, http_only=http_only, with_extra_media_info=with_extra_media_info
+        )
    except Exception as e:
        logger.exception(e)
        warnings.warn("Hacking my.config.telegram.telegram_backup didn't work. You probably need to update HPI.")
@@ -30,11 +32,12 @@ def index(database: PathIsh | None=None, *, http_only: bool=False, with_extra_me


 def _index_legacy(*, database: PathIsh, http_only: bool) -> Results:
     from . import telegram_legacy
+
     yield from telegram_legacy.index(database=database, http_only=http_only)


 def _index_new_with_adhoc_config(*, database: PathIsh, http_only: bool, with_extra_media_info: bool) -> Results:
-    from . import hpi # noqa: F401
+    from . import hpi  # noqa: F401

     class config:
         class telegram:
@@ -42,6 +45,7 @@ def _index_new_with_adhoc_config(*, database: PathIsh, http_only: bool, with_ext
                 export_path: PathIsh = database

     from my.core.cfg import tmp_config
+
     with tmp_config(modules='my.telegram.telegram_backup', config=config):
         yield from _index_new(http_only=http_only, with_extra_media_info=with_extra_media_info)

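The ad-hoc config path above leans on HPI's my.core.cfg.tmp_config. A hedged sketch of the pattern used by _index_new_with_adhoc_config, with a hypothetical database path:

    from my.core.cfg import tmp_config

    database = '/path/to/telegram_backup.db'  # hypothetical path

    class config:
        class telegram:
            class telegram_backup:
                export_path = database

    # temporarily patches my.config.telegram.telegram_backup while the block runs,
    # so the new-style indexer can pick up the database without a permanent config edit
    with tmp_config(modules='my.telegram.telegram_backup', config=config):
        ...  # indexing that imports my.telegram.telegram_backup goes here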
promnesia/sources/telegram_legacy.py
CHANGED
@@ -18,7 +18,6 @@ from promnesia.common import (
     echain,
     extract_urls,
     from_epoch,
-    get_logger,
 )

 from ..sqlite import sqlite_connection
@@ -32,15 +31,13 @@ def unwrap(res: T | Exception) -> T:
     return res


-def index(database: PathIsh, *, http_only: bool=False) -> Results:
+def index(database: PathIsh, *, http_only: bool = False) -> Results:
     """
     :param database:
         the path of the sqlite generated by the _telegram_backup_ java program
     :param http_only:
         when true, do not collect IP-addresses and `python.py` strings
     """
-    logger = get_logger()
-
     path = Path(database)
     assert path.is_file(), path

@@ -77,7 +74,8 @@ def index(database: PathIsh, *, http_only: bool=False) -> Results:
        M.message_type NOT IN ('service_message', 'empty_message')
        {extra_criteria}
        ORDER BY time;
-    """
+    """
+    )

     with sqlite_connection(path, immutable=True, row_factory='row') as db:
         # TODO yield error if chatname or chat or smth else is null?
@@ -105,6 +103,7 @@ def _handle_row(row: sqlite3.Row) -> Results:
     urls = extract_urls(text)
     if len(urls) == 0:
         return
+    # fmt: off
     dt = from_epoch(row['time'])
     mid: str = unwrap(row['mid'])

@@ -112,6 +111,7 @@ def _handle_row(row: sqlite3.Row) -> Results:
     sender: str = unwrap(row['sender'])
     chatname: str = unwrap(row['chatname'])
     chat: str = unwrap(row['chat'])
+    # fmt: on

     in_context = f'https://t.me/{chat}/{mid}'
     for u in urls:
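Several hunks in this release add # fmt: off / # fmt: on pairs and # fmt: skip trailers. These are standard black/ruff-format directives; a toy sketch (names and values are illustrative, not from the diff):

    # fmt: off
    # the formatter leaves this block exactly as written
    MEDIA_EXTS = [
        'avi', 'mp4',
        'mp3', 'webm',
    ]
    # fmt: on

    cmd = ['serve', '--port', '13131', '--quiet']  # fmt: skip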
promnesia/sources/twitter.py
CHANGED
promnesia/sources/vcs.py
CHANGED
@@ -1,6 +1,7 @@
 '''
 Clones & indexes Git repositories (via sources.auto)
 '''
+
 from __future__ import annotations

 import re
@@ -22,7 +23,7 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
     # note: https://bugs.python.org/issue33617 , it doesn't like Path here on Windows
     check_call(['git', 'clone', repo, str(tp)])

-    def replacer(p: PathIsh, prefix: str=str(tp), repo: str=repo) -> str:
+    def replacer(p: PathIsh, prefix: str = str(tp), repo: str = repo) -> str:
         ps = str(p)
         # TODO prefix is a bit misleading
         pos = ps.find(prefix)
@@ -31,13 +32,15 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
             return ps
         # TODO ugh. seems that blame view https://github.com/davidgasquez/handbook/blame/master/README.md#L25 is the most reliable
         # in raw mode can't jump onto line, when markdown is renderend can't jump either
-        rest = ps[pos + len(prefix):]
-        rest = re.sub(r':(\d+)$', r'#L\1', rest)
+        rest = ps[pos + len(prefix) :]
+        rest = re.sub(r':(\d+)$', r'#L\1', rest)  # patch line number...
         return repo + '/blame/master' + rest

     # TODO doesn't work for git:
     # TODO think about something more generic... this isn't too sustainable
+
     # TODO not sure if context should be local or github?...

     from . import auto
+
     yield from auto.index(tp, *args, replacer=replacer, **kwargs)
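For context, the replacer above maps a path inside the temporary clone (plus a trailing :<line> suffix) to a GitHub blame URL. A rough sketch with hypothetical values:

    import re

    prefix = '/tmp/promnesia-git-clone'                 # hypothetical temporary clone dir
    repo = 'https://github.com/davidgasquez/handbook'   # the cloned remote
    ps = '/tmp/promnesia-git-clone/README.md:25'        # local path with a line number

    rest = ps[len(prefix):]
    rest = re.sub(r':(\d+)$', r'#L\1', rest)  # ':25' becomes '#L25'
    print(repo + '/blame/master' + rest)
    # https://github.com/davidgasquez/handbook/blame/master/README.md#L25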
promnesia/sources/viber.py
CHANGED
@@ -19,7 +19,7 @@ logger = logging.getLogger(__name__)

 def index(
     db_path: PathIsh = "~/.ViberPC/*/viber.db",
-    locator_schema: str="editor",
+    locator_schema: str = "editor",
     *,
     http_only: bool = False,
 ) -> Results:
@@ -40,7 +40,7 @@ def index(
     yield from _harvest_db(db, msgs_query, locator_schema)


-def messages_query(http_only: bool | None) -> str:
+def messages_query(http_only: bool | None) -> str:  # noqa: FBT001
     """
     An SQL-query returning 1 row for each message

promnesia/sources/website.py
CHANGED
@@ -27,7 +27,7 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
         '-A', 'html,html,txt',  # TODO eh, ideally would use mime type I guess...
         '--no-parent',
         url,
-    ]
+    ]  # fmt: skip
     # TODO follow sitemap? e.g. gwern
     logger.info(' '.join(cmd))
     res = run(cmd, check=False)
@@ -39,12 +39,12 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
     # rest of the errors are a bit more critical..
     res.check_returncode()

-    def replacer(p: PathIsh, prefix: str=str(tp), url: str=url) -> str:
+    def replacer(p: PathIsh, prefix: str = str(tp), url: str = url) -> str:
         ps = str(p)
         pos = ps.find(prefix)
         if pos == -1:
             return ps
-        rest = ps[pos + len(prefix):]
+        rest = ps[pos + len(prefix) :]
         # now this should look kinda like /domain.tld/rest (due to the way wget downloads stuff)
         rest = re.sub(r'/.*?/', '/', rest)
         return url + rest
@@ -54,4 +54,5 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:

     # TODO smarter html handling
     from . import auto
+
     yield from auto.index(tp, *args, replacer=replacer, **kwargs)
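Similarly, the website replacer maps a file inside the temporary wget mirror back to a URL on the original site. A sketch with hypothetical values:

    import re

    prefix = '/tmp/promnesia-wget-mirror'   # hypothetical temporary download dir
    url = 'https://example.org'             # the site being indexed
    ps = '/tmp/promnesia-wget-mirror/example.org/about/index.html'

    rest = ps[len(prefix):]                 # '/example.org/about/index.html'
    rest = re.sub(r'/.*?/', '/', rest)      # drop the domain directory wget created
    print(url + rest)                       # https://example.org/about/index.html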
promnesia/sqlite.py
CHANGED
@@ -1,25 +1,28 @@
 from __future__ import annotations

 import sqlite3
-from collections.abc import Iterator
+from collections.abc import Callable, Iterator
 from contextlib import contextmanager
-from
-
-from .common import PathIsh
+from pathlib import Path
+from typing import Any, Literal

 # NOTE: copy pasted from HPI

 SqliteRowFactory = Callable[[sqlite3.Cursor, sqlite3.Row], Any]

+
 def dict_factory(cursor, row):
     fields = [column[0] for column in cursor.description]
-    return dict(zip(fields, row))
+    return dict(zip(fields, row, strict=True))
+

+Factory = SqliteRowFactory | Literal['row', 'dict']

-Factory = Union[SqliteRowFactory, Literal['row', 'dict']]

 @contextmanager
-def sqlite_connection(
+def sqlite_connection(
+    db: Path | str, *, immutable: bool = False, row_factory: Factory | None = None
+) -> Iterator[sqlite3.Connection]:
     dbp = f'file:{db}'
     # https://www.sqlite.org/draft/uri.html#uriimmutable
     if immutable:
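The reworked sqlite_connection keeps the call shape used elsewhere in this diff (e.g. in telegram_legacy.py). A small usage sketch with a hypothetical database path:

    from promnesia.sqlite import sqlite_connection

    # immutable=True opens the database via sqlite's read-only URI form;
    # row_factory='row' selects sqlite3.Row, 'dict' selects the dict_factory above.
    with sqlite_connection('/path/to/visits.db', immutable=True, row_factory='row') as db:
        for row in db.execute('SELECT 1 AS one'):
            print(row['one'])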
promnesia/tests/common.py
CHANGED
@@ -59,6 +59,7 @@ def get_testdata(path: str) -> Path:
 @contextmanager
 def tmp_popen(*args, **kwargs):
     import psutil
+
     with psutil.Popen(*args, **kwargs) as p:
         try:
             yield p
@@ -99,6 +100,7 @@ def reset_filters():
 # TODO could be a TypeGuard from 3.10
 V = TypeVar('V')

+
 def unwrap(r: Res[V]) -> V:
     assert not isinstance(r, Exception), r
     return r
promnesia/tests/server_helper.py
CHANGED
@@ -45,8 +45,8 @@ def run_server(db: PathIsh | None = None, *, timezone: str | None = None) -> Ite
         '--quiet',
         '--port', port,
         *([] if timezone is None else ['--timezone', timezone]),
-        *([] if db is None else ['--db'
-    ]
+        *([] if db is None else ['--db', str(db)]),
+    ]  # fmt: skip
     with tmp_popen(promnesia_bin(*args)) as server_process:
         server = Helper(host=host, port=port, process=server_process)

promnesia/tests/sources/test_filetypes.py
CHANGED
@@ -13,16 +13,16 @@ def handled(p: PathIsh) -> bool:

 def test_filetypes() -> None:
     # test media
-    for ext in 'avi mp4 mp3 webm'
+    for ext in ['avi', 'mp4', 'mp3', 'webm'] + ([] if windows else ['mkv']):
         assert handled('file.' + ext)

     # images
-    for ext in 'gif jpg png jpeg'
+    for ext in ['gif', 'jpg', 'png', 'jpeg']:
         assert handled('file.' + ext)

     # TODO more granual checks that these are ignored?
     # binaries
-    for ext in 'o sqlite'
+    for ext in ['o', 'sqlite'] + ([] if windows else ['class', 'jar']):
         assert handled('file.' + ext)

     # these might have potentially some links
@@ -31,13 +31,15 @@ def test_filetypes() -> None:
         'pdf', 'epub', 'ps',
         'doc', 'ppt', 'xsl',
         # seriously, windows doesn't know about docx???
-        *([] if windows else 'docx pptx xlsx'
-        *([] if windows else 'ods odt rtf'
-    ] + ([] if windows else 'djvu'
+        *([] if windows else ['docx', 'pptx', 'xlsx']),
+        *([] if windows else ['ods', 'odt', 'rtf']),
+    ] + ([] if windows else ['djvu']):  # fmt: skip
         assert handled('file.' + ext)

     # source code
-    for ext in 'rs tex el js sh hs pl h py hpp c go css'
+    for ext in ['rs', 'tex', 'el', 'js', 'sh', 'hs', 'pl', 'h', 'py', 'hpp', 'c', 'go', 'css'] + (
+        [] if windows else ['java', 'cpp']
+    ):
         assert handled('file.' + ext)

     assert handled('x.html')
promnesia/tests/sources/test_hypothesis.py
CHANGED
@@ -12,7 +12,7 @@ def index_hypothesis(tmp_path: Path) -> None:
     from promnesia.common import Source
     from promnesia.sources import hypothesis

-    SOURCES = [Source(hypothesis.index, name='hyp')]
+    SOURCES = [Source(hypothesis.index, name='hyp')]  # noqa: F841

     cfg_path = tmp_path / 'config.py'
     write_config(cfg_path, cfg)
@@ -35,5 +35,9 @@ def test_hypothesis(tmp_path: Path) -> None:

     assert vis.norm_url == 'wired.com/2017/04/the-myth-of-a-superhuman-ai'
     assert vis.orig_url == 'https://www.wired.com/2017/04/the-myth-of-a-superhuman-ai/'
-    assert
-
+    assert (
+        vis.locator.href == 'https://hyp.is/_Z9ccmVZEeexBOO7mToqdg/www.wired.com/2017/04/the-myth-of-a-superhuman-ai/'
+    )
+    assert 'misconception about evolution is fueling misconception about AI' in (
+        vis.context or ''
+    )  # contains notes as well
promnesia/tests/sources/test_org.py
CHANGED
@@ -14,7 +14,9 @@ def delrf(s: str | None) -> str | None:


 def test_org_indexer() -> None:
-    [_, cpp, cozy] = [
+    [_, cpp, cozy] = [
+        v if isinstance(v, Visit) else throw(v) for v in extract_from_file(get_testdata('auto/orgs/file.org'))
+    ]

     assert cpp.url == 'https://www.youtube.com/watch?v=rHIkrotSwcc'
     # TODO not sure about filetags?
@@ -33,7 +35,10 @@ def test_org_indexer_2() -> None:
     items = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(get_testdata('auto/orgs/file3.org'))]

     assert len(items) == 6
-    assert
+    assert (
+        items[0].url
+        == 'https://www.reddit.com/r/androidapps/comments/4i36z9/how_you_use_your_android_to_the_maximum/d2uq24i'
+    )
     assert items[1].url == 'https://link.com'
     assert items[-2].url == 'https://en.wikipedia.org/wiki/Resilio_Sync'
     # TODO shit def need org specific url extractor (and then extract from everything remaining)
promnesia/tests/sources/test_plaintext.py
CHANGED
@@ -5,13 +5,15 @@ from ..common import get_testdata, unwrap


 def test_plaintext_path_extractor() -> None:
-    visits = list(
-
-
-
-
-
-
+    visits = list(
+        extract_visits(
+            Source(
+                shellcmd.index,
+                plaintext.extract_from_path(get_testdata('custom')),
+            ),
+            src='whatever',
+        )
+    )
     assert {unwrap(v).orig_url for v in visits} == {
         'http://google.com',
         'http://google.com/',
promnesia/tests/sources/test_shellcmd.py
CHANGED
@@ -8,14 +8,15 @@ from ..common import get_testdata

 @pytest.mark.skipif(_is_windows, reason="no grep on windows")
 def test_via_grep() -> None:
-
-
-
-
-
-
-
-
-
+    visits = list(
+        extract_visits(
+            Source(
+                shellcmd.index,
+                # meh. maybe should deprecate plain string here...
+                r"""grep -Eo -r --no-filename (http|https)://\S+ """ + str(get_testdata('custom')),
+            ),
+            src='whatever',
+        )
+    )
     # TODO I guess filtering of equivalent urls should rather be tested on something having context (e.g. org mode)
     assert len(visits) == 5