promnesia 1.2.20240810__py3-none-any.whl → 1.4.20250909__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promnesia/__init__.py +18 -4
- promnesia/__main__.py +104 -78
- promnesia/cannon.py +108 -107
- promnesia/common.py +107 -88
- promnesia/compare.py +33 -30
- promnesia/compat.py +10 -10
- promnesia/config.py +37 -34
- promnesia/database/common.py +4 -3
- promnesia/database/dump.py +13 -13
- promnesia/database/load.py +7 -7
- promnesia/extract.py +19 -17
- promnesia/logging.py +27 -15
- promnesia/misc/install_server.py +32 -27
- promnesia/server.py +106 -79
- promnesia/sources/auto.py +104 -77
- promnesia/sources/auto_logseq.py +6 -5
- promnesia/sources/auto_obsidian.py +2 -2
- promnesia/sources/browser.py +20 -10
- promnesia/sources/browser_legacy.py +65 -50
- promnesia/sources/demo.py +7 -8
- promnesia/sources/fbmessenger.py +3 -3
- promnesia/sources/filetypes.py +22 -16
- promnesia/sources/github.py +9 -8
- promnesia/sources/guess.py +6 -2
- promnesia/sources/hackernews.py +7 -9
- promnesia/sources/hpi.py +5 -3
- promnesia/sources/html.py +11 -7
- promnesia/sources/hypothesis.py +3 -2
- promnesia/sources/instapaper.py +3 -2
- promnesia/sources/markdown.py +22 -12
- promnesia/sources/org.py +36 -17
- promnesia/sources/plaintext.py +41 -39
- promnesia/sources/pocket.py +5 -3
- promnesia/sources/reddit.py +24 -26
- promnesia/sources/roamresearch.py +5 -2
- promnesia/sources/rss.py +6 -8
- promnesia/sources/shellcmd.py +21 -11
- promnesia/sources/signal.py +27 -26
- promnesia/sources/smscalls.py +2 -3
- promnesia/sources/stackexchange.py +5 -4
- promnesia/sources/takeout.py +37 -34
- promnesia/sources/takeout_legacy.py +29 -19
- promnesia/sources/telegram.py +18 -12
- promnesia/sources/telegram_legacy.py +22 -11
- promnesia/sources/twitter.py +7 -6
- promnesia/sources/vcs.py +11 -6
- promnesia/sources/viber.py +11 -10
- promnesia/sources/website.py +8 -7
- promnesia/sources/zulip.py +3 -2
- promnesia/sqlite.py +13 -7
- promnesia/tests/common.py +10 -5
- promnesia/tests/server_helper.py +13 -10
- promnesia/tests/sources/test_auto.py +2 -3
- promnesia/tests/sources/test_filetypes.py +11 -8
- promnesia/tests/sources/test_hypothesis.py +10 -6
- promnesia/tests/sources/test_org.py +9 -5
- promnesia/tests/sources/test_plaintext.py +9 -8
- promnesia/tests/sources/test_shellcmd.py +13 -13
- promnesia/tests/sources/test_takeout.py +3 -5
- promnesia/tests/test_cannon.py +256 -239
- promnesia/tests/test_cli.py +12 -8
- promnesia/tests/test_compare.py +17 -13
- promnesia/tests/test_config.py +7 -8
- promnesia/tests/test_db_dump.py +15 -15
- promnesia/tests/test_extract.py +17 -10
- promnesia/tests/test_indexer.py +24 -18
- promnesia/tests/test_server.py +12 -13
- promnesia/tests/test_traverse.py +0 -2
- promnesia/tests/utils.py +3 -7
- promnesia-1.4.20250909.dist-info/METADATA +66 -0
- promnesia-1.4.20250909.dist-info/RECORD +80 -0
- {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/WHEEL +1 -2
- promnesia/kjson.py +0 -121
- promnesia/sources/__init__.pyi +0 -0
- promnesia-1.2.20240810.dist-info/METADATA +0 -54
- promnesia-1.2.20240810.dist-info/RECORD +0 -83
- promnesia-1.2.20240810.dist-info/top_level.txt +0 -1
- {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/entry_points.txt +0 -0
- {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info/licenses}/LICENSE +0 -0
promnesia/sources/twitter.py
CHANGED
@@ -1,18 +1,19 @@
 '''
 Uses [[https://github.com/karlicoss/HPI][HPI]] for Twitter data.
 '''
-from typing import Iterable

-from
+from collections.abc import Iterable
+
+from promnesia.common import Loc, Res, Results, Visit, extract_urls, logger


 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     import my.twitter.all as tw
+    from my.twitter.archive import Tweet  # todo extract to common or something?
+
     # TODO hmm. tweets themselves are sort of visits? not sure if they should contribute..
     processed = 0
-
-    from my.twitter.archive import Tweet  # todo extract to common or something?
     tweets: Iterable[Res[Tweet]] = tw.tweets()
     for t in tweets:
         if isinstance(t, Exception):
@@ -22,7 +23,7 @@ def index() -> Results:
         processed += 1
         try:
             urls = t.urls
-        except Exception as e:
+        except Exception as e:  # just in case..
             yield e
             urls = []

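Most changes in this release follow the modernization pattern visible here: `typing.Iterable` becomes `collections.abc.Iterable`, `Optional[X]`/`Union[X, Y]` become PEP 604 unions, and `from __future__ import annotations` is added where the new syntax must run on older Pythons. A minimal before/after sketch (names are illustrative, not from the package):

    # before
    #   from typing import Iterable, Optional
    #   def first(xs: Iterable[str], default: Optional[str] = None) -> Optional[str]: ...

    # after
    from __future__ import annotations

    from collections.abc import Iterable

    def first(xs: Iterable[str], default: str | None = None) -> str | None:
        return next(iter(xs), default)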
promnesia/sources/vcs.py
CHANGED
@@ -1,12 +1,15 @@
 '''
 Clones & indexes Git repositories (via sources.auto)
 '''
-# TODO not sure if worth exposing... could be just handled by auto or something?)

-from
+from __future__ import annotations
+
 import re
+from collections.abc import Iterable
+
+# TODO not sure if worth exposing... could be just handled by auto or something?)
+from pathlib import Path
 from subprocess import check_call
-from typing import Iterable

 from ..common import Extraction, PathIsh, get_tmpdir, slugify

@@ -20,7 +23,7 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
     # note: https://bugs.python.org/issue33617 , it doesn't like Path here on Windows
     check_call(['git', 'clone', repo, str(tp)])

-    def replacer(p: PathIsh, prefix: str=str(tp), repo: str=repo) -> str:
+    def replacer(p: PathIsh, prefix: str = str(tp), repo: str = repo) -> str:
         ps = str(p)
         # TODO prefix is a bit misleading
         pos = ps.find(prefix)
@@ -29,13 +32,15 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
             return ps
         # TODO ugh. seems that blame view https://github.com/davidgasquez/handbook/blame/master/README.md#L25 is the most reliable
         # in raw mode can't jump onto line, when markdown is renderend can't jump either
-        rest = ps[pos + len(prefix):]
-        rest = re.sub(r':(\d+)$', r'#L\1', rest)
+        rest = ps[pos + len(prefix) :]
+        rest = re.sub(r':(\d+)$', r'#L\1', rest)  # patch line number...
         return repo + '/blame/master' + rest

     # TODO doesn't work for git:
     # TODO think about something more generic... this isn't too sustainable
+
     # TODO not sure if context should be local or github?...

     from . import auto
+
     yield from auto.index(tp, *args, replacer=replacer, **kwargs)
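The reformatted `replacer` is what maps a path inside the temporary clone back to a GitHub blame link, converting a trailing `:<line>` suffix into a `#L<line>` anchor. A self-contained sketch of that transformation (paths and repo URL are made up for illustration):

    import re

    def to_blame_url(ps: str, prefix: str, repo: str) -> str:
        rest = ps[ps.find(prefix) + len(prefix):]  # drop the local checkout prefix
        rest = re.sub(r':(\d+)$', r'#L\1', rest)   # ':25' -> '#L25'
        return repo + '/blame/master' + rest

    assert to_blame_url(
        '/tmp/clone/README.md:25', '/tmp/clone', 'https://github.com/user/repo'
    ) == 'https://github.com/user/repo/blame/master/README.md#L25'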
promnesia/sources/viber.py
CHANGED
@@ -2,23 +2,24 @@
 Collects visits from Viber desktop app (e.g. `~/.ViberPC/XYZ123/viber.db`)
 """

+from __future__ import annotations
+
 import logging
+import sqlite3
 import textwrap
+from collections.abc import Iterable
 from os import PathLike
 from pathlib import Path
-import sqlite3
-from typing import Iterable, Optional

 from ..common import Loc, PathIsh, Results, Visit, extract_urls, from_epoch, join_tags
 from ..sqlite import sqlite_connection

-
 logger = logging.getLogger(__name__)


 def index(
     db_path: PathIsh = "~/.ViberPC/*/viber.db",
-    locator_schema: str="editor",
+    locator_schema: str = "editor",
     *,
     http_only: bool = False,
 ) -> Results:
@@ -34,12 +35,12 @@ def index(

     msgs_query = messages_query(http_only)

-    for
-    assert
-    yield from _harvest_db(
+    for db in _get_files(db_path):
+        assert db.is_file(), f"Is it a (Viber-desktop sqlite) file? {db}"
+        yield from _harvest_db(db, msgs_query, locator_schema)


-def messages_query(http_only:
+def messages_query(http_only: bool | None) -> str:  # noqa: FBT001
     """
     An SQL-query returning 1 row for each message

@@ -123,7 +124,7 @@ def _handle_row(row: sqlite3.Row, db_path: PathLike, locator_schema: str) -> Res
     tags: str = row["tags"]
     url_title: str = row["url_title"]

-    assert (
+    assert (  # noqa: PT018
         text and mid and sender and chatname
     ), f"sql-query should eliminate messages without 'http' or missing ids: {row}"

@@ -154,7 +155,7 @@ def _get_files(path: PathIsh) -> Iterable[Path]:
     """
     path = Path(path).expanduser()
     parts = path.parts[1:] if path.is_absolute() else path.parts
-    return Path(path.root).glob(str(Path("").joinpath(*parts)))
+    return Path(path.root).glob(str(Path("").joinpath(*parts)))  # noqa: PTH201

 
 def _harvest_db(db_path: PathIsh, msgs_query: str, locator_schema: str) -> Results:
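`_get_files` (unchanged here apart from the `noqa`) exists because `Path.glob` rejects absolute patterns: it peels off the filesystem root and globs the remainder, so the default `~/.ViberPC/*/viber.db` expands across all profile directories. The same idea in isolation, as a sketch:

    from pathlib import Path

    def expand(pattern: str):
        path = Path(pattern).expanduser()
        # Path.glob() can't take an absolute pattern, so glob relative to the root
        parts = path.parts[1:] if path.is_absolute() else path.parts
        return Path(path.root).glob(str(Path("").joinpath(*parts)))

    dbs = list(expand("~/.ViberPC/*/viber.db"))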
promnesia/sources/website.py
CHANGED
@@ -2,12 +2,12 @@
 Clones a website with wget and indexes via sources.auto
 '''

-from pathlib import Path
 import re
+from collections.abc import Iterable
+from pathlib import Path
 from subprocess import run
-from typing import Iterable

-from
+from promnesia.common import Extraction, PathIsh, get_logger, get_tmpdir, slugify


 def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
@@ -27,10 +27,10 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
         '-A', 'html,html,txt',  # TODO eh, ideally would use mime type I guess...
         '--no-parent',
         url,
-    ]
+    ]  # fmt: skip
     # TODO follow sitemap? e.g. gwern
     logger.info(' '.join(cmd))
-    res = run(cmd)
+    res = run(cmd, check=False)

     if res.returncode == 8:
         # man wget: 8 means server error (e.g. broken link)
@@ -39,12 +39,12 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
         # rest of the errors are a bit more critical..
         res.check_returncode()

-    def replacer(p: PathIsh, prefix: str=str(tp), url: str=url) -> str:
+    def replacer(p: PathIsh, prefix: str = str(tp), url: str = url) -> str:
         ps = str(p)
         pos = ps.find(prefix)
         if pos == -1:
             return ps
-        rest = ps[pos + len(prefix):]
+        rest = ps[pos + len(prefix) :]
         # now this should look kinda like /domain.tld/rest (due to the way wget downloads stuff)
         rest = re.sub(r'/.*?/', '/', rest)
         return url + rest
@@ -54,4 +54,5 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:

     # TODO smarter html handling
     from . import auto
+
     yield from auto.index(tp, *args, replacer=replacer, **kwargs)
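The `run(cmd, check=False)` change makes the existing behaviour explicit rather than altering it: wget's exit status is inspected by hand, code 8 (server errors such as broken links, routine during a crawl) is tolerated, and any other nonzero status still raises. The control flow boils down to this (command shown is illustrative):

    from subprocess import run

    res = run(['wget', '--recursive', '--no-parent', 'https://example.com'], check=False)
    if res.returncode == 8:
        pass  # man wget: 8 = server error (e.g. a broken link); acceptable for a mirror
    else:
        res.check_returncode()  # any other nonzero status raises CalledProcessError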
promnesia/sources/zulip.py
CHANGED
@@ -2,12 +2,13 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for Zulip data.
 '''

-from
+from promnesia.common import Loc, Results, Visit, iter_urls


 def index() -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
     import my.zulip.organization as Z
+
     for m in Z.messages():
         if isinstance(m, Exception):
             yield m
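As in twitter.py, `from . import hpi` is kept purely for its side effect, which is presumably why it now carries explicit suppressions for the unused-import (F401) and import-order (I001) lints. A sketch of what such a guard module can look like (the actual promnesia.sources.hpi may differ):

    # hpi.py (sketch): importing this module verifies HPI is installed
    try:
        import my.core  # noqa: F401
    except ImportError as e:
        raise RuntimeError(
            'promnesia requires HPI: https://github.com/karlicoss/HPI'
        ) from e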
promnesia/sqlite.py
CHANGED
@@ -1,22 +1,28 @@
-from
-import sqlite3
-from typing import Callable, Optional, Any, Iterator, Union, Literal
+from __future__ import annotations

-
+import sqlite3
+from collections.abc import Callable, Iterator
+from contextlib import contextmanager
+from pathlib import Path
+from typing import Any, Literal

 # NOTE: copy pasted from HPI

 SqliteRowFactory = Callable[[sqlite3.Cursor, sqlite3.Row], Any]

+
 def dict_factory(cursor, row):
     fields = [column[0] for column in cursor.description]
-    return
+    return dict(zip(fields, row, strict=True))
+

+Factory = SqliteRowFactory | Literal['row', 'dict']

-Factory = Union[SqliteRowFactory, Literal['row', 'dict']]

 @contextmanager
-def sqlite_connection(
+def sqlite_connection(
+    db: Path | str, *, immutable: bool = False, row_factory: Factory | None = None
+) -> Iterator[sqlite3.Connection]:
     dbp = f'file:{db}'
     # https://www.sqlite.org/draft/uri.html#uriimmutable
     if immutable:
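`sqlite_connection` opens databases through SQLite URI filenames, which is what makes the `immutable` flag possible: `immutable=1` tells SQLite the file cannot change, so it reads without taking any locks, handy for databases owned by a running app such as a browser. A minimal sketch of the same idea (not the exact helper):

    import sqlite3

    def connect_immutable(db: str) -> sqlite3.Connection:
        # https://www.sqlite.org/uri.html#uriimmutable
        # immutable=1 implies read-only and skips all locking
        return sqlite3.connect(f'file:{db}?immutable=1', uri=True)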
promnesia/tests/common.py
CHANGED
@@ -1,16 +1,19 @@
-from
+from __future__ import annotations
+
 import gc
 import inspect
 import os
-from pathlib import Path
 import socket
 import sys
+from collections.abc import Iterator
+from contextlib import closing, contextmanager
+from pathlib import Path
 from textwrap import dedent
-from typing import
+from typing import NoReturn, TypeVar

 import pytest

-from ..common import
+from ..common import Res, _is_windows


 def under_ci() -> bool:
@@ -25,7 +28,7 @@ def throw(x: Exception) -> NoReturn:


 @pytest.fixture
-def gc_control(gc_on: bool):
+def gc_control(*, gc_on: bool):
     if gc_on:
         # no need to do anything, should be on by default
         yield
@@ -56,6 +59,7 @@ def get_testdata(path: str) -> Path:
 @contextmanager
 def tmp_popen(*args, **kwargs):
     import psutil
+
     with psutil.Popen(*args, **kwargs) as p:
         try:
             yield p
@@ -96,6 +100,7 @@ def reset_filters():
 # TODO could be a TypeGuard from 3.10
 V = TypeVar('V')

+
 def unwrap(r: Res[V]) -> V:
     assert not isinstance(r, Exception), r
     return r
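The `Res`/`unwrap` pair used throughout these tests encodes "result or error" as a plain union: extractors yield either a value or an Exception, and `unwrap` asserts away the error case. In miniature (the real `Res` lives in promnesia.common):

    from typing import TypeVar, Union

    V = TypeVar('V')
    Res = Union[V, Exception]

    def unwrap(r: Res[V]) -> V:
        assert not isinstance(r, Exception), r
        return r

    assert unwrap(42) == 42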
promnesia/tests/server_helper.py
CHANGED
@@ -1,15 +1,18 @@
+from __future__ import annotations
+
+import sys
+import time
+from collections.abc import Iterator
 from contextlib import contextmanager
 from dataclasses import dataclass
 from pathlib import Path
-import
-import time
-from typing import Any, Dict, Iterator, Optional
+from typing import Any

 import psutil
 import requests

 from ..common import PathIsh
-from .common import
+from .common import free_port, promnesia_bin, tmp_popen


 @dataclass
@@ -18,18 +21,18 @@ class Helper:
     port: str
     process: psutil.Popen

-    def get(self, path: str
+    def get(self, path: str):
         # check it's alive first so the error is cleaner
         assert self.process.poll() is None, self.process
         return requests.get(f'http://{self.host}:{self.port}' + path)

-    def post(self, path: str, *, json:
+    def post(self, path: str, *, json: dict[str, Any] | None = None):
         assert self.process.poll() is None, self.process
         return requests.post(f'http://{self.host}:{self.port}' + path, json=json)


 @contextmanager
-def run_server(db:
+def run_server(db: PathIsh | None = None, *, timezone: str | None = None) -> Iterator[Helper]:
     # TODO not sure, perhaps best to use a thread or something?
     # but for some tests makes more sense to test in a separate process
     with free_port() as pp:
@@ -42,8 +45,8 @@ def run_server(db: Optional[PathIsh] = None, *, timezone: Optional[str] = None)
         '--quiet',
         '--port', port,
         *([] if timezone is None else ['--timezone', timezone]),
-        *([] if db is None else ['--db'
-    ]
+        *([] if db is None else ['--db', str(db)]),
+    ]  # fmt: skip
     with tmp_popen(promnesia_bin(*args)) as server_process:
         server = Helper(host=host, port=port, process=server_process)

@@ -56,7 +59,7 @@ def run_server(db: Optional[PathIsh] = None, *, timezone: Optional[str] = None)
                 time.sleep(0.1)
         else:
             raise RuntimeError("Cooldn't connect to '{st}' after 50 attempts")
-        print("Started server up, db: {db}"
+        print(f"Started server up, db: {db}", file=sys.stderr)

         yield server

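`run_server` leans on a `free_port` helper now imported explicitly from `.common`; given the `socket` and `contextlib.closing` imports added there, the usual implementation is to bind port 0 and let the OS hand out an ephemeral port. A plausible sketch (the actual helper may differ):

    import socket
    from contextlib import closing, contextmanager

    @contextmanager
    def free_port():
        with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
            s.bind(('', 0))  # port 0: the OS picks a free ephemeral port
            yield s.getsockname()[1]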
promnesia/tests/sources/test_auto.py
CHANGED
@@ -1,8 +1,7 @@
-from itertools import groupby
 import os
+from itertools import groupby

 from ...sources import auto
-
 from ..common import get_testdata, throw

 sa2464 = 'https://www.scottaaronson.com/blog/?p=2464'
@@ -19,7 +18,7 @@ def makemap(visits):
     def it():
         vit = (throw(v) if isinstance(v, Exception) else v for v in visits)
         for k, g in groupby(sorted(vit, key=key), key=key):
-            yield k,
+            yield k, sorted(g)

     return dict(it())

promnesia/tests/sources/test_filetypes.py
CHANGED
@@ -1,6 +1,7 @@
 from pathlib import Path

-from ...common import PathIsh
+from ...common import PathIsh
+from ...common import _is_windows as windows
 from ...sources.auto import by_path


@@ -12,16 +13,16 @@ def handled(p: PathIsh) -> bool:

 def test_filetypes() -> None:
     # test media
-    for ext in 'avi mp4 mp3 webm'
+    for ext in ['avi', 'mp4', 'mp3', 'webm'] + ([] if windows else ['mkv']):
         assert handled('file.' + ext)

     # images
-    for ext in 'gif jpg png jpeg'
+    for ext in ['gif', 'jpg', 'png', 'jpeg']:
         assert handled('file.' + ext)

     # TODO more granual checks that these are ignored?
     # binaries
-    for ext in 'o sqlite'
+    for ext in ['o', 'sqlite'] + ([] if windows else ['class', 'jar']):
         assert handled('file.' + ext)

     # these might have potentially some links
@@ -30,13 +31,15 @@ def test_filetypes() -> None:
         'pdf', 'epub', 'ps',
         'doc', 'ppt', 'xsl',
         # seriously, windows doesn't know about docx???
-        *([] if windows else 'docx pptx xlsx'
-        *([] if windows else 'ods odt rtf'
-    ] + ([] if windows else 'djvu'
+        *([] if windows else ['docx', 'pptx', 'xlsx']),
+        *([] if windows else ['ods', 'odt', 'rtf']),
+    ] + ([] if windows else ['djvu']):  # fmt: skip
         assert handled('file.' + ext)

     # source code
-    for ext in 'rs tex el js sh hs pl h py hpp c go css'
+    for ext in ['rs', 'tex', 'el', 'js', 'sh', 'hs', 'pl', 'h', 'py', 'hpp', 'c', 'go', 'css'] + (
+        [] if windows else ['java', 'cpp']
+    ):
         assert handled('file.' + ext)

     assert handled('x.html')
promnesia/tests/sources/test_hypothesis.py
CHANGED
@@ -1,10 +1,10 @@
 from pathlib import Path

-from
+from my.core.cfg import tmp_config
+
 from ...__main__ import do_index
 from ...database.load import get_all_db_visits
-
-from my.core.cfg import tmp_config
+from ..common import get_testdata, write_config


 def index_hypothesis(tmp_path: Path) -> None:
@@ -12,7 +12,7 @@ def index_hypothesis(tmp_path: Path) -> None:
     from promnesia.common import Source
     from promnesia.sources import hypothesis

-    SOURCES = [Source(hypothesis.index, name='hyp')]
+    SOURCES = [Source(hypothesis.index, name='hyp')]  # noqa: F841

     cfg_path = tmp_path / 'config.py'
     write_config(cfg_path, cfg)
@@ -35,5 +35,9 @@ def test_hypothesis(tmp_path: Path) -> None:

     assert vis.norm_url == 'wired.com/2017/04/the-myth-of-a-superhuman-ai'
     assert vis.orig_url == 'https://www.wired.com/2017/04/the-myth-of-a-superhuman-ai/'
-    assert
-
+    assert (
+        vis.locator.href == 'https://hyp.is/_Z9ccmVZEeexBOO7mToqdg/www.wired.com/2017/04/the-myth-of-a-superhuman-ai/'
+    )
+    assert 'misconception about evolution is fueling misconception about AI' in (
+        vis.context or ''
+    )  # contains notes as well
promnesia/tests/sources/test_org.py
CHANGED
@@ -1,12 +1,11 @@
-from
+from __future__ import annotations

 from ...common import Visit
 from ...sources.org import extract_from_file
-
 from ..common import get_testdata, throw


-def delrf(s:
+def delrf(s: str | None) -> str | None:
     if s is None:
         return None
     # meh.. not sure how ot handle this properly, ideally should be via pytest?
@@ -15,7 +14,9 @@ def delrf(s: Optional[str]) -> Optional[str]:


 def test_org_indexer() -> None:
-    [_, cpp, cozy] = [
+    [_, cpp, cozy] = [
+        v if isinstance(v, Visit) else throw(v) for v in extract_from_file(get_testdata('auto/orgs/file.org'))
+    ]

     assert cpp.url == 'https://www.youtube.com/watch?v=rHIkrotSwcc'
     # TODO not sure about filetags?
@@ -34,7 +35,10 @@ def test_org_indexer_2() -> None:
     items = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(get_testdata('auto/orgs/file3.org'))]

     assert len(items) == 6
-    assert
+    assert (
+        items[0].url
+        == 'https://www.reddit.com/r/androidapps/comments/4i36z9/how_you_use_your_android_to_the_maximum/d2uq24i'
+    )
     assert items[1].url == 'https://link.com'
     assert items[-2].url == 'https://en.wikipedia.org/wiki/Resilio_Sync'
     # TODO shit def need org specific url extractor (and then extract from everything remaining)
promnesia/tests/sources/test_plaintext.py
CHANGED
@@ -1,18 +1,19 @@
 from ...common import Source
 from ...extract import extract_visits
 from ...sources import plaintext, shellcmd
-
 from ..common import get_testdata, unwrap


 def test_plaintext_path_extractor() -> None:
-    visits = list(
-
-
-
-
-
-
+    visits = list(
+        extract_visits(
+            Source(
+                shellcmd.index,
+                plaintext.extract_from_path(get_testdata('custom')),
+            ),
+            src='whatever',
+        )
+    )
     assert {unwrap(v).orig_url for v in visits} == {
         'http://google.com',
         'http://google.com/',
promnesia/tests/sources/test_shellcmd.py
CHANGED
@@ -1,22 +1,22 @@
-from ...common import _is_windows, Source
-from ...extract import extract_visits
-from ...sources import shellcmd
-
 import pytest

+from ...common import Source, _is_windows
+from ...extract import extract_visits
+from ...sources import shellcmd
 from ..common import get_testdata


 @pytest.mark.skipif(_is_windows, reason="no grep on windows")
 def test_via_grep() -> None:
-
-
-
-
-
-
-
-
-
+    visits = list(
+        extract_visits(
+            Source(
+                shellcmd.index,
+                # meh. maybe should deprecate plain string here...
+                r"""grep -Eo -r --no-filename (http|https)://\S+ """ + str(get_testdata('custom')),
+            ),
+            src='whatever',
+        )
+    )
     # TODO I guess filtering of equivalent urls should rather be tested on something having context (e.g. org mode)
     assert len(visits) == 5
promnesia/tests/sources/test_takeout.py
CHANGED
@@ -1,15 +1,13 @@
 from datetime import datetime, timezone

+import pytest
+from my.core.cfg import tmp_config
+
 from ...common import Source
 from ...extract import extract_visits
 from ...sources import takeout
-
-import pytest
-
 from ..common import get_testdata, unwrap

-from my.core.cfg import tmp_config
-

 # TODO apply in conftest so it's used in all tests?
 @pytest.fixture