promnesia 1.2.20240810__py3-none-any.whl → 1.3.20241021__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promnesia/__init__.py +14 -3
- promnesia/__main__.py +38 -25
- promnesia/cannon.py +23 -23
- promnesia/common.py +49 -42
- promnesia/compare.py +18 -20
- promnesia/compat.py +10 -10
- promnesia/config.py +20 -22
- promnesia/database/common.py +4 -3
- promnesia/database/dump.py +14 -13
- promnesia/database/load.py +7 -7
- promnesia/extract.py +13 -11
- promnesia/kjson.py +11 -10
- promnesia/logging.py +1 -1
- promnesia/misc/install_server.py +7 -8
- promnesia/server.py +42 -31
- promnesia/sources/auto.py +43 -30
- promnesia/sources/auto_logseq.py +6 -5
- promnesia/sources/auto_obsidian.py +2 -2
- promnesia/sources/browser.py +14 -9
- promnesia/sources/browser_legacy.py +17 -13
- promnesia/sources/demo.py +7 -7
- promnesia/sources/fbmessenger.py +3 -2
- promnesia/sources/filetypes.py +9 -7
- promnesia/sources/github.py +5 -7
- promnesia/sources/guess.py +2 -1
- promnesia/sources/hackernews.py +2 -2
- promnesia/sources/hpi.py +2 -2
- promnesia/sources/html.py +7 -5
- promnesia/sources/hypothesis.py +3 -2
- promnesia/sources/instapaper.py +2 -2
- promnesia/sources/markdown.py +17 -7
- promnesia/sources/org.py +20 -10
- promnesia/sources/plaintext.py +30 -31
- promnesia/sources/pocket.py +3 -2
- promnesia/sources/reddit.py +19 -18
- promnesia/sources/roamresearch.py +2 -1
- promnesia/sources/rss.py +3 -4
- promnesia/sources/shellcmd.py +19 -6
- promnesia/sources/signal.py +14 -13
- promnesia/sources/smscalls.py +2 -2
- promnesia/sources/stackexchange.py +3 -2
- promnesia/sources/takeout.py +23 -13
- promnesia/sources/takeout_legacy.py +15 -11
- promnesia/sources/telegram.py +13 -11
- promnesia/sources/telegram_legacy.py +18 -7
- promnesia/sources/twitter.py +6 -5
- promnesia/sources/vcs.py +5 -3
- promnesia/sources/viber.py +10 -9
- promnesia/sources/website.py +4 -4
- promnesia/sources/zulip.py +3 -2
- promnesia/sqlite.py +7 -4
- promnesia/tests/common.py +8 -5
- promnesia/tests/server_helper.py +11 -8
- promnesia/tests/sources/test_auto.py +2 -3
- promnesia/tests/sources/test_filetypes.py +2 -1
- promnesia/tests/sources/test_hypothesis.py +3 -3
- promnesia/tests/sources/test_org.py +2 -3
- promnesia/tests/sources/test_plaintext.py +0 -1
- promnesia/tests/sources/test_shellcmd.py +3 -4
- promnesia/tests/sources/test_takeout.py +3 -5
- promnesia/tests/test_cannon.py +5 -5
- promnesia/tests/test_cli.py +4 -6
- promnesia/tests/test_compare.py +1 -1
- promnesia/tests/test_config.py +7 -8
- promnesia/tests/test_db_dump.py +11 -12
- promnesia/tests/test_extract.py +10 -6
- promnesia/tests/test_indexer.py +14 -8
- promnesia/tests/test_server.py +2 -3
- promnesia/tests/test_traverse.py +0 -2
- promnesia/tests/utils.py +4 -4
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/METADATA +3 -2
- promnesia-1.3.20241021.dist-info/RECORD +83 -0
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/WHEEL +1 -1
- promnesia-1.2.20240810.dist-info/RECORD +0 -83
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/LICENSE +0 -0
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/entry_points.txt +0 -0
- {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/top_level.txt +0 -0
promnesia/compat.py
CHANGED
@@ -1,12 +1,12 @@
-## we used to have compat fixes here for these for python3.7
-## keeping in case any sources depended on compat functions
-from subprocess import PIPE, run, check_call, check_output, Popen
-from typing import Protocol, Literal
-##
+from typing import TYPE_CHECKING

+if not TYPE_CHECKING:
+    ## we used to have compat fixes here for these for python3.7
+    ## keeping in case any sources depended on compat functions
+    from subprocess import PIPE, Popen, check_call, check_output, run # noqa: F401
+    from typing import Literal, Protocol # noqa: F401
+    ##

-#
-def removeprefix(text: str, prefix: str) -> str:
-    if text.startswith(prefix):
-        return text[len(prefix):]
-    return text
+# todo deprecate properly
+def removeprefix(text: str, prefix: str) -> str:
+    return text.removeprefix(prefix)
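The shim used to reimplement `removeprefix` for pre-3.9 interpreters; the new version just delegates to the `str.removeprefix` builtin. A minimal sketch (not from the package) comparing the two spellings:

```python
# Minimal sketch, not promnesia code: the old polyfill pattern vs the 3.9+ builtin.
def removeprefix_polyfill(text: str, prefix: str) -> str:
    # pre-3.9 pattern: strip the prefix only if it is actually there
    if text.startswith(prefix):
        return text[len(prefix):]
    return text


assert removeprefix_polyfill("https://example.com", "https://") == "example.com"
assert "https://example.com".removeprefix("https://") == "example.com"  # builtin since Python 3.9
# a prefix that does not match is left untouched in both cases
assert "example.com".removeprefix("https://") == "example.com"
```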
promnesia/config.py
CHANGED
@@ -1,21 +1,19 @@
-from
-
-from types import ModuleType
-from typing import List, Optional, Union, NamedTuple, Iterable, Callable
+from __future__ import annotations
+
 import importlib
 import importlib.util
+import os
 import warnings
+from collections.abc import Iterable
+from pathlib import Path
+from types import ModuleType
+from typing import Callable, NamedTuple, Union

-from .common import PathIsh,
-from .common import Res, Source, DbVisit
-
+from .common import DbVisit, PathIsh, Res, Source, default_cache_dir, default_output_dir

 HookT = Callable[[Res[DbVisit]], Iterable[Res[DbVisit]]]


-from typing import Any
-
-
 ModuleName = str

 # something that can be converted into a proper Source
@@ -24,19 +22,19 @@ ConfigSource = Union[Source, ModuleName, ModuleType]

 class Config(NamedTuple):
     # TODO remove default from sources once migrated
-    SOURCES:
+    SOURCES: list[ConfigSource] = []

     # if not specified, uses user data dir
-    OUTPUT_DIR:
+    OUTPUT_DIR: PathIsh | None = None

-    CACHE_DIR:
-    FILTERS:
+    CACHE_DIR: PathIsh | None = ''
+    FILTERS: list[str] = []

-    HOOK:
+    HOOK: HookT | None = None

     #
     # NOTE: INDEXERS is deprecated, use SOURCES instead
-    INDEXERS:
+    INDEXERS: list[ConfigSource] = []
     #MIME_HANDLER: Optional[str] = None # TODO

     @property
@@ -68,11 +66,11 @@ class Config(NamedTuple):
         yield Source(r)

     @property
-    def cache_dir(self) ->
+    def cache_dir(self) -> Path | None:
         # TODO we used to use this for cachew, but it's best to rely on HPI modules etc to cofigure this
         # keeping just in case for now
         cd = self.CACHE_DIR
-        cpath:
+        cpath: Path | None
         if cd is None:
            cpath = None # means 'disabled' in cachew
         elif cd == '': # meh.. but need to make it None friendly..
@@ -96,10 +94,10 @@ class Config(NamedTuple):
         return self.output_dir / 'promnesia.sqlite'

     @property
-    def hook(self) ->
+    def hook(self) -> HookT | None:
         return self.HOOK

-instance:
+instance: Config | None = None


 def has() -> bool:
@@ -139,7 +137,7 @@ def import_config(config_file: PathIsh) -> Config:


 # TODO: ugh. this causes warnings to be repeated multiple times... need to reuse the pool or something..
-def use_cores() ->
+def use_cores() -> int | None:
     '''
     Somewhat experimental.
     For now only used in sources.auto, perhaps later will be shared among the other indexers.
@@ -154,7 +152,7 @@ def use_cores() -> Optional[int]:
        return 0


-def extra_fd_args() ->
+def extra_fd_args() -> list[str]:
     '''
     Not sure where it belongs yet... so via env variable for now
     Can be used to pass --ignore-file parameter
promnesia/database/common.py
CHANGED
@@ -1,10 +1,11 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
 from datetime import datetime
-from typing import Sequence, Tuple

 from sqlalchemy import (
     Column,
     Integer,
-    Row,
     String,
 )

@@ -30,7 +31,7 @@ def get_columns() -> Sequence[Column]:
     return res


-def db_visit_to_row(v: DbVisit) ->
+def db_visit_to_row(v: DbVisit) -> tuple:
     # ugh, very hacky...
     # we want to make sure the resulting tuple only consists of simple types
     # so we can use dbengine directly
promnesia/database/dump.py
CHANGED
@@ -1,9 +1,11 @@
-from
+from __future__ import annotations
+
 import sqlite3
-from
+from collections.abc import Iterable
+from pathlib import Path
+from typing import Optional

 from more_itertools import chunked
-
 from sqlalchemy import (
     Engine,
     MetaData,
@@ -16,6 +18,7 @@ from sqlalchemy import (
 )
 from sqlalchemy.dialects import sqlite as dialect_sqlite

+from .. import config
 from ..common import (
     DbVisit,
     Loc,
@@ -24,9 +27,7 @@ from ..common import (
     get_logger,
     now_tz,
 )
-from .common import
-from .. import config
-
+from .common import db_visit_to_row, get_columns

 # NOTE: I guess the main performance benefit from this is not creating too many tmp lists and avoiding overhead
 # since as far as sql is concerned it should all be in the same transaction. only a guess
@@ -50,7 +51,7 @@ def begin_immediate_transaction(conn):
     conn.exec_driver_sql('BEGIN IMMEDIATE')


-Stats =
+Stats = dict[Optional[SourceName], int]


 # returns critical warnings
@@ -58,8 +59,8 @@ def visits_to_sqlite(
     vit: Iterable[Res[DbVisit]],
     *,
     overwrite_db: bool,
-    _db_path:
-) ->
+    _db_path: Path | None = None, # only used in tests
+) -> list[Exception]:
     if _db_path is None:
         db_path = config.get().db
     else:
@@ -95,7 +96,7 @@ def visits_to_sqlite(

     def query_total_stats(conn) -> Stats:
         query = select(table.c.src, func.count(table.c.src)).select_from(table).group_by(table.c.src)
-        return
+        return dict(conn.execute(query).all())

     def get_engine(*args, **kwargs) -> Engine:
         # kwargs['echo'] = True # useful for debugging
@@ -122,7 +123,7 @@ def visits_to_sqlite(
     # (note that this also requires WAL mode)
     engine = get_engine(f'sqlite:///{db_path}', connect_args={'timeout': _CONNECTION_TIMEOUT_SECONDS})

-    cleared:
+    cleared: set[str] = set()

     # by default, sqlalchemy does some sort of BEGIN (implicit) transaction, which doesn't provide proper isolation??
     # see https://docs.sqlalchemy.org/en/20/dialects/sqlite.html#serializable-isolation-savepoints-transactional-ddl
@@ -144,7 +145,7 @@ def visits_to_sqlite(
     insert_stmt_raw = str(insert_stmt.compile(dialect=dialect_sqlite.dialect(paramstyle='qmark')))

     for chunk in chunked(vit_ok(), n=_CHUNK_BY):
-        srcs =
+        srcs = {v.src or '' for v in chunk}
         new = srcs.difference(cleared)

         for src in new:
@@ -181,7 +182,7 @@ def visits_to_sqlite(
     for k, v in stats_changes.items():
         logger.info(f'database stats changes: {k} {v}')

-    res:
+    res: list[Exception] = []
     if total_ok == 0:
         res.append(RuntimeError('No visits were indexed, something is probably wrong!'))
     return res
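The `visits_to_sqlite` changes keep the existing batching approach: visits are written in chunks via `more_itertools.chunked`, and each source is cleared lazily the first time it shows up, tracked in the `cleared` set. A rough, self-contained sketch of that pattern using plain `sqlite3` (promnesia's real code goes through SQLAlchemy and its own visit schema; the table and rows here are made up):

```python
# Rough sketch of the chunked-insert pattern, illustrative only.
import sqlite3

from more_itertools import chunked

rows = [("src-a", "https://example.com/1"), ("src-b", "https://example.com/2")] * 1000

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE visits (src TEXT, url TEXT)")

cleared: set[str] = set()
for chunk in chunked(rows, n=500):
    srcs = {src or '' for src, _ in chunk}   # sources present in this chunk
    for src in srcs.difference(cleared):     # clear each source once, lazily
        conn.execute("DELETE FROM visits WHERE src = ?", (src,))
        cleared.add(src)
    conn.executemany("INSERT INTO visits VALUES (?, ?)", chunk)
conn.commit()
print(conn.execute("SELECT COUNT(*) FROM visits").fetchone()[0])  # 2000
```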
promnesia/database/load.py
CHANGED
@@ -1,19 +1,19 @@
+from __future__ import annotations
+
 from pathlib import Path
-from typing import Tuple, List

 from sqlalchemy import (
-    create_engine,
-    exc,
     Engine,
-    MetaData,
     Index,
+    MetaData,
     Table,
+    create_engine,
+    exc,
 )

 from .common import DbVisit, get_columns, row_to_db_visit

-
-DbStuff = Tuple[Engine, Table]
+DbStuff = tuple[Engine, Table]


 def get_db_stuff(db_path: Path) -> DbStuff:
@@ -39,7 +39,7 @@ def get_db_stuff(db_path: Path) -> DbStuff:
     return engine, table


-def get_all_db_visits(db_path: Path) ->
+def get_all_db_visits(db_path: Path) -> list[DbVisit]:
     # NOTE: this is pretty inefficient if the DB is huge
     # mostly intended for tests
     engine, table = get_db_stuff(db_path)
promnesia/extract.py
CHANGED
@@ -1,20 +1,22 @@
-from
+from __future__ import annotations
+
 import re
-import
-from
+from collections.abc import Iterable, Sequence
+from functools import lru_cache

 from .cannon import CanonifyException
 from .common import (
-
-    DbVisit, Visit,
-    Res,
-    SourceName, Source,
+    DbVisit,
     Filter,
+    Res,
+    Results,
+    Source,
+    SourceName,
     Url,
-
+    Visit,
+    logger,
 )

-
 DEFAULT_FILTERS = (
     r'^chrome-\w+://',
     r'chrome://newtab',
@@ -53,7 +55,7 @@ def extract_visits(source: Source, *, src: SourceName) -> Iterable[Res[DbVisit]]
         yield e
         return

-    handled:
+    handled: set[Visit] = set()
     try:
         for p in vit:
             if isinstance(p, Exception):
@@ -94,7 +96,7 @@ def filtered(url: Url) -> bool:
     return any(f(url) for f in filters())


-def make_filter(thing:
+def make_filter(thing: str | Filter) -> Filter:
     if isinstance(thing, str):
         rc = re.compile(thing)
         def filter_(u: str) -> bool:
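`extract_visits` now annotates its dedup set as `handled: set[Visit] = set()`; the surrounding loop appears to skip visits it has already yielded. A simplified sketch of that generator-with-a-seen-set pattern (plain strings stand in for promnesia's `Visit` objects):

```python
# Simplified, illustrative dedup generator; promnesia's real loop also handles
# exceptions yielded by the source and wraps results into DbVisit objects.
from collections.abc import Iterable


def dedup(items: Iterable[str]) -> Iterable[str]:
    handled: set[str] = set()
    for p in items:
        if p in handled:
            continue      # already emitted, skip duplicates
        handled.add(p)
        yield p


print(list(dedup(["a", "b", "a", "c", "b"])))  # ['a', 'b', 'c']
```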
promnesia/kjson.py
CHANGED
@@ -3,19 +3,19 @@ Some experimental ideas on JSON processing.
 This is a bit overengineered and I admit it!
 I'll make it more readable, but in the meantime feel free to open an issue if you're confused about something.
 """
+from __future__ import annotations

-from typing import Any,
+from typing import Any, Union, cast

-
-
-JList = List[Any]
+JDict = dict[str, Any] # TODO not sure if we can do recursive..
+JList = list[Any]
 JPrim = Union[str, int, float] # , type(None)]

 Json = Union[JDict, JList, JPrim]

-JPathPart =
+JPathPart = tuple[Json, Union[str, int]]

-JPath =
+JPath = tuple[JPathPart, ...]


 class JsonProcessor:
@@ -36,7 +36,7 @@ class JsonProcessor:
         if res is self.SKIP:
             return
         for k, v in js.items():
-            path = cast(JPath, jp + ((js, k), ))
+            path = cast(JPath, jp + ((js, k), )) # noqa: RUF005
             self._do(v, path)

     def do_list(self, js: JList, jp: JPath) -> None:
@@ -45,7 +45,7 @@ class JsonProcessor:
         if res is self.SKIP:
             return
         for i, x in enumerate(js):
-            path = cast(JPath, jp + ((js, i), ))
+            path = cast(JPath, jp + ((js, i), )) # noqa: RUF005
             self._do(x, path)

     def _do(self, js: Json, path: JPath) -> None:
@@ -65,7 +65,7 @@ class JsonProcessor:
             self._do(js, path)

     @classmethod
-    def kpath(cls, path: JPath) ->
+    def kpath(cls, path: JPath) -> tuple[JPathPart, ...]:
         return tuple(x[1] for x in path) # type: ignore

 # TODO path is a sequence of jsons and keys?
@@ -73,9 +73,10 @@ class JsonProcessor:
 def test_json_processor():
     handled = []
     class Proc(JsonProcessor):
-        def handle_dict(self, value: JDict, path):
+        def handle_dict(self, value: JDict, path): # noqa: ARG002
             if 'skipme' in self.kpath(path): # type: ignore[comparison-overlap]
                 return JsonProcessor.SKIP
+            return None

         def handle_str(self, value: str, path):
             if 'http' in value:
promnesia/logging.py
CHANGED
promnesia/misc/install_server.py
CHANGED
@@ -1,15 +1,12 @@
-#!/usr/bin/env python3
 from __future__ import annotations

 import argparse
 import os
+import platform
 import sys
 import time
 from pathlib import Path
-import platform
-import shutil
 from subprocess import check_call, run
-from typing import List

 SYSTEM = platform.system()
 UNSUPPORTED_SYSTEM = RuntimeError(f'Platform {SYSTEM} is not supported yet!')
@@ -59,7 +56,7 @@ def systemd(*args: str | Path, method=check_call) -> None:
     ])


-def install_systemd(name: str, out: Path, launcher: str, largs:
+def install_systemd(name: str, out: Path, launcher: str, largs: list[str]) -> None:
     unit_name = name

     import shlex
@@ -81,7 +78,7 @@ def install_systemd(name: str, out: Path, launcher: str, largs: List[str]) -> No
         raise e


-def install_launchd(name: str, out: Path, launcher: str, largs:
+def install_launchd(name: str, out: Path, launcher: str, largs: list[str]) -> None:
     service_name = name
     arguments = '\n'.join(f'<string>{a}</string>' for a in [launcher, *largs])
     out.write_text(LAUNCHD_TEMPLATE.format(
@@ -116,14 +113,16 @@ def install(args: argparse.Namespace) -> None:
     print(f"Writing launch script to {out}", file=sys.stderr)

     # ugh. we want to know whether we're invoked 'properly' as an executable or ad-hoc via scripts/promnesia
+    extra_exe: list[str] = []
     if os.environ.get('DIRTY_RUN') is not None:
         launcher = str(root() / 'scripts/promnesia')
     else:
-
-
+        launcher = sys.executable
+        extra_exe = ['-m', 'promnesia']

     db = args.db
     largs = [
+        *extra_exe,
         'serve',
         *([] if db is None else ['--db', str(db)]),
         '--timezone', args.timezone,
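With the new `extra_exe` list, the generated systemd/launchd unit invokes `python -m promnesia serve ...` instead of relying on a `promnesia` launcher script being on PATH. An illustrative reconstruction of how the argument list comes together (the paths and option values below are made up; see `install_server.install` for the real logic):

```python
# Illustrative only: how launcher + largs assemble after this change.
import sys

db = "/home/user/.local/share/promnesia/promnesia.sqlite"  # hypothetical --db value
timezone = "Europe/London"                                  # hypothetical --timezone value

launcher = sys.executable
extra_exe = ["-m", "promnesia"]

largs = [
    *extra_exe,
    "serve",
    *([] if db is None else ["--db", str(db)]),
    "--timezone", timezone,
]
print([launcher, *largs])
# e.g. ['/usr/bin/python3', '-m', 'promnesia', 'serve', '--db', '...', '--timezone', 'Europe/London']
```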
promnesia/server.py
CHANGED
@@ -1,35 +1,45 @@
-#!/usr/bin/python3
 from __future__ import annotations

 import argparse
-from dataclasses import dataclass
-from datetime import timedelta
-from functools import lru_cache
 import importlib.metadata
 import json
 import logging
 import os
+from dataclasses import dataclass
+from datetime import timedelta
+from functools import lru_cache
 from pathlib import Path
-from typing import
-
+from typing import Any, NamedTuple, Optional, Protocol

+import fastapi
 import pytz
 from pytz import BaseTzInfo
-
-
-
-
-
-
+from sqlalchemy import (
+    Column,
+    Table,
+    and_,
+    between,
+    exc,
+    func,
+    literal,
+    or_,
+    select,
+    types,
+)
 from sqlalchemy.sql import text
+from sqlalchemy.sql.elements import ColumnElement

-
-from .common import PathWithMtime, DbVisit, Url, setup_logger, default_output_dir, get_system_tz
 from .cannon import canonify
+from .common import (
+    DbVisit,
+    PathWithMtime,
+    default_output_dir,
+    get_system_tz,
+    setup_logger,
+)
 from .database.load import DbStuff, get_db_stuff, row_to_db_visit

-
-Json = Dict[str, Any]
+Json = dict[str, Any]

 app = fastapi.FastAPI()

@@ -65,7 +75,7 @@ class ServerConfig(NamedTuple):
         })

     @classmethod
-    def from_str(cls, cfgs: str) ->
+    def from_str(cls, cfgs: str) -> ServerConfig:
         d = json.loads(cfgs)
         return cls(
             db =Path (d['db']),
@@ -111,7 +121,7 @@ def as_json(v: DbVisit) -> Json:
     }


-def get_db_path(check: bool=True) -> Path:
+def get_db_path(*, check: bool=True) -> Path:
     db = EnvConfig.get().db
     if check:
         assert db.exists(), db
@@ -125,7 +135,7 @@ def _get_stuff(db_path: PathWithMtime) -> DbStuff:
     return get_db_stuff(db_path=db_path.path)


-def get_stuff(db_path:
+def get_stuff(db_path: Path | None=None) -> DbStuff: # TODO better name
     # ok, it will always load from the same db file; but intermediate would be kinda an optional dump.
     if db_path is None:
         db_path = get_db_path()
@@ -136,7 +146,7 @@ def db_stats(db_path: Path) -> Json:
     engine, table = get_stuff(db_path)
     query = select(func.count()).select_from(table)
     with engine.connect() as conn:
-        total =
+        [(total,)] = conn.execute(query)
     return {
         'total_visits': total,
     }
@@ -172,17 +182,17 @@ def search_common(url: str, where: Where) -> VisitsResponse:
     with engine.connect() as conn:
         try:
             # TODO make more defensive here
-            visits:
+            visits: list[DbVisit] = [row_to_db_visit(row) for row in conn.execute(query)]
         except exc.OperationalError as e:
             if getattr(e, 'msg', None) == 'no such table: visits':
-                logger.
+                logger.warning('you may have to run indexer first!')
                 #result['visits'] = [{an error with a msg}] # TODO
                 #return result
                 raise

     logger.debug('got %d visits from db', len(visits))

-    vlist:
+    vlist: list[DbVisit] = []
     for vis in visits:
         dt = vis.dt
         if dt.tzinfo is None: # FIXME need this for /visits endpoint as well?
@@ -225,7 +235,7 @@ def status() -> Json:
         logger.exception(e)
         stats = {'ERROR': str(e)}

-    version:
+    version: str | None
     try:
         version = get_version()
     except Exception as e:
@@ -299,7 +309,7 @@ def search_around(request: SearchAroundRequest) -> VisitsResponse:

     return search_common(
         url='http://dummy.org', # NOTE: not used in the where query (below).. perhaps need to get rid of this
-        where=lambda table, url: between(
+        where=lambda table, url: between( # noqa: ARG005
             func.strftime(
                 '%s', # NOTE: it's tz aware, e.g. would distinguish +05:00 vs -03:00
                 # this is a bit fragile, relies on cachew internal timestamp format, e.g.
@@ -322,25 +332,26 @@ def search_around(request: SearchAroundRequest) -> VisitsResponse:
 _NO_VERSION = (0, 11, 14)
 _LATEST = (9999, 9999, 9999)

-def as_version(version: str) ->
+def as_version(version: str) -> tuple[int, int, int]:
     if version == '':
         return _NO_VERSION
     try:
         [v1, v2, v3] = map(int, version.split('.'))
-        return (v1, v2, v3)
     except Exception as e:
         logger = get_logger()
         logger.error('error while parsing version %s', version)
         logger.exception(e)
         return _LATEST
+    else:
+        return (v1, v2, v3)


 @dataclass
 class VisitedRequest:
-    urls:
+    urls: list[str]
     client_version: str = ''

-VisitedResponse =
+VisitedResponse = list[Optional[Json]]

 @app.get ('/visited', response_model=VisitedResponse)
 @app.post('/visited', response_model=VisitedResponse)
@@ -355,7 +366,7 @@ def visited(request: VisitedRequest) -> VisitedResponse:
     version = as_version(client_version)

     nurls = [canonify(u) for u in urls]
-    snurls =
+    snurls = sorted(set(nurls))

     if len(snurls) == 0:
         return []
@@ -388,7 +399,7 @@ SELECT queried, visits.*
     # brings down large queries to 50ms...
     with engine.connect() as conn:
         res = list(conn.execute(query))
-        present:
+        present: dict[str, Any] = {row[0]: row_to_db_visit(row[1:]) for row in res}
     results = []
     for nu in nurls:
         r = present.get(nu, None)