promnesia 1.1.20230129__py3-none-any.whl → 1.2.20240810__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promnesia/__main__.py +58 -50
- promnesia/cannon.py +4 -4
- promnesia/common.py +57 -38
- promnesia/compare.py +3 -2
- promnesia/compat.py +6 -65
- promnesia/config.py +4 -2
- promnesia/database/common.py +66 -0
- promnesia/database/dump.py +187 -0
- promnesia/{read_db.py → database/load.py} +10 -11
- promnesia/extract.py +1 -0
- promnesia/kjson.py +1 -1
- promnesia/logging.py +14 -14
- promnesia/misc/__init__.pyi +0 -0
- promnesia/misc/config_example.py +1 -2
- promnesia/misc/install_server.py +5 -4
- promnesia/server.py +24 -24
- promnesia/sources/__init__.pyi +0 -0
- promnesia/sources/auto.py +12 -7
- promnesia/sources/browser.py +80 -293
- promnesia/sources/browser_legacy.py +298 -0
- promnesia/sources/demo.py +18 -2
- promnesia/sources/filetypes.py +8 -0
- promnesia/sources/github.py +2 -2
- promnesia/sources/hackernews.py +1 -2
- promnesia/sources/hypothesis.py +1 -1
- promnesia/sources/markdown.py +15 -15
- promnesia/sources/org.py +7 -3
- promnesia/sources/plaintext.py +3 -1
- promnesia/sources/reddit.py +2 -2
- promnesia/sources/rss.py +5 -1
- promnesia/sources/shellcmd.py +6 -2
- promnesia/sources/signal.py +29 -20
- promnesia/sources/smscalls.py +8 -1
- promnesia/sources/stackexchange.py +2 -2
- promnesia/sources/takeout.py +132 -12
- promnesia/sources/takeout_legacy.py +10 -2
- promnesia/sources/telegram.py +79 -123
- promnesia/sources/telegram_legacy.py +117 -0
- promnesia/sources/vcs.py +1 -1
- promnesia/sources/viber.py +6 -15
- promnesia/sources/website.py +1 -1
- promnesia/sqlite.py +42 -0
- promnesia/tests/__init__.py +0 -0
- promnesia/tests/common.py +137 -0
- promnesia/tests/server_helper.py +64 -0
- promnesia/tests/sources/__init__.py +0 -0
- promnesia/tests/sources/test_auto.py +66 -0
- promnesia/tests/sources/test_filetypes.py +42 -0
- promnesia/tests/sources/test_hypothesis.py +39 -0
- promnesia/tests/sources/test_org.py +65 -0
- promnesia/tests/sources/test_plaintext.py +26 -0
- promnesia/tests/sources/test_shellcmd.py +22 -0
- promnesia/tests/sources/test_takeout.py +58 -0
- promnesia/tests/test_cannon.py +325 -0
- promnesia/tests/test_cli.py +42 -0
- promnesia/tests/test_compare.py +30 -0
- promnesia/tests/test_config.py +290 -0
- promnesia/tests/test_db_dump.py +223 -0
- promnesia/tests/test_extract.py +61 -0
- promnesia/tests/test_extract_urls.py +43 -0
- promnesia/tests/test_indexer.py +245 -0
- promnesia/tests/test_server.py +292 -0
- promnesia/tests/test_traverse.py +41 -0
- promnesia/tests/utils.py +35 -0
- {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/METADATA +14 -19
- promnesia-1.2.20240810.dist-info/RECORD +83 -0
- {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/WHEEL +1 -1
- {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/entry_points.txt +0 -1
- promnesia/dump.py +0 -105
- promnesia-1.1.20230129.dist-info/RECORD +0 -55
- {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/LICENSE +0 -0
- {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/top_level.txt +0 -0
promnesia/__main__.py
CHANGED
@@ -1,21 +1,25 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
1
3
|
import argparse
|
2
|
-
import
|
4
|
+
import ast
|
5
|
+
import importlib
|
3
6
|
import inspect
|
4
|
-
import
|
5
|
-
from typing import List, Tuple, Optional, Dict, Sequence, Iterable, Iterator, Union
|
7
|
+
import os
|
6
8
|
from pathlib import Path
|
7
|
-
|
8
|
-
from
|
9
|
-
|
9
|
+
import shutil
|
10
|
+
from subprocess import run, check_call, Popen
|
11
|
+
import sys
|
12
|
+
from tempfile import TemporaryDirectory, gettempdir
|
13
|
+
from typing import Callable, Sequence, Iterable, Iterator, Union
|
10
14
|
|
11
15
|
|
12
16
|
from . import config
|
13
17
|
from . import server
|
14
18
|
from .misc import install_server
|
15
|
-
from .common import PathIsh, logger, get_tmpdir, DbVisit, Res
|
19
|
+
from .common import Extractor, PathIsh, logger, get_tmpdir, DbVisit, Res
|
16
20
|
from .common import Source, get_system_tz, user_config_file, default_config_path
|
17
|
-
from .dump import visits_to_sqlite
|
18
|
-
from .extract import extract_visits
|
21
|
+
from .database.dump import visits_to_sqlite
|
22
|
+
from .extract import extract_visits
|
19
23
|
|
20
24
|
|
21
25
|
def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Res[DbVisit]]:
|
@@ -36,7 +40,7 @@ def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Re
|
|
36
40
|
|
37
41
|
for i, source in enumerate(sources):
|
38
42
|
# TODO why would it not be present??
|
39
|
-
name = getattr(source, "name", None)
|
43
|
+
name: str | None = getattr(source, "name", None)
|
40
44
|
if name and is_subset_sources:
|
41
45
|
matched = name in sources_subset or i in sources_subset
|
42
46
|
if matched:
|
@@ -72,7 +76,7 @@ def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Re
|
|
72
76
|
|
73
77
|
def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=(), overwrite_db: bool=False) -> Iterable[Exception]:
|
74
78
|
# also keep & return errors for further display
|
75
|
-
errors:
|
79
|
+
errors: list[Exception] = []
|
76
80
|
def it() -> Iterable[Res[DbVisit]]:
|
77
81
|
for v in iter_all_visits(sources_subset):
|
78
82
|
if isinstance(v, Exception):
|
@@ -93,38 +97,38 @@ def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=(), ove
|
|
93
97
|
|
94
98
|
|
95
99
|
def do_index(
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
100
|
+
config_file: Path,
|
101
|
+
dry: bool=False,
|
102
|
+
sources_subset: Iterable[Union[str, int]]=(),
|
103
|
+
overwrite_db: bool=False,
|
104
|
+
) -> Sequence[Exception]:
|
101
105
|
config.load_from(config_file) # meh.. should be cleaner
|
102
106
|
try:
|
103
107
|
errors = list(_do_index(dry=dry, sources_subset=sources_subset, overwrite_db=overwrite_db))
|
104
108
|
finally:
|
109
|
+
# this reset is mainly for tests, so we don't end up reusing the same config by accident
|
105
110
|
config.reset()
|
106
111
|
if len(errors) > 0:
|
107
112
|
logger.error('%d errors, printing them out:', len(errors))
|
108
113
|
for e in errors:
|
109
114
|
logger.exception(e)
|
110
115
|
logger.error('%d errors, exit code 1', len(errors))
|
111
|
-
|
116
|
+
return errors
|
112
117
|
|
113
118
|
|
114
|
-
def demo_sources():
|
115
|
-
def lazy(name: str):
|
119
|
+
def demo_sources() -> dict[str, Callable[[], Extractor]]:
|
120
|
+
def lazy(name: str) -> Callable[[], Extractor]:
|
116
121
|
# helper to avoid failed imports etc, since people might be lacking necessary dependencies
|
117
|
-
def inner(
|
122
|
+
def inner() -> Extractor:
|
118
123
|
from . import sources
|
119
|
-
|
120
|
-
module = importlib.import_module('promnesia.sources' + '.' + name)
|
124
|
+
module = importlib.import_module(f'promnesia.sources.{name}')
|
121
125
|
return getattr(module, 'index')
|
122
126
|
return inner
|
123
127
|
|
124
128
|
res = {}
|
125
|
-
import ast
|
126
129
|
import promnesia.sources
|
127
|
-
|
130
|
+
path: list[str] = getattr(promnesia.sources, '__path__') # should be present
|
131
|
+
for p in path:
|
128
132
|
for x in sorted(Path(p).glob('*.py')):
|
129
133
|
a = ast.parse(x.read_text())
|
130
134
|
candidates = [c for c in a.body if getattr(c, 'name', None) == 'index']
|
@@ -137,14 +141,13 @@ def do_demo(
|
|
137
141
|
*,
|
138
142
|
index_as: str,
|
139
143
|
params: Sequence[str],
|
140
|
-
port:
|
141
|
-
config_file:
|
144
|
+
port: str | None,
|
145
|
+
config_file: Path | None,
|
142
146
|
dry: bool=False,
|
143
147
|
name: str='demo',
|
144
148
|
sources_subset: Iterable[Union[str, int]]=(),
|
145
149
|
overwrite_db: bool=False,
|
146
150
|
) -> None:
|
147
|
-
from pprint import pprint
|
148
151
|
with TemporaryDirectory() as tdir:
|
149
152
|
outdir = Path(tdir)
|
150
153
|
|
@@ -185,7 +188,6 @@ def do_demo(
|
|
185
188
|
|
186
189
|
|
187
190
|
def read_example_config() -> str:
|
188
|
-
import inspect
|
189
191
|
from .misc import config_example
|
190
192
|
return inspect.getsource(config_example)
|
191
193
|
|
@@ -214,19 +216,25 @@ def config_check(args: argparse.Namespace) -> None:
|
|
214
216
|
|
215
217
|
|
216
218
|
def _config_check(cfg: Path) -> Iterable[Exception]:
|
217
|
-
from .compat import run
|
218
|
-
|
219
219
|
logger.info('config: %s', cfg)
|
220
220
|
|
221
|
-
def check(cmd) -> Iterable[Exception]:
|
221
|
+
def check(cmd: list[str | Path], **kwargs) -> Iterable[Exception]:
|
222
222
|
logger.debug(' '.join(map(str, cmd)))
|
223
|
-
res = run(cmd)
|
223
|
+
res = run(cmd, **kwargs)
|
224
224
|
if res.returncode > 0:
|
225
225
|
yield Exception()
|
226
226
|
|
227
227
|
logger.info('Checking syntax...')
|
228
|
-
cmd = [sys.executable, '-m', 'compileall', cfg]
|
229
|
-
yield from check(
|
228
|
+
cmd: list[str | Path] = [sys.executable, '-m', 'compileall', cfg]
|
229
|
+
yield from check(
|
230
|
+
cmd,
|
231
|
+
env={
|
232
|
+
**os.environ,
|
233
|
+
# if config is on read only partition, the command would fail due to generated bytecode
|
234
|
+
# so put it in the temporary directory instead
|
235
|
+
'PYTHONPYCACHEPREFIX': gettempdir()
|
236
|
+
},
|
237
|
+
)
|
230
238
|
|
231
239
|
# todo not sure if should be more defensive than check_call here
|
232
240
|
logger.info('Checking type safety...')
|
@@ -265,18 +273,16 @@ def cli_doctor_db(args: argparse.Namespace) -> None:
|
|
265
273
|
check_call(cmd)
|
266
274
|
|
267
275
|
bro = 'sqlitebrowser'
|
268
|
-
import shutil
|
269
276
|
if not shutil.which(bro):
|
270
277
|
logger.error(f'Install {bro} to inspect the database!')
|
271
278
|
sys.exit(1)
|
272
279
|
cmd = [bro, str(db)]
|
273
280
|
logger.debug(f'Running {cmd}')
|
274
|
-
from .compat import Popen
|
275
281
|
Popen(cmd)
|
276
282
|
|
277
283
|
|
278
284
|
def cli_doctor_server(args: argparse.Namespace) -> None:
|
279
|
-
port = args.port
|
285
|
+
port: str = args.port
|
280
286
|
endpoint = f'http://localhost:{port}/status'
|
281
287
|
cmd = ['curl', endpoint]
|
282
288
|
logger.info(f'Running {cmd}')
|
@@ -296,12 +302,11 @@ def _ordinal_or_name(s: str) -> Union[str, int]:
|
|
296
302
|
def main() -> None:
|
297
303
|
# TODO longer, literate description?
|
298
304
|
|
299
|
-
def add_index_args(parser: argparse.ArgumentParser, default_config_path:
|
305
|
+
def add_index_args(parser: argparse.ArgumentParser, default_config_path: PathIsh | None = None) -> None:
|
300
306
|
"""
|
301
307
|
:param default_config_path:
|
302
308
|
if not given, all :func:`demo_sources()` are run
|
303
309
|
"""
|
304
|
-
register_argparse_extend_action_in_pre_py38(parser)
|
305
310
|
parser.add_argument('--config', type=Path, default=default_config_path, help='Config path')
|
306
311
|
parser.add_argument('--dry', action='store_true', help="Dry run, won't touch the database, only print the results out")
|
307
312
|
parser.add_argument(
|
@@ -322,14 +327,14 @@ def main() -> None:
|
|
322
327
|
)
|
323
328
|
|
324
329
|
F = lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, width=120)
|
325
|
-
p = argparse.ArgumentParser(formatter_class=F)
|
330
|
+
p = argparse.ArgumentParser(formatter_class=F)
|
326
331
|
subp = p.add_subparsers(dest='mode', )
|
327
332
|
ep = subp.add_parser('index', help='Create/update the link database', formatter_class=F)
|
328
333
|
add_index_args(ep, default_config_path())
|
329
334
|
# TODO use some way to override or provide config only via cmdline?
|
330
335
|
ep.add_argument('--intermediate', required=False, help="Used for development, you don't need it")
|
331
336
|
|
332
|
-
sp = subp.add_parser('serve', help='Serve a link database', formatter_class=F)
|
337
|
+
sp = subp.add_parser('serve', help='Serve a link database', formatter_class=F)
|
333
338
|
server.setup_parser(sp)
|
334
339
|
|
335
340
|
ap = subp.add_parser('demo', help='Demo mode: index and serve a directory in single command', formatter_class=F)
|
@@ -378,8 +383,9 @@ def main() -> None:
|
|
378
383
|
|
379
384
|
args = p.parse_args()
|
380
385
|
|
386
|
+
mode: str | None = args.mode
|
381
387
|
# TODO is there a way to print full help? i.e. for all subparsers
|
382
|
-
if
|
388
|
+
if mode is None:
|
383
389
|
print('ERROR: Please specify a mode', file=sys.stderr)
|
384
390
|
p.print_help(sys.stderr)
|
385
391
|
sys.exit(1)
|
@@ -391,16 +397,18 @@ def main() -> None:
|
|
391
397
|
# worst case -- could use database?
|
392
398
|
|
393
399
|
with get_tmpdir() as tdir: # TODO??
|
394
|
-
if
|
395
|
-
do_index(
|
400
|
+
if mode == 'index':
|
401
|
+
errors = do_index(
|
396
402
|
config_file=args.config,
|
397
403
|
dry=args.dry,
|
398
404
|
sources_subset=args.sources,
|
399
405
|
overwrite_db=args.overwrite,
|
400
406
|
)
|
401
|
-
|
407
|
+
if len(errors) > 0:
|
408
|
+
sys.exit(1)
|
409
|
+
elif mode == 'serve':
|
402
410
|
server.run(args)
|
403
|
-
elif
|
411
|
+
elif mode == 'demo':
|
404
412
|
# TODO not sure if 'as' is that useful
|
405
413
|
# something like Telegram/Takeout is too hard to setup to justify adhoc mode like this?
|
406
414
|
do_demo(
|
@@ -413,14 +421,14 @@ def main() -> None:
|
|
413
421
|
sources_subset=args.sources,
|
414
422
|
overwrite_db=args.overwrite,
|
415
423
|
)
|
416
|
-
elif
|
424
|
+
elif mode == 'install-server': # todo rename to 'autostart' or something?
|
417
425
|
install_server.install(args)
|
418
|
-
elif
|
426
|
+
elif mode == 'config':
|
419
427
|
args.func(args)
|
420
|
-
elif
|
428
|
+
elif mode == 'doctor':
|
421
429
|
args.func(args)
|
422
430
|
else:
|
423
|
-
raise AssertionError(f'unexpected mode {
|
431
|
+
raise AssertionError(f'unexpected mode {mode}')
|
424
432
|
|
425
433
|
if __name__ == '__main__':
|
426
434
|
main()
|
promnesia/cannon.py
CHANGED
@@ -422,7 +422,7 @@ def canonify(url: str) -> str:
|
|
422
422
|
qq = [(k, v) for i, k, v in sorted(iqq)]
|
423
423
|
# TODO still not sure what we should do..
|
424
424
|
# quote_plus replaces %20 with +, not sure if we want it...
|
425
|
-
query = urlencode(qq, quote_via=quote_via)
|
425
|
+
query = urlencode(qq, quote_via=quote_via)
|
426
426
|
|
427
427
|
path = _quote_path(path)
|
428
428
|
|
@@ -683,7 +683,7 @@ def domains(it): # pragma: no cover
|
|
683
683
|
try:
|
684
684
|
nurl = canonify(url)
|
685
685
|
except CanonifyException as e:
|
686
|
-
print(f"ERROR while normalising! {
|
686
|
+
print(f"ERROR while normalising! {url} {e}")
|
687
687
|
c['ERROR'] += 1
|
688
688
|
continue
|
689
689
|
else:
|
@@ -718,7 +718,7 @@ def groups(it, args): # pragma: no cover
|
|
718
718
|
try:
|
719
719
|
nurl = canonify(url)
|
720
720
|
except CanonifyException as e:
|
721
|
-
print(f"ERROR while normalising! {
|
721
|
+
print(f"ERROR while normalising! {url} {e}")
|
722
722
|
continue
|
723
723
|
udom = nurl[:nurl.find('/')]
|
724
724
|
usplit = udom.split('.')
|
@@ -818,7 +818,7 @@ def main() -> None: # pragma: no cover
|
|
818
818
|
|
819
819
|
- running comparison
|
820
820
|
sqlite3 promnesia.sqlite 'select distinct orig_url from visits where norm_url like "%twitter%" order by orig_url' | src/promnesia/cannon.py
|
821
|
-
''', formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=100)
|
821
|
+
''', formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=100)
|
822
822
|
)
|
823
823
|
p.add_argument('input', nargs='?')
|
824
824
|
p.add_argument('--human', action='store_true')
|
promnesia/common.py
CHANGED
@@ -1,22 +1,25 @@
|
|
1
|
-
from
|
1
|
+
from __future__ import annotations
|
2
|
+
|
2
3
|
from contextlib import contextmanager
|
3
4
|
from datetime import datetime, date
|
4
|
-
import
|
5
|
-
from typing import NamedTuple, Set, Iterable, Dict, TypeVar, Callable, List, Optional, Union, Any, Collection, Sequence, Tuple, TypeVar, TYPE_CHECKING
|
6
|
-
from pathlib import Path
|
5
|
+
from functools import lru_cache
|
7
6
|
from glob import glob
|
8
7
|
import itertools
|
9
|
-
from more_itertools import intersperse
|
10
8
|
import logging
|
11
|
-
|
9
|
+
import os
|
10
|
+
from pathlib import Path
|
12
11
|
import shutil
|
12
|
+
from subprocess import run, PIPE, Popen
|
13
13
|
from timeit import default_timer as timer
|
14
14
|
from types import ModuleType
|
15
|
+
from typing import NamedTuple, Iterable, TypeVar, Callable, List, Optional, Union, TypeVar
|
15
16
|
import warnings
|
16
17
|
|
18
|
+
from more_itertools import intersperse
|
17
19
|
import pytz
|
18
20
|
|
19
21
|
from .cannon import canonify
|
22
|
+
from .compat import removeprefix
|
20
23
|
|
21
24
|
|
22
25
|
_is_windows = os.name == 'nt'
|
@@ -74,14 +77,26 @@ class Loc(NamedTuple):
|
|
74
77
|
# but generally, it will be
|
75
78
|
# (url|file)(linenumber|json_path|anchor)
|
76
79
|
|
80
|
+
|
81
|
+
@lru_cache(None)
|
82
|
+
def warn_once(message: str) -> None:
|
83
|
+
# you'd think that warnings module already logs warnings only once per line..
|
84
|
+
# but sadly it's not the case
|
85
|
+
# see https://github.com/karlicoss/python_duplicate_warnings_investigation/blob/master/test.py
|
86
|
+
warnings.warn(message, stacklevel=2)
|
87
|
+
|
88
|
+
|
89
|
+
def _warn_no_xdg_mime() -> None:
|
90
|
+
warn_once("No xdg-mime on your OS! If you're on OSX, perhaps you can help me! https://github.com/karlicoss/open-in-editor/issues/1")
|
91
|
+
|
92
|
+
|
77
93
|
@lru_cache(1)
|
78
94
|
def _detect_mime_handler() -> str:
|
79
95
|
def exists(what: str) -> bool:
|
80
|
-
from .compat import run, PIPE
|
81
96
|
try:
|
82
97
|
r = run(f'xdg-mime query default x-scheme-handler/{what}'.split(), stdout=PIPE)
|
83
|
-
except FileNotFoundError:
|
84
|
-
|
98
|
+
except (FileNotFoundError, NotADirectoryError): # ugh seems that osx might throw NotADirectory for some reason
|
99
|
+
_warn_no_xdg_mime()
|
85
100
|
return False
|
86
101
|
if r.returncode > 0:
|
87
102
|
warnings.warn('xdg-mime failed') # hopefully rest is in stderr
|
@@ -101,6 +116,7 @@ def _detect_mime_handler() -> str:
|
|
101
116
|
result = 'emacs:'
|
102
117
|
|
103
118
|
# 2. now try to use newer editor:// thing
|
119
|
+
# TODO flip order here? should rely on editor:// first?
|
104
120
|
|
105
121
|
# TODO would be nice to collect warnings and display at the end
|
106
122
|
if not exists('editor'):
|
@@ -191,7 +207,7 @@ def get_logger() -> logging.Logger:
|
|
191
207
|
import tempfile
|
192
208
|
# kinda singleton
|
193
209
|
@lru_cache(1)
|
194
|
-
def get_tmpdir() -> tempfile.TemporaryDirectory:
|
210
|
+
def get_tmpdir() -> tempfile.TemporaryDirectory[str]:
|
195
211
|
# todo use appdirs?
|
196
212
|
tdir = tempfile.TemporaryDirectory(suffix="promnesia")
|
197
213
|
return tdir
|
@@ -284,9 +300,10 @@ def _guess_name(thing: PreSource) -> str:
|
|
284
300
|
guess = thing.__module__
|
285
301
|
|
286
302
|
dflt = 'promnesia.sources.'
|
287
|
-
|
288
|
-
|
289
|
-
|
303
|
+
guess = removeprefix(guess, prefix=dflt)
|
304
|
+
if guess == 'config':
|
305
|
+
# this happens when we define a lambda in config or something without properly wrapping in Source
|
306
|
+
logger.warning(f'Inferred source name "config" for {thing}. This might be misleading TODO')
|
290
307
|
return guess
|
291
308
|
|
292
309
|
|
@@ -296,7 +313,7 @@ def _get_index_function(sourceish: PreSource) -> PreExtractor:
|
|
296
313
|
if hasattr(sourceish, 'index'): # must be a module
|
297
314
|
res = getattr(sourceish, 'index')
|
298
315
|
else:
|
299
|
-
res = sourceish
|
316
|
+
res = sourceish
|
300
317
|
return res
|
301
318
|
|
302
319
|
|
@@ -316,12 +333,17 @@ class Source:
|
|
316
333
|
self.extractor: Extractor = lambda: self.ff(*self.args, **self.kwargs)
|
317
334
|
if src is not None:
|
318
335
|
warnings.warn("'src' argument is deprecated, please use 'name' instead", DeprecationWarning)
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
336
|
+
if name != '':
|
337
|
+
self.name = name
|
338
|
+
elif src != '':
|
339
|
+
self.name = src
|
340
|
+
else:
|
341
|
+
try:
|
342
|
+
name_guess = _guess_name(ff)
|
343
|
+
except:
|
344
|
+
# todo warn?
|
345
|
+
name_guess = ''
|
346
|
+
self.name = name_guess
|
325
347
|
|
326
348
|
@property
|
327
349
|
def description(self) -> str:
|
@@ -370,7 +392,7 @@ def appdirs():
|
|
370
392
|
under_test = os.environ.get('PYTEST_CURRENT_TEST') is not None
|
371
393
|
# todo actually use test name?
|
372
394
|
name = 'promnesia-test' if under_test else 'promnesia'
|
373
|
-
import appdirs as ad # type: ignore[import]
|
395
|
+
import appdirs as ad # type: ignore[import-untyped]
|
374
396
|
return ad.AppDirs(appname=name)
|
375
397
|
|
376
398
|
|
@@ -460,13 +482,13 @@ def fdfind_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
|
|
460
482
|
ignore_args = []
|
461
483
|
if ignore:
|
462
484
|
# Add a statement that excludes the folder
|
463
|
-
|
485
|
+
_ignore_args = [['--exclude', f'{n}'] for n in ignore]
|
464
486
|
# Flatten the list of lists
|
465
|
-
|
487
|
+
ignore_args = list(itertools.chain(*_ignore_args))
|
466
488
|
|
467
489
|
return [
|
468
490
|
*extra_fd_args(),
|
469
|
-
*
|
491
|
+
*ignore_args,
|
470
492
|
*(['--follow'] if follow else []),
|
471
493
|
'--type', 'f',
|
472
494
|
'.',
|
@@ -491,7 +513,6 @@ def traverse(root: Path, *, follow: bool=True, ignore: List[str]=[]) -> Iterable
|
|
491
513
|
yield from (Path(r) / f for f in files if f not in ignore)
|
492
514
|
return
|
493
515
|
|
494
|
-
from .compat import Popen, PIPE
|
495
516
|
cmd = ['find', *find_args(root, follow=follow, ignore=ignore)]
|
496
517
|
# try to use fd.. it cooperates well with gitignore etc, also faster than find
|
497
518
|
for x in ('fd', 'fd-find', 'fdfind'): # has different names on different dists..
|
@@ -516,17 +537,7 @@ def traverse(root: Path, *, follow: bool=True, ignore: List[str]=[]) -> Iterable
|
|
516
537
|
def get_system_zone() -> str:
|
517
538
|
try:
|
518
539
|
import tzlocal
|
519
|
-
|
520
|
-
try:
|
521
|
-
# 4.0 way
|
522
|
-
return tzlocal.get_localzone_name() # type: ignore[attr-defined]
|
523
|
-
except AttributeError as e:
|
524
|
-
# 2.0 way
|
525
|
-
zone = tzlocal.get_localzone().zone # type: ignore[attr-defined]
|
526
|
-
# see https://github.com/python/typeshed/blame/968fd6d01d23470e0c8368e7ee7c43f54aaedc0e/stubs/pytz/pytz/tzinfo.pyi#L6
|
527
|
-
# it says all concrete instances should not be None
|
528
|
-
assert zone is not None
|
529
|
-
return zone
|
540
|
+
return tzlocal.get_localzone_name()
|
530
541
|
except Exception as e:
|
531
542
|
logger.exception(e)
|
532
543
|
logger.error("Couldn't determine system timezone. Falling back to UTC. Please report this as a bug!")
|
@@ -540,7 +551,7 @@ def get_system_tz() -> pytz.BaseTzInfo:
|
|
540
551
|
return pytz.timezone(zone)
|
541
552
|
except Exception as e:
|
542
553
|
logger.exception(e)
|
543
|
-
logger.error(
|
554
|
+
logger.error("Unknown time zone %s. Falling back to UTC. Please report this as a bug!", zone)
|
544
555
|
return pytz.utc
|
545
556
|
|
546
557
|
# used in misc/install_server.py
|
@@ -578,7 +589,7 @@ def default_config_path() -> Path:
|
|
578
589
|
|
579
590
|
|
580
591
|
@contextmanager
|
581
|
-
def measure(tag: str='', *, logger, unit: str='ms'):
|
592
|
+
def measure(tag: str='', *, logger: logging.Logger, unit: str='ms'):
|
582
593
|
before = timer()
|
583
594
|
yield lambda: timer() - before
|
584
595
|
after = timer()
|
@@ -586,3 +597,11 @@ def measure(tag: str='', *, logger, unit: str='ms'):
|
|
586
597
|
mult = {'s': 1, 'ms': 10**3, 'us': 10**6}[unit]
|
587
598
|
xx = secs * mult
|
588
599
|
logger.debug(f'[{tag}]: {xx:.1f}{unit} elapsed')
|
600
|
+
|
601
|
+
|
602
|
+
def is_sqlite_db(x: Path) -> bool:
|
603
|
+
return x.is_file() and mime(x) in {
|
604
|
+
'application/x-sqlite3',
|
605
|
+
'application/vnd.sqlite3',
|
606
|
+
# TODO this mime can also match wal files/journals, not sure
|
607
|
+
}
|
promnesia/compare.py
CHANGED
@@ -8,6 +8,7 @@ from typing import Dict, List, Any, NamedTuple, Optional, Iterator, Set, Tuple
|
|
8
8
|
|
9
9
|
|
10
10
|
from .common import DbVisit, Url, PathWithMtime # TODO ugh. figure out pythonpath
|
11
|
+
from .database.load import row_to_db_visit
|
11
12
|
|
12
13
|
# TODO include latest too?
|
13
14
|
# from cconfig import ignore, filtered
|
@@ -139,10 +140,10 @@ def compare_files(*files: Path, log=True) -> Iterator[Tuple[str, DbVisit]]:
|
|
139
140
|
this_dts = name[0: name.index('.')] # can't use stem due to multiple extensions..
|
140
141
|
|
141
142
|
from promnesia.server import _get_stuff # TODO ugh
|
142
|
-
engine,
|
143
|
+
engine, table = _get_stuff(PathWithMtime.make(f))
|
143
144
|
|
144
145
|
with engine.connect() as conn:
|
145
|
-
vis = [
|
146
|
+
vis = [row_to_db_visit(row) for row in conn.execute(table.select())]
|
146
147
|
|
147
148
|
if last is not None:
|
148
149
|
between = f'{last_dts}:{this_dts}'
|
promnesia/compat.py
CHANGED
@@ -1,71 +1,12 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
from
|
4
|
-
|
5
|
-
|
6
|
-
PathIsh = Union[Path, str]
|
7
|
-
Paths = Sequence[PathIsh]
|
8
|
-
|
9
|
-
# TLDR: py37 on windows has an annoying bug.. https://github.com/karlicoss/promnesia/issues/91#issuecomment-701051074
|
10
|
-
def _fix(args: Paths) -> List[str]:
|
11
|
-
assert not isinstance(args, str), args # just to prevent shell=True calls...
|
12
|
-
return list(map(str, args))
|
13
|
-
|
14
|
-
|
15
|
-
import argparse
|
16
|
-
|
17
|
-
def register_argparse_extend_action_in_pre_py38(parser: argparse.ArgumentParser):
|
18
|
-
import sys
|
19
|
-
|
20
|
-
if sys.version_info < (3, 8):
|
21
|
-
|
22
|
-
class ExtendAction(argparse.Action):
|
23
|
-
|
24
|
-
def __call__(self, parser, namespace, values, option_string=None):
|
25
|
-
items = getattr(namespace, self.dest) or []
|
26
|
-
items.extend(values)
|
27
|
-
setattr(namespace, self.dest, items)
|
28
|
-
|
29
|
-
|
30
|
-
parser.register('action', 'extend', ExtendAction)
|
31
|
-
|
32
|
-
|
33
|
-
import subprocess
|
34
|
-
from subprocess import PIPE # for convenience?
|
35
|
-
|
36
|
-
|
37
|
-
if TYPE_CHECKING:
|
38
|
-
from subprocess import run, check_call, check_output, Popen
|
39
|
-
else:
|
40
|
-
def run(args: Paths, **kwargs) -> subprocess.CompletedProcess:
|
41
|
-
return subprocess.run(_fix(args), **kwargs)
|
42
|
-
|
43
|
-
def check_call(args: Paths, **kwargs) -> None:
|
44
|
-
subprocess.check_call(_fix(args), **kwargs)
|
45
|
-
|
46
|
-
def check_output(args: Paths, **kwargs) -> bytes:
|
47
|
-
return subprocess.check_output(_fix(args), **kwargs)
|
48
|
-
|
49
|
-
def Popen(args: Paths, **kwargs) -> subprocess.Popen:
|
50
|
-
return subprocess.Popen(_fix(args), **kwargs)
|
1
|
+
## we used to have compat fixes here for these for python3.7
|
2
|
+
## keeping in case any sources depended on compat functions
|
3
|
+
from subprocess import PIPE, run, check_call, check_output, Popen
|
4
|
+
from typing import Protocol, Literal
|
5
|
+
##
|
51
6
|
|
52
7
|
|
53
8
|
# can remove after python3.9
|
54
9
|
def removeprefix(text: str, prefix: str) -> str:
|
55
10
|
if text.startswith(prefix):
|
56
11
|
return text[len(prefix):]
|
57
|
-
return text
|
58
|
-
|
59
|
-
|
60
|
-
# TODO Deprecate instead, they shouldn't be exported form this module
|
61
|
-
# del PathIsh
|
62
|
-
# del Paths
|
63
|
-
|
64
|
-
if sys.version_info[:2] >= (3, 8):
|
65
|
-
from typing import Protocol
|
66
|
-
else:
|
67
|
-
if TYPE_CHECKING:
|
68
|
-
from typing_extensions import Protocol # type: ignore[misc]
|
69
|
-
else:
|
70
|
-
# todo could also use NamedTuple?
|
71
|
-
Protocol = object
|
12
|
+
return text
|
promnesia/config.py
CHANGED
@@ -6,7 +6,7 @@ import importlib
|
|
6
6
|
import importlib.util
|
7
7
|
import warnings
|
8
8
|
|
9
|
-
from .common import PathIsh,
|
9
|
+
from .common import PathIsh, default_output_dir, default_cache_dir
|
10
10
|
from .common import Res, Source, DbVisit
|
11
11
|
|
12
12
|
|
@@ -69,6 +69,8 @@ class Config(NamedTuple):
|
|
69
69
|
|
70
70
|
@property
|
71
71
|
def cache_dir(self) -> Optional[Path]:
|
72
|
+
# TODO we used to use this for cachew, but it's best to rely on HPI modules etc to cofigure this
|
73
|
+
# keeping just in case for now
|
72
74
|
cd = self.CACHE_DIR
|
73
75
|
cpath: Optional[Path]
|
74
76
|
if cd is None:
|
@@ -127,7 +129,7 @@ def import_config(config_file: PathIsh) -> Config:
|
|
127
129
|
spec = importlib.util.spec_from_file_location(name, p); assert spec is not None
|
128
130
|
mod = importlib.util.module_from_spec(spec); assert mod is not None
|
129
131
|
loader = spec.loader; assert loader is not None
|
130
|
-
loader.exec_module(mod)
|
132
|
+
loader.exec_module(mod)
|
131
133
|
|
132
134
|
d = {}
|
133
135
|
for f in Config._fields:
|