promnesia 1.3.20241021__py3-none-any.whl → 1.4.20250909__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. promnesia/__init__.py +4 -1
  2. promnesia/__main__.py +72 -59
  3. promnesia/cannon.py +90 -89
  4. promnesia/common.py +74 -62
  5. promnesia/compare.py +15 -10
  6. promnesia/config.py +22 -17
  7. promnesia/database/dump.py +1 -2
  8. promnesia/extract.py +6 -6
  9. promnesia/logging.py +27 -15
  10. promnesia/misc/install_server.py +25 -19
  11. promnesia/server.py +69 -53
  12. promnesia/sources/auto.py +65 -51
  13. promnesia/sources/browser.py +7 -2
  14. promnesia/sources/browser_legacy.py +51 -40
  15. promnesia/sources/demo.py +0 -1
  16. promnesia/sources/fbmessenger.py +0 -1
  17. promnesia/sources/filetypes.py +15 -11
  18. promnesia/sources/github.py +4 -1
  19. promnesia/sources/guess.py +4 -1
  20. promnesia/sources/hackernews.py +5 -7
  21. promnesia/sources/hpi.py +3 -1
  22. promnesia/sources/html.py +4 -2
  23. promnesia/sources/instapaper.py +1 -0
  24. promnesia/sources/markdown.py +4 -4
  25. promnesia/sources/org.py +17 -8
  26. promnesia/sources/plaintext.py +14 -11
  27. promnesia/sources/pocket.py +2 -1
  28. promnesia/sources/reddit.py +5 -8
  29. promnesia/sources/roamresearch.py +3 -1
  30. promnesia/sources/rss.py +4 -5
  31. promnesia/sources/shellcmd.py +3 -6
  32. promnesia/sources/signal.py +14 -14
  33. promnesia/sources/smscalls.py +0 -1
  34. promnesia/sources/stackexchange.py +2 -2
  35. promnesia/sources/takeout.py +14 -21
  36. promnesia/sources/takeout_legacy.py +16 -10
  37. promnesia/sources/telegram.py +7 -3
  38. promnesia/sources/telegram_legacy.py +5 -5
  39. promnesia/sources/twitter.py +1 -1
  40. promnesia/sources/vcs.py +6 -3
  41. promnesia/sources/viber.py +2 -2
  42. promnesia/sources/website.py +4 -3
  43. promnesia/sqlite.py +10 -7
  44. promnesia/tests/common.py +2 -0
  45. promnesia/tests/server_helper.py +2 -2
  46. promnesia/tests/sources/test_filetypes.py +9 -7
  47. promnesia/tests/sources/test_hypothesis.py +7 -3
  48. promnesia/tests/sources/test_org.py +7 -2
  49. promnesia/tests/sources/test_plaintext.py +9 -7
  50. promnesia/tests/sources/test_shellcmd.py +10 -9
  51. promnesia/tests/test_cannon.py +254 -237
  52. promnesia/tests/test_cli.py +8 -2
  53. promnesia/tests/test_compare.py +16 -12
  54. promnesia/tests/test_db_dump.py +4 -3
  55. promnesia/tests/test_extract.py +7 -4
  56. promnesia/tests/test_indexer.py +10 -10
  57. promnesia/tests/test_server.py +10 -10
  58. promnesia/tests/utils.py +1 -5
  59. promnesia-1.4.20250909.dist-info/METADATA +66 -0
  60. promnesia-1.4.20250909.dist-info/RECORD +80 -0
  61. {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info}/WHEEL +1 -2
  62. promnesia/kjson.py +0 -122
  63. promnesia/sources/__init__.pyi +0 -0
  64. promnesia-1.3.20241021.dist-info/METADATA +0 -55
  65. promnesia-1.3.20241021.dist-info/RECORD +0 -83
  66. promnesia-1.3.20241021.dist-info/top_level.txt +0 -1
  67. {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info}/entry_points.txt +0 -0
  68. {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info/licenses}/LICENSE +0 -0
promnesia/common.py CHANGED
@@ -7,19 +7,20 @@ import re
7
7
  import shutil
8
8
  import tempfile
9
9
  import warnings
10
- from collections.abc import Iterable, Sequence
10
+ from collections.abc import Callable, Iterable, Sequence
11
11
  from contextlib import contextmanager
12
12
  from copy import copy
13
- from datetime import date, datetime
13
+ from datetime import date, datetime, timezone
14
14
  from functools import lru_cache
15
15
  from glob import glob
16
16
  from pathlib import Path
17
17
  from subprocess import PIPE, Popen, run
18
18
  from timeit import default_timer as timer
19
19
  from types import ModuleType
20
- from typing import TYPE_CHECKING, Callable, NamedTuple, Optional, TypeVar, Union
20
+ from typing import TYPE_CHECKING, NamedTuple, TypeAlias, TypeVar
21
+ from zoneinfo import ZoneInfo
21
22
 
22
- import pytz
23
+ import platformdirs
23
24
  from more_itertools import intersperse
24
25
 
25
26
  from .cannon import canonify
@@ -27,27 +28,28 @@ from .cannon import canonify
27
28
  _is_windows = os.name == 'nt'
28
29
 
29
30
  T = TypeVar('T')
30
- Res = Union[T, Exception]
31
+ Res: TypeAlias = T | Exception
31
32
 
32
- PathIsh = Union[str, Path]
33
+ PathIsh = str | Path
33
34
 
34
35
  Url = str
35
36
  SourceName = str
36
- DatetimeIsh = Union[datetime, date]
37
+ DatetimeIsh = datetime | date
37
38
  Context = str
38
39
  Second = int
39
40
 
41
+
40
42
  # TODO hmm. arguably, source and context are almost same things...
41
43
  class Loc(NamedTuple):
42
44
  title: str
43
- href: Optional[str] = None # noqa: UP007 # looks like hypothesis doesn't like in on python <= 3.9
45
+ href: str | None = None
44
46
 
45
47
  @classmethod
46
- def make(cls, title: str, href: str | None=None) -> Loc:
48
+ def make(cls, title: str, href: str | None = None) -> Loc:
47
49
  return cls(title=title, href=href)
48
50
 
49
51
  @classmethod
50
- def file(cls, path: PathIsh, line: int | None=None, relative_to: Path | None=None) -> Loc:
52
+ def file(cls, path: PathIsh, line: int | None = None, relative_to: Path | None = None) -> Loc:
51
53
  lstr = '' if line is None else f':{line}'
52
54
  # todo loc should be url encoded? dunno.
53
55
  # or use line=? eh. I don't know. Just ask in issues.
@@ -55,11 +57,11 @@ class Loc(NamedTuple):
55
57
  # todo: handler has to be overridable by config. This is needed for docker, but also for a "as a service" install, where the sources would be available on some remote webserver
56
58
  # maybe it should be treated as a format string, so that {line} may be a part of the result or not.
57
59
  # for local usage, editor:///file:line works, but if the txt file is only available through http, it breaks.
58
- #if get_config().MIME_HANDLER:
60
+ # if get_config().MIME_HANDLER:
59
61
  # handler = get_config().MIME_HANDLER
60
- #if True:
62
+ # if True:
61
63
  # handler = 'editor:///home/koom/promnesia/docker/'
62
- #else:
64
+ # else:
63
65
  handler = _detect_mime_handler()
64
66
 
65
67
  rel = Path(path)
@@ -67,13 +69,10 @@ class Loc(NamedTuple):
67
69
  try:
68
70
  # making it relative is a bit nicer for display
69
71
  rel = rel.relative_to(relative_to)
70
- except Exception as e:
71
- pass # todo log/warn?
72
+ except Exception:
73
+ pass # todo log/warn?
72
74
  loc = f'{rel}{lstr}'
73
- return cls.make(
74
- title=loc,
75
- href=f'{handler}{path}{lstr}'
76
- )
75
+ return cls.make(title=loc, href=f'{handler}{path}{lstr}')
77
76
 
78
77
  # TODO need some uniform way of string conversion
79
78
  # but generally, it will be
@@ -89,7 +88,9 @@ def warn_once(message: str) -> None:
89
88
 
90
89
 
91
90
  def _warn_no_xdg_mime() -> None:
92
- warn_once("No xdg-mime on your OS! If you're on OSX, perhaps you can help me! https://github.com/karlicoss/open-in-editor/issues/1")
91
+ warn_once(
92
+ "No xdg-mime on your OS! If you're on OSX, perhaps you can help me! https://github.com/karlicoss/open-in-editor/issues/1"
93
+ )
93
94
 
94
95
 
95
96
  @lru_cache(1)
@@ -101,7 +102,7 @@ def _detect_mime_handler() -> str:
101
102
  _warn_no_xdg_mime()
102
103
  return False
103
104
  if r.returncode > 0:
104
- warnings.warn('xdg-mime failed') # hopefully rest is in stderr
105
+ warnings.warn('xdg-mime failed') # hopefully rest is in stderr
105
106
  return False
106
107
  # todo not sure if should check=True or something
107
108
  handler = r.stdout.decode('utf8').strip()
@@ -110,11 +111,13 @@ def _detect_mime_handler() -> str:
110
111
  # 1. detect legacy 'emacs:' handler (so it doesn't break for existing users)
111
112
  result = None
112
113
  if exists('emacs'):
113
- warnings.warn('''
114
+ warnings.warn(
115
+ '''
114
116
  'emacs:' handler is deprecated!
115
117
  Please use newer version at https://github.com/karlicoss/open-in-editor
116
118
  And remove the old one (most likely, rm ~/.local/share/applications/mimemacs.desktop && update-desktop-database ~/.local/share/applications).
117
- '''.rstrip())
119
+ '''.rstrip()
120
+ )
118
121
  result = 'emacs:'
119
122
 
120
123
  # 2. now try to use newer editor:// thing
@@ -122,10 +125,12 @@ def _detect_mime_handler() -> str:
122
125
 
123
126
  # TODO would be nice to collect warnings and display at the end
124
127
  if not exists('editor'):
125
- warnings.warn('''
128
+ warnings.warn(
129
+ '''
126
130
  You might want to install https://github.com/karlicoss/open-in-editor
127
131
  So you can jump to your text files straight from the browser
128
- '''.rstrip())
132
+ '''.rstrip()
133
+ )
129
134
  else:
130
135
  result = 'editor://'
131
136
 
@@ -148,20 +153,22 @@ class Visit(NamedTuple):
148
153
  # spent: Optional[Second] = None
149
154
  debug: str | None = None
150
155
 
151
- Result = Union[Visit, Exception]
156
+
157
+ Result = Visit | Exception
152
158
  Results = Iterable[Result]
153
159
  Extractor = Callable[[], Results]
154
160
 
155
161
  Extraction = Result # TODO deprecate!
156
162
 
163
+
157
164
  class DbVisit(NamedTuple):
158
165
  norm_url: Url
159
166
  orig_url: Url
160
167
  dt: datetime
161
168
  locator: Loc
162
- src: Optional[SourceName] = None # noqa: UP007 # looks like hypothesis doesn't like in on python <= 3.9
163
- context: Optional[Context] = None # noqa: UP007 # looks like hypothesis doesn't like in on python <= 3.9
164
- duration: Optional[Second] = None # noqa: UP007 # looks like hypothesis doesn't like in on python <= 3.9
169
+ src: SourceName | None = None
170
+ context: Context | None = None
171
+ duration: Second | None = None
165
172
 
166
173
  @staticmethod
167
174
  def make(p: Visit, src: SourceName) -> Res[DbVisit]:
@@ -171,9 +178,9 @@ class DbVisit(NamedTuple):
171
178
  dt = p.dt
172
179
  elif isinstance(p.dt, date):
173
180
  # TODO that won't be with timezone..
174
- dt = datetime.combine(p.dt, datetime.min.time()) # meh..
181
+ dt = datetime.combine(p.dt, datetime.min.time()) # meh..
175
182
  else:
176
- raise AssertionError(f'unexpected date: {p.dt}, {type(p.dt)}') # noqa: TRY301
183
+ raise TypeError(f'unexpected date: {p.dt}, {type(p.dt)}') # noqa: TRY301
177
184
  except Exception as e:
178
185
  return e
179
186
 
@@ -201,32 +208,34 @@ from .logging import LazyLogger
201
208
 
202
209
  logger = LazyLogger('promnesia', level='DEBUG')
203
210
 
211
+
204
212
  def get_logger() -> logging.Logger:
205
213
  # deprecate? no need since logger is lazy already
206
214
  return logger
207
215
 
208
216
 
209
-
210
217
  # kinda singleton
211
218
  @lru_cache(1)
212
219
  def get_tmpdir() -> tempfile.TemporaryDirectory[str]:
213
- # todo use appdirs?
220
+ # todo use platformdirs?
214
221
  tdir = tempfile.TemporaryDirectory(suffix="promnesia")
215
222
  return tdir
216
223
 
224
+
217
225
  # TODO use mypy literal?
218
226
  Syntax = str
219
227
 
220
228
 
221
229
  @lru_cache(None)
222
230
  def _get_urlextractor(syntax: Syntax):
223
- from urlextract import URLExtract # type: ignore
231
+ from urlextract import URLExtract # type: ignore[import-untyped]
232
+
224
233
  u = URLExtract()
225
234
  # https://github.com/lipoja/URLExtract/issues/13
226
- if syntax in {'org', 'orgmode', 'org-mode'}: # TODO remove hardcoding..
235
+ if syntax in {'org', 'orgmode', 'org-mode'}: # TODO remove hardcoding..
227
236
  # handle org-mode links properly..
228
237
  u._stop_chars_right |= {'[', ']'}
229
- u._stop_chars_left |= {'[', ']'}
238
+ u._stop_chars_left |= {'[', ']'}
230
239
  elif syntax in {'md', 'markdown'}:
231
240
  pass
232
241
  # u._stop_chars_right |= {','}
@@ -244,19 +253,19 @@ def _sanitize(url: str) -> str:
244
253
  return url
245
254
 
246
255
 
247
- def iter_urls(s: str, *, syntax: Syntax='') -> Iterable[Url]:
256
+ def iter_urls(s: str, *, syntax: Syntax = '') -> Iterable[Url]:
248
257
  urlextractor = _get_urlextractor(syntax=syntax)
249
258
  # note: it also has get_indices, might be useful
250
259
  for u in urlextractor.gen_urls(s):
251
260
  yield _sanitize(u)
252
261
 
253
262
 
254
- def extract_urls(s: str, *, syntax: Syntax='') -> list[Url]:
263
+ def extract_urls(s: str, *, syntax: Syntax = '') -> list[Url]:
255
264
  return list(iter_urls(s=s, syntax=syntax))
256
265
 
257
266
 
258
267
  def from_epoch(ts: int) -> datetime:
259
- return datetime.fromtimestamp(ts, tz=pytz.utc)
268
+ return datetime.fromtimestamp(ts, tz=timezone.utc)
260
269
 
261
270
 
262
271
  def join_tags(tags: Iterable[str]) -> str:
@@ -287,10 +296,7 @@ class PathWithMtime(NamedTuple):
287
296
  PreExtractor = Callable[..., Results]
288
297
 
289
298
 
290
- PreSource = Union[
291
- PreExtractor,
292
- ModuleType, # module with 'index' functon defined in it
293
- ]
299
+ PreSource = PreExtractor | ModuleType # module with 'index' functon defined in it
294
300
 
295
301
 
296
302
  # todo not sure about this...
@@ -322,7 +328,7 @@ def _get_index_function(sourceish: PreSource) -> PreExtractor:
322
328
  class Source:
323
329
  # TODO make sure it works with empty src?
324
330
  # TODO later, make it properly optional?
325
- def __init__(self, ff: PreSource, *args, src: SourceName='', name: SourceName='', **kwargs) -> None:
331
+ def __init__(self, ff: PreSource, *args, src: SourceName = '', name: SourceName = '', **kwargs) -> None:
326
332
  # NOTE: in principle, would be nice to make the Source countructor to be as dumb as possible
327
333
  # so we could move _get_index_function inside extractor lambda
328
334
  # but that way we get nicer error reporting
@@ -356,6 +362,7 @@ class Source:
356
362
  # TODO deprecated!
357
363
  return self.name
358
364
 
365
+
359
366
  # TODO deprecated
360
367
  Indexer = Source
361
368
 
@@ -364,6 +371,7 @@ Indexer = Source
364
371
  # NOTE: used in configs...
365
372
  def last(path: PathIsh, *parts: str) -> Path:
366
373
  import os.path
374
+
367
375
  pp = os.path.join(str(path), *parts) # noqa: PTH118
368
376
  return Path(max(glob(pp, recursive=True))) # noqa: PTH207
369
377
 
@@ -390,22 +398,21 @@ def slugify(x: str) -> str:
390
398
 
391
399
 
392
400
  # todo cache?
393
- def appdirs():
401
+ def _platformdirs() -> platformdirs.PlatformDirs:
394
402
  under_test = os.environ.get('PYTEST_CURRENT_TEST') is not None
395
403
  # todo actually use test name?
396
404
  name = 'promnesia-test' if under_test else 'promnesia'
397
- import appdirs as ad # type: ignore[import-untyped]
398
- return ad.AppDirs(appname=name)
405
+ return platformdirs.PlatformDirs(appname=name)
399
406
 
400
407
 
401
408
  def default_output_dir() -> Path:
402
409
  # TODO: on Windows, there are two extra subdirectories (<AppAuthor>\<AppName>)
403
410
  # perhaps makes sense to create it here with parents to avoid issues downstream?
404
- return Path(appdirs().user_data_dir)
411
+ return Path(_platformdirs().user_data_dir)
405
412
 
406
413
 
407
414
  def default_cache_dir() -> Path:
408
- return Path(appdirs().user_cache_dir)
415
+ return Path(_platformdirs().user_cache_dir)
409
416
 
410
417
 
411
418
  # make it lazy, otherwise it might crash on module import (e.g. on Windows)
@@ -414,15 +421,15 @@ def default_cache_dir() -> Path:
414
421
  def _magic() -> Callable[[PathIsh], str | None]:
415
422
  logger = get_logger()
416
423
  try:
417
- import magic # type: ignore
424
+ import magic # type: ignore[import-not-found]
418
425
  except Exception as e:
419
426
  logger.exception(e)
420
427
  defensive_msg: str | None = None
421
428
  if isinstance(e, ModuleNotFoundError) and e.name == 'magic':
422
429
  defensive_msg = "python-magic is not detected. It's recommended for better file type detection (pip3 install --user python-magic). See https://github.com/ahupp/python-magic#installation"
423
430
  elif isinstance(e, ImportError):
424
- emsg = getattr(e, 'msg', '') # make mypy happy
425
- if 'failed to find libmagic' in emsg: # probably the actual library is missing?...
431
+ emsg = getattr(e, 'msg', '') # make mypy happy
432
+ if 'failed to find libmagic' in emsg: # probably the actual library is missing?...
426
433
  defensive_msg = "couldn't import magic. See https://github.com/ahupp/python-magic#installation"
427
434
  if defensive_msg is not None:
428
435
  logger.warning(defensive_msg)
@@ -439,6 +446,7 @@ def _magic() -> Callable[[PathIsh], str | None]:
439
446
  @lru_cache(1)
440
447
  def _mimetypes():
441
448
  import mimetypes
449
+
442
450
  mimetypes.init()
443
451
  return mimetypes
444
452
 
@@ -475,7 +483,7 @@ def find_args(root: Path, *, follow: bool, ignore: Sequence[str] = ()) -> list[s
475
483
  *prune_dir_args,
476
484
  '-type', 'f',
477
485
  *ignore_file_args
478
- ]
486
+ ] # fmt: skip
479
487
 
480
488
 
481
489
  def fdfind_args(root: Path, *, follow: bool, ignore: Sequence[str] = ()) -> list[str]:
@@ -495,10 +503,10 @@ def fdfind_args(root: Path, *, follow: bool, ignore: Sequence[str] = ()) -> list
495
503
  '--type', 'f',
496
504
  '.',
497
505
  str(root),
498
- ]
506
+ ] # fmt: skip
499
507
 
500
508
 
501
- def traverse(root: Path, *, follow: bool=True, ignore: Sequence[str] = ()) -> Iterable[Path]:
509
+ def traverse(root: Path, *, follow: bool = True, ignore: Sequence[str] = ()) -> Iterable[Path]:
502
510
  if not root.is_dir():
503
511
  yield root
504
512
  return
@@ -517,12 +525,14 @@ def traverse(root: Path, *, follow: bool=True, ignore: Sequence[str] = ()) -> It
517
525
 
518
526
  cmd = ['find', *find_args(root, follow=follow, ignore=ignore)]
519
527
  # try to use fd.. it cooperates well with gitignore etc, also faster than find
520
- for x in ('fd', 'fd-find', 'fdfind'): # has different names on different dists..
528
+ for x in ('fd', 'fd-find', 'fdfind'): # has different names on different dists..
521
529
  if shutil.which(x):
522
530
  cmd = [x, *fdfind_args(root, follow=follow, ignore=ignore)]
523
531
  break
524
532
  else:
525
- warnings.warn("'fdfind' is recommended for the best indexing performance. See https://github.com/sharkdp/fd#installation. Falling back to 'find'")
533
+ warnings.warn(
534
+ "'fdfind' is recommended for the best indexing performance. See https://github.com/sharkdp/fd#installation. Falling back to 'find'"
535
+ )
526
536
 
527
537
  logger.debug('running: %s', cmd)
528
538
  # TODO split by \0?
@@ -539,6 +549,7 @@ def traverse(root: Path, *, follow: bool=True, ignore: Sequence[str] = ()) -> It
539
549
  def get_system_zone() -> str:
540
550
  try:
541
551
  import tzlocal
552
+
542
553
  return tzlocal.get_localzone_name()
543
554
  except Exception as e:
544
555
  logger.exception(e)
@@ -547,14 +558,15 @@ def get_system_zone() -> str:
547
558
 
548
559
 
549
560
  @lru_cache(1)
550
- def get_system_tz() -> pytz.BaseTzInfo:
561
+ def get_system_tz() -> ZoneInfo:
551
562
  zone = get_system_zone()
552
563
  try:
553
- return pytz.timezone(zone)
564
+ return ZoneInfo(zone)
554
565
  except Exception as e:
555
566
  logger.exception(e)
556
567
  logger.error("Unknown time zone %s. Falling back to UTC. Please report this as a bug!", zone)
557
- return pytz.utc
568
+ return ZoneInfo('UTC')
569
+
558
570
 
559
571
  # used in misc/install_server.py
560
572
  def root() -> Path:
@@ -576,7 +588,7 @@ def user_config_file() -> Path:
576
588
  if "PROMNESIA_CONFIG" in os.environ:
577
589
  return Path(os.environ["PROMNESIA_CONFIG"])
578
590
  else:
579
- return Path(appdirs().user_config_dir) / 'config.py'
591
+ return Path(_platformdirs().user_config_dir) / 'config.py'
580
592
 
581
593
 
582
594
  def default_config_path() -> Path:
@@ -591,7 +603,7 @@ def default_config_path() -> Path:
591
603
 
592
604
 
593
605
  @contextmanager
594
- def measure(tag: str='', *, logger: logging.Logger, unit: str='ms'):
606
+ def measure(tag: str = '', *, logger: logging.Logger, unit: str = 'ms'):
595
607
  before = timer()
596
608
  yield lambda: timer() - before
597
609
  after = timer()
promnesia/compare.py CHANGED
@@ -14,25 +14,29 @@ from .database.load import row_to_db_visit
14
14
  # TODO include latest too?
15
15
  # from cconfig import ignore, filtered
16
16
 
17
+
17
18
  def get_logger():
18
19
  return logging.getLogger('promnesia-db-changes')
19
20
 
21
+
20
22
  # TODO return error depending on severity?
21
23
 
22
24
 
23
25
  T = TypeVar('T')
24
26
 
27
+
25
28
  def eliminate_by(sa: Sequence[T], sb: Sequence[T], key):
26
29
  def make_dict(s: Sequence[T]) -> dict[str, list[T]]:
27
30
  res: dict[str, list[T]] = {}
28
31
  for a in s:
29
32
  k = key(a)
30
- ll = res.get(k, None)
33
+ ll = res.get(k)
31
34
  if ll is None:
32
35
  ll = []
33
36
  res[k] = ll
34
37
  ll.append(a)
35
38
  return res
39
+
36
40
  da = make_dict(sa)
37
41
  db = make_dict(sb)
38
42
  ka = set(da.keys())
@@ -43,11 +47,11 @@ def eliminate_by(sa: Sequence[T], sb: Sequence[T], key):
43
47
  for k in ka.union(kb):
44
48
  la = da.get(k, [])
45
49
  lb = db.get(k, [])
46
- common.update(la[:min(len(la), len(lb))])
50
+ common.update(la[: min(len(la), len(lb))])
47
51
  if len(la) > len(lb):
48
- onlya.update(la[len(lb):])
52
+ onlya.update(la[len(lb) :])
49
53
  if len(lb) > len(la):
50
- onlyb.update(lb[len(la):])
54
+ onlyb.update(lb[len(la) :])
51
55
 
52
56
  return onlya, common, onlyb
53
57
 
@@ -61,7 +65,7 @@ def compare(before: list[DbVisit], after: list[DbVisit], between: str, *, log=Tr
61
65
  umap: dict[Url, list[DbVisit]] = {}
62
66
  for a in after:
63
67
  url = a.norm_url
64
- xx = umap.get(url, []) # TODO canonify here?
68
+ xx = umap.get(url, []) # TODO canonify here?
65
69
  xx.append(a)
66
70
  umap[url] = xx
67
71
 
@@ -71,14 +75,13 @@ def compare(before: list[DbVisit], after: list[DbVisit], between: str, *, log=Tr
71
75
  logger.error('between %s missing %s', between, b)
72
76
  print('ignoreline "{}", # {} {}'.format('exid', b.norm_url, b.src), file=sys.stderr)
73
77
 
74
-
75
78
  # the idea is that we eliminate items simultaneously from both sets
76
79
  eliminations = [
77
80
  ('identity' , lambda x: x),
78
81
  ('without dt' , lambda x: x._replace(src='', dt='')),
79
82
  ('without context' , lambda x: x._replace(src='', context='', locator='')),
80
83
  ('without dt and context' , lambda x: x._replace(src='', dt='', context='', locator='')),
81
- ]
84
+ ] # fmt: skip
82
85
  for ename, ekey in eliminations:
83
86
  logger.info('eliminating by %s', ename)
84
87
  logger.info('before: %d, after: %d', len(before), len(after))
@@ -94,6 +97,7 @@ def compare(before: list[DbVisit], after: list[DbVisit], between: str, *, log=Tr
94
97
 
95
98
  return errors
96
99
 
100
+
97
101
  def setup_parser(p):
98
102
  # TODO better name?
99
103
  p.add_argument('--intermediate-dir', type=Path)
@@ -107,7 +111,7 @@ def get_files(args):
107
111
  int_dir = args.intermediate_dir
108
112
  assert int_dir.exists()
109
113
  files = sorted(int_dir.glob('*.sqlite*'))
110
- files = files[-args.last:]
114
+ files = files[-args.last :]
111
115
  else:
112
116
  files = [Path(p) for p in args.paths]
113
117
  return files
@@ -135,9 +139,10 @@ def compare_files(*files: Path, log=True) -> Iterator[tuple[str, DbVisit]]:
135
139
  for f in files:
136
140
  logger.info('processing %r', f)
137
141
  name = f.name
138
- this_dts = name[0: name.index('.')] # can't use stem due to multiple extensions..
142
+ this_dts = name[0 : name.index('.')] # can't use stem due to multiple extensions..
139
143
 
140
144
  from promnesia.server import _get_stuff # TODO ugh
145
+
141
146
  engine, table = _get_stuff(PathWithMtime.make(f))
142
147
 
143
148
  with engine.connect() as conn:
@@ -151,6 +156,6 @@ def compare_files(*files: Path, log=True) -> Iterator[tuple[str, DbVisit]]:
151
156
  last = vis
152
157
  last_dts = this_dts
153
158
 
159
+
154
160
  if __name__ == '__main__':
155
161
  main()
156
-
promnesia/config.py CHANGED
@@ -4,10 +4,10 @@ import importlib
4
4
  import importlib.util
5
5
  import os
6
6
  import warnings
7
- from collections.abc import Iterable
7
+ from collections.abc import Callable, Iterable
8
8
  from pathlib import Path
9
9
  from types import ModuleType
10
- from typing import Callable, NamedTuple, Union
10
+ from typing import NamedTuple
11
11
 
12
12
  from .common import DbVisit, PathIsh, Res, Source, default_cache_dir, default_output_dir
13
13
 
@@ -17,37 +17,37 @@ HookT = Callable[[Res[DbVisit]], Iterable[Res[DbVisit]]]
17
17
  ModuleName = str
18
18
 
19
19
  # something that can be converted into a proper Source
20
- ConfigSource = Union[Source, ModuleName, ModuleType]
20
+ ConfigSource = Source | ModuleName | ModuleType
21
21
 
22
22
 
23
23
  class Config(NamedTuple):
24
24
  # TODO remove default from sources once migrated
25
- SOURCES: list[ConfigSource] = []
25
+ SOURCES: list[ConfigSource] = [] # noqa: RUF012
26
26
 
27
27
  # if not specified, uses user data dir
28
28
  OUTPUT_DIR: PathIsh | None = None
29
29
 
30
30
  CACHE_DIR: PathIsh | None = ''
31
- FILTERS: list[str] = []
31
+ FILTERS: list[str] = [] # noqa: RUF012
32
32
 
33
33
  HOOK: HookT | None = None
34
34
 
35
35
  #
36
36
  # NOTE: INDEXERS is deprecated, use SOURCES instead
37
- INDEXERS: list[ConfigSource] = []
38
- #MIME_HANDLER: Optional[str] = None # TODO
37
+ INDEXERS: list[ConfigSource] = [] # noqa: RUF012
38
+ # MIME_HANDLER: Optional[str] = None # TODO
39
39
 
40
40
  @property
41
41
  def sources(self) -> Iterable[Res[Source]]:
42
- idx = self.INDEXERS
43
-
44
42
  if len(self.INDEXERS) > 0:
45
43
  warnings.warn("'INDEXERS' is deprecated. Please use 'SOURCES'!", DeprecationWarning)
46
44
 
47
45
  raw = self.SOURCES + self.INDEXERS
48
46
 
49
47
  if len(raw) == 0:
50
- raise RuntimeError("Please specify SOURCES in the config! See https://github.com/karlicoss/promnesia#setup for more information")
48
+ raise RuntimeError(
49
+ "Please specify SOURCES in the config! See https://github.com/karlicoss/promnesia#setup for more information"
50
+ )
51
51
 
52
52
  for r in raw:
53
53
  if isinstance(r, ModuleName):
@@ -72,8 +72,8 @@ class Config(NamedTuple):
72
72
  cd = self.CACHE_DIR
73
73
  cpath: Path | None
74
74
  if cd is None:
75
- cpath = None # means 'disabled' in cachew
76
- elif cd == '': # meh.. but need to make it None friendly..
75
+ cpath = None # means 'disabled' in cachew
76
+ elif cd == '': # meh.. but need to make it None friendly..
77
77
  cpath = default_cache_dir()
78
78
  else:
79
79
  cpath = Path(cd)
@@ -97,12 +97,14 @@ class Config(NamedTuple):
97
97
  def hook(self) -> HookT | None:
98
98
  return self.HOOK
99
99
 
100
+
100
101
  instance: Config | None = None
101
102
 
102
103
 
103
104
  def has() -> bool:
104
105
  return instance is not None
105
106
 
107
+
106
108
  def get() -> Config:
107
109
  assert instance is not None, "Expected config to be set, but it's not"
108
110
  return instance
@@ -124,9 +126,12 @@ def import_config(config_file: PathIsh) -> Config:
124
126
 
125
127
  # todo just exec??
126
128
  name = p.stem
127
- spec = importlib.util.spec_from_file_location(name, p); assert spec is not None
128
- mod = importlib.util.module_from_spec(spec); assert mod is not None
129
- loader = spec.loader; assert loader is not None
129
+ spec = importlib.util.spec_from_file_location(name, p)
130
+ assert spec is not None
131
+ mod = importlib.util.module_from_spec(spec)
132
+ assert mod is not None
133
+ loader = spec.loader
134
+ assert loader is not None
130
135
  loader.exec_module(mod)
131
136
 
132
137
  d = {}
@@ -148,7 +153,7 @@ def use_cores() -> int | None:
148
153
  return None
149
154
  try:
150
155
  return int(cs)
151
- except ValueError: # any other value means 'use all
156
+ except ValueError: # any other value means 'use all
152
157
  return 0
153
158
 
154
159
 
@@ -158,5 +163,5 @@ def extra_fd_args() -> list[str]:
158
163
  Can be used to pass --ignore-file parameter
159
164
  '''
160
165
  v = os.environ.get('PROMNESIA_FD_EXTRA_ARGS', '')
161
- extra = v.split() # eh, hopefully splitting that way is ok...
166
+ extra = v.split() # eh, hopefully splitting that way is ok...
162
167
  return extra
@@ -3,7 +3,6 @@ from __future__ import annotations
3
3
  import sqlite3
4
4
  from collections.abc import Iterable
5
5
  from pathlib import Path
6
- from typing import Optional
7
6
 
8
7
  from more_itertools import chunked
9
8
  from sqlalchemy import (
@@ -51,7 +50,7 @@ def begin_immediate_transaction(conn):
51
50
  conn.exec_driver_sql('BEGIN IMMEDIATE')
52
51
 
53
52
 
54
- Stats = dict[Optional[SourceName], int]
53
+ Stats = dict[SourceName | None, int]
55
54
 
56
55
 
57
56
  # returns critical warnings