promnesia 1.2.20240810__py3-none-any.whl → 1.4.20250909__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. promnesia/__init__.py +18 -4
  2. promnesia/__main__.py +104 -78
  3. promnesia/cannon.py +108 -107
  4. promnesia/common.py +107 -88
  5. promnesia/compare.py +33 -30
  6. promnesia/compat.py +10 -10
  7. promnesia/config.py +37 -34
  8. promnesia/database/common.py +4 -3
  9. promnesia/database/dump.py +13 -13
  10. promnesia/database/load.py +7 -7
  11. promnesia/extract.py +19 -17
  12. promnesia/logging.py +27 -15
  13. promnesia/misc/install_server.py +32 -27
  14. promnesia/server.py +106 -79
  15. promnesia/sources/auto.py +104 -77
  16. promnesia/sources/auto_logseq.py +6 -5
  17. promnesia/sources/auto_obsidian.py +2 -2
  18. promnesia/sources/browser.py +20 -10
  19. promnesia/sources/browser_legacy.py +65 -50
  20. promnesia/sources/demo.py +7 -8
  21. promnesia/sources/fbmessenger.py +3 -3
  22. promnesia/sources/filetypes.py +22 -16
  23. promnesia/sources/github.py +9 -8
  24. promnesia/sources/guess.py +6 -2
  25. promnesia/sources/hackernews.py +7 -9
  26. promnesia/sources/hpi.py +5 -3
  27. promnesia/sources/html.py +11 -7
  28. promnesia/sources/hypothesis.py +3 -2
  29. promnesia/sources/instapaper.py +3 -2
  30. promnesia/sources/markdown.py +22 -12
  31. promnesia/sources/org.py +36 -17
  32. promnesia/sources/plaintext.py +41 -39
  33. promnesia/sources/pocket.py +5 -3
  34. promnesia/sources/reddit.py +24 -26
  35. promnesia/sources/roamresearch.py +5 -2
  36. promnesia/sources/rss.py +6 -8
  37. promnesia/sources/shellcmd.py +21 -11
  38. promnesia/sources/signal.py +27 -26
  39. promnesia/sources/smscalls.py +2 -3
  40. promnesia/sources/stackexchange.py +5 -4
  41. promnesia/sources/takeout.py +37 -34
  42. promnesia/sources/takeout_legacy.py +29 -19
  43. promnesia/sources/telegram.py +18 -12
  44. promnesia/sources/telegram_legacy.py +22 -11
  45. promnesia/sources/twitter.py +7 -6
  46. promnesia/sources/vcs.py +11 -6
  47. promnesia/sources/viber.py +11 -10
  48. promnesia/sources/website.py +8 -7
  49. promnesia/sources/zulip.py +3 -2
  50. promnesia/sqlite.py +13 -7
  51. promnesia/tests/common.py +10 -5
  52. promnesia/tests/server_helper.py +13 -10
  53. promnesia/tests/sources/test_auto.py +2 -3
  54. promnesia/tests/sources/test_filetypes.py +11 -8
  55. promnesia/tests/sources/test_hypothesis.py +10 -6
  56. promnesia/tests/sources/test_org.py +9 -5
  57. promnesia/tests/sources/test_plaintext.py +9 -8
  58. promnesia/tests/sources/test_shellcmd.py +13 -13
  59. promnesia/tests/sources/test_takeout.py +3 -5
  60. promnesia/tests/test_cannon.py +256 -239
  61. promnesia/tests/test_cli.py +12 -8
  62. promnesia/tests/test_compare.py +17 -13
  63. promnesia/tests/test_config.py +7 -8
  64. promnesia/tests/test_db_dump.py +15 -15
  65. promnesia/tests/test_extract.py +17 -10
  66. promnesia/tests/test_indexer.py +24 -18
  67. promnesia/tests/test_server.py +12 -13
  68. promnesia/tests/test_traverse.py +0 -2
  69. promnesia/tests/utils.py +3 -7
  70. promnesia-1.4.20250909.dist-info/METADATA +66 -0
  71. promnesia-1.4.20250909.dist-info/RECORD +80 -0
  72. {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/WHEEL +1 -2
  73. promnesia/kjson.py +0 -121
  74. promnesia/sources/__init__.pyi +0 -0
  75. promnesia-1.2.20240810.dist-info/METADATA +0 -54
  76. promnesia-1.2.20240810.dist-info/RECORD +0 -83
  77. promnesia-1.2.20240810.dist-info/top_level.txt +0 -1
  78. {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/entry_points.txt +0 -0
  79. {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info/licenses}/LICENSE +0 -0
promnesia/common.py CHANGED
@@ -1,51 +1,55 @@
1
1
  from __future__ import annotations
2
2
 
3
- from contextlib import contextmanager
4
- from datetime import datetime, date
5
- from functools import lru_cache
6
- from glob import glob
7
3
  import itertools
8
4
  import logging
9
5
  import os
10
- from pathlib import Path
6
+ import re
11
7
  import shutil
12
- from subprocess import run, PIPE, Popen
8
+ import tempfile
9
+ import warnings
10
+ from collections.abc import Callable, Iterable, Sequence
11
+ from contextlib import contextmanager
12
+ from copy import copy
13
+ from datetime import date, datetime, timezone
14
+ from functools import lru_cache
15
+ from glob import glob
16
+ from pathlib import Path
17
+ from subprocess import PIPE, Popen, run
13
18
  from timeit import default_timer as timer
14
19
  from types import ModuleType
15
- from typing import NamedTuple, Iterable, TypeVar, Callable, List, Optional, Union, TypeVar
16
- import warnings
20
+ from typing import TYPE_CHECKING, NamedTuple, TypeAlias, TypeVar
21
+ from zoneinfo import ZoneInfo
17
22
 
23
+ import platformdirs
18
24
  from more_itertools import intersperse
19
- import pytz
20
25
 
21
26
  from .cannon import canonify
22
- from .compat import removeprefix
23
-
24
27
 
25
28
  _is_windows = os.name == 'nt'
26
29
 
27
30
  T = TypeVar('T')
28
- Res = Union[T, Exception]
31
+ Res: TypeAlias = T | Exception
29
32
 
30
- PathIsh = Union[str, Path]
33
+ PathIsh = str | Path
31
34
 
32
35
  Url = str
33
36
  SourceName = str
34
- DatetimeIsh = Union[datetime, date]
37
+ DatetimeIsh = datetime | date
35
38
  Context = str
36
39
  Second = int
37
40
 
41
+
38
42
  # TODO hmm. arguably, source and context are almost same things...
39
43
  class Loc(NamedTuple):
40
44
  title: str
41
- href: Optional[str]=None
45
+ href: str | None = None
42
46
 
43
47
  @classmethod
44
- def make(cls, title: str, href: Optional[str]=None) -> 'Loc':
48
+ def make(cls, title: str, href: str | None = None) -> Loc:
45
49
  return cls(title=title, href=href)
46
50
 
47
51
  @classmethod
48
- def file(cls, path: PathIsh, line: Optional[int]=None, relative_to: Optional[Path]=None) -> 'Loc':
52
+ def file(cls, path: PathIsh, line: int | None = None, relative_to: Path | None = None) -> Loc:
49
53
  lstr = '' if line is None else f':{line}'
50
54
  # todo loc should be url encoded? dunno.
51
55
  # or use line=? eh. I don't know. Just ask in issues.
@@ -53,11 +57,11 @@ class Loc(NamedTuple):
53
57
  # todo: handler has to be overridable by config. This is needed for docker, but also for a "as a service" install, where the sources would be available on some remote webserver
54
58
  # maybe it should be treated as a format string, so that {line} may be a part of the result or not.
55
59
  # for local usage, editor:///file:line works, but if the txt file is only available through http, it breaks.
56
- #if get_config().MIME_HANDLER:
60
+ # if get_config().MIME_HANDLER:
57
61
  # handler = get_config().MIME_HANDLER
58
- #if True:
62
+ # if True:
59
63
  # handler = 'editor:///home/koom/promnesia/docker/'
60
- #else:
64
+ # else:
61
65
  handler = _detect_mime_handler()
62
66
 
63
67
  rel = Path(path)
@@ -65,13 +69,10 @@ class Loc(NamedTuple):
65
69
  try:
66
70
  # making it relative is a bit nicer for display
67
71
  rel = rel.relative_to(relative_to)
68
- except Exception as e:
69
- pass # todo log/warn?
72
+ except Exception:
73
+ pass # todo log/warn?
70
74
  loc = f'{rel}{lstr}'
71
- return cls.make(
72
- title=loc,
73
- href=f'{handler}{path}{lstr}'
74
- )
75
+ return cls.make(title=loc, href=f'{handler}{path}{lstr}')
75
76
 
76
77
  # TODO need some uniform way of string conversion
77
78
  # but generally, it will be
@@ -87,19 +88,21 @@ def warn_once(message: str) -> None:
87
88
 
88
89
 
89
90
  def _warn_no_xdg_mime() -> None:
90
- warn_once("No xdg-mime on your OS! If you're on OSX, perhaps you can help me! https://github.com/karlicoss/open-in-editor/issues/1")
91
+ warn_once(
92
+ "No xdg-mime on your OS! If you're on OSX, perhaps you can help me! https://github.com/karlicoss/open-in-editor/issues/1"
93
+ )
91
94
 
92
95
 
93
96
  @lru_cache(1)
94
97
  def _detect_mime_handler() -> str:
95
98
  def exists(what: str) -> bool:
96
99
  try:
97
- r = run(f'xdg-mime query default x-scheme-handler/{what}'.split(), stdout=PIPE)
100
+ r = run(f'xdg-mime query default x-scheme-handler/{what}'.split(), stdout=PIPE, check=False)
98
101
  except (FileNotFoundError, NotADirectoryError): # ugh seems that osx might throw NotADirectory for some reason
99
102
  _warn_no_xdg_mime()
100
103
  return False
101
104
  if r.returncode > 0:
102
- warnings.warn('xdg-mime failed') # hopefully rest is in stderr
105
+ warnings.warn('xdg-mime failed') # hopefully rest is in stderr
103
106
  return False
104
107
  # todo not sure if should check=True or something
105
108
  handler = r.stdout.decode('utf8').strip()
@@ -108,11 +111,13 @@ def _detect_mime_handler() -> str:
108
111
  # 1. detect legacy 'emacs:' handler (so it doesn't break for existing users)
109
112
  result = None
110
113
  if exists('emacs'):
111
- warnings.warn('''
114
+ warnings.warn(
115
+ '''
112
116
  'emacs:' handler is deprecated!
113
117
  Please use newer version at https://github.com/karlicoss/open-in-editor
114
118
  And remove the old one (most likely, rm ~/.local/share/applications/mimemacs.desktop && update-desktop-database ~/.local/share/applications).
115
- '''.rstrip())
119
+ '''.rstrip()
120
+ )
116
121
  result = 'emacs:'
117
122
 
118
123
  # 2. now try to use newer editor:// thing
@@ -120,10 +125,12 @@ def _detect_mime_handler() -> str:
120
125
 
121
126
  # TODO would be nice to collect warnings and display at the end
122
127
  if not exists('editor'):
123
- warnings.warn('''
128
+ warnings.warn(
129
+ '''
124
130
  You might want to install https://github.com/karlicoss/open-in-editor
125
131
  So you can jump to your text files straight from the browser
126
- '''.rstrip())
132
+ '''.rstrip()
133
+ )
127
134
  else:
128
135
  result = 'editor://'
129
136
 
@@ -139,39 +146,41 @@ class Visit(NamedTuple):
139
146
  # TODO back to DatetimeIsh, but somehow make compatible to dbcache?
140
147
  dt: datetime
141
148
  locator: Loc
142
- context: Optional[Context] = None
143
- duration: Optional[Second] = None
149
+ context: Context | None = None
150
+ duration: Second | None = None
144
151
  # TODO shit. I need to insert it in chrome db....
145
152
  # TODO gonna be hard to fill retroactively.
146
153
  # spent: Optional[Second] = None
147
- debug: Optional[str] = None
154
+ debug: str | None = None
155
+
148
156
 
149
- Result = Union[Visit, Exception]
157
+ Result = Visit | Exception
150
158
  Results = Iterable[Result]
151
159
  Extractor = Callable[[], Results]
152
160
 
153
161
  Extraction = Result # TODO deprecate!
154
162
 
163
+
155
164
  class DbVisit(NamedTuple):
156
165
  norm_url: Url
157
166
  orig_url: Url
158
167
  dt: datetime
159
168
  locator: Loc
160
- src: Optional[SourceName] = None
161
- context: Optional[Context] = None
162
- duration: Optional[Second] = None
169
+ src: SourceName | None = None
170
+ context: Context | None = None
171
+ duration: Second | None = None
163
172
 
164
173
  @staticmethod
165
- def make(p: Visit, src: SourceName) -> Res['DbVisit']:
174
+ def make(p: Visit, src: SourceName) -> Res[DbVisit]:
166
175
  try:
167
176
  # hmm, mypy gets a bit confused here.. presumably because datetime is always datetime (but date is not datetime)
168
177
  if isinstance(p.dt, datetime):
169
178
  dt = p.dt
170
179
  elif isinstance(p.dt, date):
171
180
  # TODO that won't be with timezone..
172
- dt = datetime.combine(p.dt, datetime.min.time()) # meh..
181
+ dt = datetime.combine(p.dt, datetime.min.time()) # meh..
173
182
  else:
174
- raise AssertionError(f'unexpected date: {p.dt}, {type(p.dt)}')
183
+ raise TypeError(f'unexpected date: {p.dt}, {type(p.dt)}') # noqa: TRY301
175
184
  except Exception as e:
176
185
  return e
177
186
 
@@ -196,35 +205,37 @@ Filter = Callable[[Url], bool]
196
205
 
197
206
 
198
207
  from .logging import LazyLogger
208
+
199
209
  logger = LazyLogger('promnesia', level='DEBUG')
200
210
 
211
+
201
212
  def get_logger() -> logging.Logger:
202
213
  # deprecate? no need since logger is lazy already
203
214
  return logger
204
215
 
205
216
 
206
-
207
- import tempfile
208
217
  # kinda singleton
209
218
  @lru_cache(1)
210
219
  def get_tmpdir() -> tempfile.TemporaryDirectory[str]:
211
- # todo use appdirs?
220
+ # todo use platformdirs?
212
221
  tdir = tempfile.TemporaryDirectory(suffix="promnesia")
213
222
  return tdir
214
223
 
224
+
215
225
  # TODO use mypy literal?
216
226
  Syntax = str
217
227
 
218
228
 
219
229
  @lru_cache(None)
220
230
  def _get_urlextractor(syntax: Syntax):
221
- from urlextract import URLExtract # type: ignore
231
+ from urlextract import URLExtract # type: ignore[import-untyped]
232
+
222
233
  u = URLExtract()
223
234
  # https://github.com/lipoja/URLExtract/issues/13
224
- if syntax in {'org', 'orgmode', 'org-mode'}: # TODO remove hardcoding..
235
+ if syntax in {'org', 'orgmode', 'org-mode'}: # TODO remove hardcoding..
225
236
  # handle org-mode links properly..
226
237
  u._stop_chars_right |= {'[', ']'}
227
- u._stop_chars_left |= {'[', ']'}
238
+ u._stop_chars_left |= {'[', ']'}
228
239
  elif syntax in {'md', 'markdown'}:
229
240
  pass
230
241
  # u._stop_chars_right |= {','}
@@ -242,19 +253,19 @@ def _sanitize(url: str) -> str:
242
253
  return url
243
254
 
244
255
 
245
- def iter_urls(s: str, *, syntax: Syntax='') -> Iterable[Url]:
256
+ def iter_urls(s: str, *, syntax: Syntax = '') -> Iterable[Url]:
246
257
  urlextractor = _get_urlextractor(syntax=syntax)
247
258
  # note: it also has get_indices, might be useful
248
259
  for u in urlextractor.gen_urls(s):
249
260
  yield _sanitize(u)
250
261
 
251
262
 
252
- def extract_urls(s: str, *, syntax: Syntax='') -> List[Url]:
263
+ def extract_urls(s: str, *, syntax: Syntax = '') -> list[Url]:
253
264
  return list(iter_urls(s=s, syntax=syntax))
254
265
 
255
266
 
256
267
  def from_epoch(ts: int) -> datetime:
257
- return datetime.fromtimestamp(ts, tz=pytz.utc)
268
+ return datetime.fromtimestamp(ts, tz=timezone.utc)
258
269
 
259
270
 
260
271
  def join_tags(tags: Iterable[str]) -> str:
@@ -274,7 +285,7 @@ class PathWithMtime(NamedTuple):
274
285
  mtime: float
275
286
 
276
287
  @classmethod
277
- def make(cls, p: Path) -> 'PathWithMtime':
288
+ def make(cls, p: Path) -> PathWithMtime:
278
289
  return cls(
279
290
  path=p,
280
291
  mtime=p.stat().st_mtime,
@@ -285,10 +296,7 @@ class PathWithMtime(NamedTuple):
285
296
  PreExtractor = Callable[..., Results]
286
297
 
287
298
 
288
- PreSource = Union[
289
- PreExtractor,
290
- ModuleType, # module with 'index' functon defined in it
291
- ]
299
+ PreSource = PreExtractor | ModuleType # module with 'index' functon defined in it
292
300
 
293
301
 
294
302
  # todo not sure about this...
@@ -300,7 +308,7 @@ def _guess_name(thing: PreSource) -> str:
300
308
  guess = thing.__module__
301
309
 
302
310
  dflt = 'promnesia.sources.'
303
- guess = removeprefix(guess, prefix=dflt)
311
+ guess = guess.removeprefix(dflt)
304
312
  if guess == 'config':
305
313
  # this happens when we define a lambda in config or something without properly wrapping in Source
306
314
  logger.warning(f'Inferred source name "config" for {thing}. This might be misleading TODO')
@@ -320,7 +328,7 @@ def _get_index_function(sourceish: PreSource) -> PreExtractor:
320
328
  class Source:
321
329
  # TODO make sure it works with empty src?
322
330
  # TODO later, make it properly optional?
323
- def __init__(self, ff: PreSource, *args, src: SourceName='', name: SourceName='', **kwargs) -> None:
331
+ def __init__(self, ff: PreSource, *args, src: SourceName = '', name: SourceName = '', **kwargs) -> None:
324
332
  # NOTE: in principle, would be nice to make the Source countructor to be as dumb as possible
325
333
  # so we could move _get_index_function inside extractor lambda
326
334
  # but that way we get nicer error reporting
@@ -354,6 +362,7 @@ class Source:
354
362
  # TODO deprecated!
355
363
  return self.name
356
364
 
365
+
357
366
  # TODO deprecated
358
367
  Indexer = Source
359
368
 
@@ -362,13 +371,15 @@ Indexer = Source
362
371
  # NOTE: used in configs...
363
372
  def last(path: PathIsh, *parts: str) -> Path:
364
373
  import os.path
365
- pp = os.path.join(str(path), *parts)
366
- return Path(max(glob(pp, recursive=True)))
367
374
 
375
+ pp = os.path.join(str(path), *parts) # noqa: PTH118
376
+ return Path(max(glob(pp, recursive=True))) # noqa: PTH207
368
377
 
369
- from .logging import setup_logger
370
378
 
371
- from copy import copy
379
+ from .logging import setup_logger # noqa: F401
380
+
381
+
382
+ # TODO get rid of this? not sure if still necessary
372
383
  def echain(ex: Exception, cause: Exception) -> Exception:
373
384
  e = copy(ex)
374
385
  e.__cause__ = cause
@@ -382,50 +393,48 @@ def echain(ex: Exception, cause: Exception) -> Exception:
382
393
 
383
394
  def slugify(x: str) -> str:
384
395
  # https://stackoverflow.com/a/38766141/706389
385
- import re
386
396
  valid_file_name = re.sub(r'[^\w_.)( -]', '', x)
387
397
  return valid_file_name
388
398
 
389
399
 
390
400
  # todo cache?
391
- def appdirs():
401
+ def _platformdirs() -> platformdirs.PlatformDirs:
392
402
  under_test = os.environ.get('PYTEST_CURRENT_TEST') is not None
393
403
  # todo actually use test name?
394
404
  name = 'promnesia-test' if under_test else 'promnesia'
395
- import appdirs as ad # type: ignore[import-untyped]
396
- return ad.AppDirs(appname=name)
405
+ return platformdirs.PlatformDirs(appname=name)
397
406
 
398
407
 
399
408
  def default_output_dir() -> Path:
400
409
  # TODO: on Windows, there are two extra subdirectories (<AppAuthor>\<AppName>)
401
410
  # perhaps makes sense to create it here with parents to avoid issues downstream?
402
- return Path(appdirs().user_data_dir)
411
+ return Path(_platformdirs().user_data_dir)
403
412
 
404
413
 
405
414
  def default_cache_dir() -> Path:
406
- return Path(appdirs().user_cache_dir)
415
+ return Path(_platformdirs().user_cache_dir)
407
416
 
408
417
 
409
418
  # make it lazy, otherwise it might crash on module import (e.g. on Windows)
410
419
  # ideally would be nice to fix it properly https://github.com/ahupp/python-magic#windows
411
420
  @lru_cache(1)
412
- def _magic() -> Callable[[PathIsh], Optional[str]]:
421
+ def _magic() -> Callable[[PathIsh], str | None]:
413
422
  logger = get_logger()
414
423
  try:
415
- import magic # type: ignore
424
+ import magic # type: ignore[import-not-found]
416
425
  except Exception as e:
417
426
  logger.exception(e)
418
- defensive_msg: Optional[str] = None
427
+ defensive_msg: str | None = None
419
428
  if isinstance(e, ModuleNotFoundError) and e.name == 'magic':
420
429
  defensive_msg = "python-magic is not detected. It's recommended for better file type detection (pip3 install --user python-magic). See https://github.com/ahupp/python-magic#installation"
421
430
  elif isinstance(e, ImportError):
422
- emsg = getattr(e, 'msg', '') # make mypy happy
423
- if 'failed to find libmagic' in emsg: # probably the actual library is missing?...
431
+ emsg = getattr(e, 'msg', '') # make mypy happy
432
+ if 'failed to find libmagic' in emsg: # probably the actual library is missing?...
424
433
  defensive_msg = "couldn't import magic. See https://github.com/ahupp/python-magic#installation"
425
434
  if defensive_msg is not None:
426
435
  logger.warning(defensive_msg)
427
436
  warnings.warn(defensive_msg)
428
- return lambda path: None # stub
437
+ return lambda path: None # stub # noqa: ARG005
429
438
  else:
430
439
  raise e
431
440
  else:
@@ -437,11 +446,12 @@ def _magic() -> Callable[[PathIsh], Optional[str]]:
437
446
  @lru_cache(1)
438
447
  def _mimetypes():
439
448
  import mimetypes
449
+
440
450
  mimetypes.init()
441
451
  return mimetypes
442
452
 
443
453
 
444
- def mime(path: PathIsh) -> Optional[str]:
454
+ def mime(path: PathIsh) -> str | None:
445
455
  ps = str(path)
446
456
  mimetypes = _mimetypes()
447
457
  # first try mimetypes, it's only using the filename without opening the file
@@ -453,7 +463,7 @@ def mime(path: PathIsh) -> Optional[str]:
453
463
  return magic(ps)
454
464
 
455
465
 
456
- def find_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
466
+ def find_args(root: Path, *, follow: bool, ignore: Sequence[str] = ()) -> list[str]:
457
467
  prune_dir_args = []
458
468
  ignore_file_args = []
459
469
  if ignore:
@@ -473,10 +483,10 @@ def find_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
473
483
  *prune_dir_args,
474
484
  '-type', 'f',
475
485
  *ignore_file_args
476
- ]
486
+ ] # fmt: skip
477
487
 
478
488
 
479
- def fdfind_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
489
+ def fdfind_args(root: Path, *, follow: bool, ignore: Sequence[str] = ()) -> list[str]:
480
490
  from .config import extra_fd_args
481
491
 
482
492
  ignore_args = []
@@ -493,10 +503,10 @@ def fdfind_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
493
503
  '--type', 'f',
494
504
  '.',
495
505
  str(root),
496
- ]
506
+ ] # fmt: skip
497
507
 
498
508
 
499
- def traverse(root: Path, *, follow: bool=True, ignore: List[str]=[]) -> Iterable[Path]:
509
+ def traverse(root: Path, *, follow: bool = True, ignore: Sequence[str] = ()) -> Iterable[Path]:
500
510
  if not root.is_dir():
501
511
  yield root
502
512
  return
@@ -515,12 +525,14 @@ def traverse(root: Path, *, follow: bool=True, ignore: List[str]=[]) -> Iterable
515
525
 
516
526
  cmd = ['find', *find_args(root, follow=follow, ignore=ignore)]
517
527
  # try to use fd.. it cooperates well with gitignore etc, also faster than find
518
- for x in ('fd', 'fd-find', 'fdfind'): # has different names on different dists..
528
+ for x in ('fd', 'fd-find', 'fdfind'): # has different names on different dists..
519
529
  if shutil.which(x):
520
530
  cmd = [x, *fdfind_args(root, follow=follow, ignore=ignore)]
521
531
  break
522
532
  else:
523
- warnings.warn("'fdfind' is recommended for the best indexing performance. See https://github.com/sharkdp/fd#installation. Falling back to 'find'")
533
+ warnings.warn(
534
+ "'fdfind' is recommended for the best indexing performance. See https://github.com/sharkdp/fd#installation. Falling back to 'find'"
535
+ )
524
536
 
525
537
  logger.debug('running: %s', cmd)
526
538
  # TODO split by \0?
@@ -537,6 +549,7 @@ def traverse(root: Path, *, follow: bool=True, ignore: List[str]=[]) -> Iterable
537
549
  def get_system_zone() -> str:
538
550
  try:
539
551
  import tzlocal
552
+
540
553
  return tzlocal.get_localzone_name()
541
554
  except Exception as e:
542
555
  logger.exception(e)
@@ -545,14 +558,15 @@ def get_system_zone() -> str:
545
558
 
546
559
 
547
560
  @lru_cache(1)
548
- def get_system_tz() -> pytz.BaseTzInfo:
561
+ def get_system_tz() -> ZoneInfo:
549
562
  zone = get_system_zone()
550
563
  try:
551
- return pytz.timezone(zone)
564
+ return ZoneInfo(zone)
552
565
  except Exception as e:
553
566
  logger.exception(e)
554
567
  logger.error("Unknown time zone %s. Falling back to UTC. Please report this as a bug!", zone)
555
- return pytz.utc
568
+ return ZoneInfo('UTC')
569
+
556
570
 
557
571
  # used in misc/install_server.py
558
572
  def root() -> Path:
@@ -574,7 +588,7 @@ def user_config_file() -> Path:
574
588
  if "PROMNESIA_CONFIG" in os.environ:
575
589
  return Path(os.environ["PROMNESIA_CONFIG"])
576
590
  else:
577
- return Path(appdirs().user_config_dir) / 'config.py'
591
+ return Path(_platformdirs().user_config_dir) / 'config.py'
578
592
 
579
593
 
580
594
  def default_config_path() -> Path:
@@ -589,7 +603,7 @@ def default_config_path() -> Path:
589
603
 
590
604
 
591
605
  @contextmanager
592
- def measure(tag: str='', *, logger: logging.Logger, unit: str='ms'):
606
+ def measure(tag: str = '', *, logger: logging.Logger, unit: str = 'ms'):
593
607
  before = timer()
594
608
  yield lambda: timer() - before
595
609
  after = timer()
@@ -605,3 +619,8 @@ def is_sqlite_db(x: Path) -> bool:
605
619
  'application/vnd.sqlite3',
606
620
  # TODO this mime can also match wal files/journals, not sure
607
621
  }
622
+
623
+
624
+ if not TYPE_CHECKING:
625
+ # todo deprecate properly --just backwards compat
626
+ from .compat import removeprefix # noqa: F401
promnesia/compare.py CHANGED
@@ -1,69 +1,71 @@
1
- #!/usr/bin/env python3
1
+ from __future__ import annotations
2
+
2
3
  # TODO perhaps make it external script?
3
4
  import argparse
4
- from pathlib import Path
5
5
  import logging
6
6
  import sys
7
- from typing import Dict, List, Any, NamedTuple, Optional, Iterator, Set, Tuple
8
-
7
+ from collections.abc import Iterator, Sequence
8
+ from pathlib import Path
9
+ from typing import TypeVar
9
10
 
10
- from .common import DbVisit, Url, PathWithMtime # TODO ugh. figure out pythonpath
11
+ from .common import DbVisit, PathWithMtime, Url
11
12
  from .database.load import row_to_db_visit
12
13
 
13
14
  # TODO include latest too?
14
15
  # from cconfig import ignore, filtered
15
16
 
17
+
16
18
  def get_logger():
17
19
  return logging.getLogger('promnesia-db-changes')
18
20
 
19
- # TODO return error depending on severity?
20
-
21
21
 
22
- from typing import TypeVar, Sequence
22
+ # TODO return error depending on severity?
23
23
 
24
24
 
25
25
  T = TypeVar('T')
26
26
 
27
+
27
28
  def eliminate_by(sa: Sequence[T], sb: Sequence[T], key):
28
- def make_dict(s: Sequence[T]) -> Dict[str, List[T]]:
29
- res: Dict[str, List[T]] = {}
29
+ def make_dict(s: Sequence[T]) -> dict[str, list[T]]:
30
+ res: dict[str, list[T]] = {}
30
31
  for a in s:
31
32
  k = key(a)
32
- ll = res.get(k, None)
33
+ ll = res.get(k)
33
34
  if ll is None:
34
35
  ll = []
35
36
  res[k] = ll
36
37
  ll.append(a)
37
38
  return res
39
+
38
40
  da = make_dict(sa)
39
41
  db = make_dict(sb)
40
42
  ka = set(da.keys())
41
43
  kb = set(db.keys())
42
- onlya: Set[T] = set()
43
- common: Set[T] = set()
44
- onlyb: Set[T] = set()
44
+ onlya: set[T] = set()
45
+ common: set[T] = set()
46
+ onlyb: set[T] = set()
45
47
  for k in ka.union(kb):
46
48
  la = da.get(k, [])
47
49
  lb = db.get(k, [])
48
- common.update(la[:min(len(la), len(lb))])
50
+ common.update(la[: min(len(la), len(lb))])
49
51
  if len(la) > len(lb):
50
- onlya.update(la[len(lb):])
52
+ onlya.update(la[len(lb) :])
51
53
  if len(lb) > len(la):
52
- onlyb.update(lb[len(la):])
54
+ onlyb.update(lb[len(la) :])
53
55
 
54
56
  return onlya, common, onlyb
55
57
 
56
58
 
57
- def compare(before: List[DbVisit], after: List[DbVisit], between: str, *, log=True) -> List[DbVisit]:
59
+ def compare(before: list[DbVisit], after: list[DbVisit], between: str, *, log=True) -> list[DbVisit]:
58
60
  logger = get_logger()
59
61
  logger.info('comparing between: %s', between)
60
62
 
61
- errors: List[DbVisit] = []
63
+ errors: list[DbVisit] = []
62
64
 
63
- umap: Dict[Url, List[DbVisit]] = {}
65
+ umap: dict[Url, list[DbVisit]] = {}
64
66
  for a in after:
65
67
  url = a.norm_url
66
- xx = umap.get(url, []) # TODO canonify here?
68
+ xx = umap.get(url, []) # TODO canonify here?
67
69
  xx.append(a)
68
70
  umap[url] = xx
69
71
 
@@ -71,8 +73,7 @@ def compare(before: List[DbVisit], after: List[DbVisit], between: str, *, log=Tr
71
73
  errors.append(b)
72
74
  if log:
73
75
  logger.error('between %s missing %s', between, b)
74
- print('ignoreline "%s", # %s %s' % ('exid', b.norm_url, b.src), file=sys.stderr)
75
-
76
+ print('ignoreline "{}", # {} {}'.format('exid', b.norm_url, b.src), file=sys.stderr)
76
77
 
77
78
  # the idea is that we eliminate items simultaneously from both sets
78
79
  eliminations = [
@@ -80,7 +81,7 @@ def compare(before: List[DbVisit], after: List[DbVisit], between: str, *, log=Tr
80
81
  ('without dt' , lambda x: x._replace(src='', dt='')),
81
82
  ('without context' , lambda x: x._replace(src='', context='', locator='')),
82
83
  ('without dt and context' , lambda x: x._replace(src='', dt='', context='', locator='')),
83
- ]
84
+ ] # fmt: skip
84
85
  for ename, ekey in eliminations:
85
86
  logger.info('eliminating by %s', ename)
86
87
  logger.info('before: %d, after: %d', len(before), len(after))
@@ -96,6 +97,7 @@ def compare(before: List[DbVisit], after: List[DbVisit], between: str, *, log=Tr
96
97
 
97
98
  return errors
98
99
 
100
+
99
101
  def setup_parser(p):
100
102
  # TODO better name?
101
103
  p.add_argument('--intermediate-dir', type=Path)
@@ -108,8 +110,8 @@ def get_files(args):
108
110
  if len(args.paths) == 0:
109
111
  int_dir = args.intermediate_dir
110
112
  assert int_dir.exists()
111
- files = list(sorted(int_dir.glob('*.sqlite*')))
112
- files = files[-args.last:]
113
+ files = sorted(int_dir.glob('*.sqlite*'))
114
+ files = files[-args.last :]
113
115
  else:
114
116
  files = [Path(p) for p in args.paths]
115
117
  return files
@@ -126,7 +128,7 @@ def main():
126
128
  sys.exit(1)
127
129
 
128
130
 
129
- def compare_files(*files: Path, log=True) -> Iterator[Tuple[str, DbVisit]]:
131
+ def compare_files(*files: Path, log=True) -> Iterator[tuple[str, DbVisit]]:
130
132
  assert len(files) > 0
131
133
 
132
134
  logger = get_logger()
@@ -137,9 +139,10 @@ def compare_files(*files: Path, log=True) -> Iterator[Tuple[str, DbVisit]]:
137
139
  for f in files:
138
140
  logger.info('processing %r', f)
139
141
  name = f.name
140
- this_dts = name[0: name.index('.')] # can't use stem due to multiple extensions..
142
+ this_dts = name[0 : name.index('.')] # can't use stem due to multiple extensions..
143
+
144
+ from promnesia.server import _get_stuff # TODO ugh
141
145
 
142
- from promnesia.server import _get_stuff # TODO ugh
143
146
  engine, table = _get_stuff(PathWithMtime.make(f))
144
147
 
145
148
  with engine.connect() as conn:
@@ -153,6 +156,6 @@ def compare_files(*files: Path, log=True) -> Iterator[Tuple[str, DbVisit]]:
153
156
  last = vis
154
157
  last_dts = this_dts
155
158
 
159
+
156
160
  if __name__ == '__main__':
157
161
  main()
158
-