promnesia 1.2.20240810__py3-none-any.whl → 1.3.20241021__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. promnesia/__init__.py +14 -3
  2. promnesia/__main__.py +38 -25
  3. promnesia/cannon.py +23 -23
  4. promnesia/common.py +49 -42
  5. promnesia/compare.py +18 -20
  6. promnesia/compat.py +10 -10
  7. promnesia/config.py +20 -22
  8. promnesia/database/common.py +4 -3
  9. promnesia/database/dump.py +14 -13
  10. promnesia/database/load.py +7 -7
  11. promnesia/extract.py +13 -11
  12. promnesia/kjson.py +11 -10
  13. promnesia/logging.py +1 -1
  14. promnesia/misc/install_server.py +7 -8
  15. promnesia/server.py +42 -31
  16. promnesia/sources/auto.py +43 -30
  17. promnesia/sources/auto_logseq.py +6 -5
  18. promnesia/sources/auto_obsidian.py +2 -2
  19. promnesia/sources/browser.py +14 -9
  20. promnesia/sources/browser_legacy.py +17 -13
  21. promnesia/sources/demo.py +7 -7
  22. promnesia/sources/fbmessenger.py +3 -2
  23. promnesia/sources/filetypes.py +9 -7
  24. promnesia/sources/github.py +5 -7
  25. promnesia/sources/guess.py +2 -1
  26. promnesia/sources/hackernews.py +2 -2
  27. promnesia/sources/hpi.py +2 -2
  28. promnesia/sources/html.py +7 -5
  29. promnesia/sources/hypothesis.py +3 -2
  30. promnesia/sources/instapaper.py +2 -2
  31. promnesia/sources/markdown.py +17 -7
  32. promnesia/sources/org.py +20 -10
  33. promnesia/sources/plaintext.py +30 -31
  34. promnesia/sources/pocket.py +3 -2
  35. promnesia/sources/reddit.py +19 -18
  36. promnesia/sources/roamresearch.py +2 -1
  37. promnesia/sources/rss.py +3 -4
  38. promnesia/sources/shellcmd.py +19 -6
  39. promnesia/sources/signal.py +14 -13
  40. promnesia/sources/smscalls.py +2 -2
  41. promnesia/sources/stackexchange.py +3 -2
  42. promnesia/sources/takeout.py +23 -13
  43. promnesia/sources/takeout_legacy.py +15 -11
  44. promnesia/sources/telegram.py +13 -11
  45. promnesia/sources/telegram_legacy.py +18 -7
  46. promnesia/sources/twitter.py +6 -5
  47. promnesia/sources/vcs.py +5 -3
  48. promnesia/sources/viber.py +10 -9
  49. promnesia/sources/website.py +4 -4
  50. promnesia/sources/zulip.py +3 -2
  51. promnesia/sqlite.py +7 -4
  52. promnesia/tests/common.py +8 -5
  53. promnesia/tests/server_helper.py +11 -8
  54. promnesia/tests/sources/test_auto.py +2 -3
  55. promnesia/tests/sources/test_filetypes.py +2 -1
  56. promnesia/tests/sources/test_hypothesis.py +3 -3
  57. promnesia/tests/sources/test_org.py +2 -3
  58. promnesia/tests/sources/test_plaintext.py +0 -1
  59. promnesia/tests/sources/test_shellcmd.py +3 -4
  60. promnesia/tests/sources/test_takeout.py +3 -5
  61. promnesia/tests/test_cannon.py +5 -5
  62. promnesia/tests/test_cli.py +4 -6
  63. promnesia/tests/test_compare.py +1 -1
  64. promnesia/tests/test_config.py +7 -8
  65. promnesia/tests/test_db_dump.py +11 -12
  66. promnesia/tests/test_extract.py +10 -6
  67. promnesia/tests/test_indexer.py +14 -8
  68. promnesia/tests/test_server.py +2 -3
  69. promnesia/tests/test_traverse.py +0 -2
  70. promnesia/tests/utils.py +4 -4
  71. {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/METADATA +3 -2
  72. promnesia-1.3.20241021.dist-info/RECORD +83 -0
  73. {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/WHEEL +1 -1
  74. promnesia-1.2.20240810.dist-info/RECORD +0 -83
  75. {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/LICENSE +0 -0
  76. {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/entry_points.txt +0 -0
  77. {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/top_level.txt +0 -0
promnesia/__init__.py CHANGED
@@ -1,6 +1,17 @@
1
- from pathlib import Path
2
- from .common import PathIsh, Visit, Source, last, Loc, Results, DbVisit, Context, Res
3
-
4
1
  # add deprecation warning so eventually this may converted to a namespace package?
5
2
  import warnings
3
+
4
+ from .common import ( # noqa: F401
5
+ Context,
6
+ DbVisit,
7
+ Loc,
8
+ PathIsh,
9
+ Res,
10
+ Results,
11
+ Source,
12
+ Visit,
13
+ last,
14
+ )
15
+
16
+ # TODO think again about it -- what are the pros and cons?
6
17
  warnings.warn("DEPRECATED! Please import directly from 'promnesia.common', e.g. 'from promnesia.common import Visit, Source, Results'", DeprecationWarning)
promnesia/__main__.py CHANGED
@@ -5,24 +5,34 @@ import ast
5
5
  import importlib
6
6
  import inspect
7
7
  import os
8
- from pathlib import Path
8
+ import shlex
9
9
  import shutil
10
- from subprocess import run, check_call, Popen
11
10
  import sys
11
+ from collections.abc import Iterable, Iterator, Sequence
12
+ from pathlib import Path
13
+ from subprocess import Popen, check_call, run
12
14
  from tempfile import TemporaryDirectory, gettempdir
13
- from typing import Callable, Sequence, Iterable, Iterator, Union
14
-
15
-
16
- from . import config
17
- from . import server
18
- from .misc import install_server
19
- from .common import Extractor, PathIsh, logger, get_tmpdir, DbVisit, Res
20
- from .common import Source, get_system_tz, user_config_file, default_config_path
15
+ from typing import Callable
16
+
17
+ from . import config, server
18
+ from .common import (
19
+ DbVisit,
20
+ Extractor,
21
+ PathIsh,
22
+ Res,
23
+ Source,
24
+ default_config_path,
25
+ get_system_tz,
26
+ get_tmpdir,
27
+ logger,
28
+ user_config_file,
29
+ )
21
30
  from .database.dump import visits_to_sqlite
22
31
  from .extract import extract_visits
32
+ from .misc import install_server
23
33
 
24
34
 
25
- def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Res[DbVisit]]:
35
+ def iter_all_visits(sources_subset: Iterable[str | int] = ()) -> Iterator[Res[DbVisit]]:
26
36
  cfg = config.get()
27
37
  output_dir = cfg.output_dir
28
38
  # not sure if belongs here??
@@ -74,7 +84,7 @@ def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Re
74
84
  logger.warning("unknown --sources: %s", ", ".join(repr(i) for i in sources_subset))
75
85
 
76
86
 
77
- def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=(), overwrite_db: bool=False) -> Iterable[Exception]:
87
+ def _do_index(*, dry: bool = False, sources_subset: Iterable[str | int] = (), overwrite_db: bool = False) -> Iterable[Exception]:
78
88
  # also keep & return errors for further display
79
89
  errors: list[Exception] = []
80
90
  def it() -> Iterable[Res[DbVisit]]:
@@ -98,9 +108,10 @@ def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=(), ove
98
108
 
99
109
  def do_index(
100
110
  config_file: Path,
101
- dry: bool=False,
102
- sources_subset: Iterable[Union[str, int]]=(),
103
- overwrite_db: bool=False,
111
+ *,
112
+ dry: bool = False,
113
+ sources_subset: Iterable[str | int] = (),
114
+ overwrite_db: bool = False,
104
115
  ) -> Sequence[Exception]:
105
116
  config.load_from(config_file) # meh.. should be cleaner
106
117
  try:
@@ -120,7 +131,8 @@ def demo_sources() -> dict[str, Callable[[], Extractor]]:
120
131
  def lazy(name: str) -> Callable[[], Extractor]:
121
132
  # helper to avoid failed imports etc, since people might be lacking necessary dependencies
122
133
  def inner() -> Extractor:
123
- from . import sources
134
+ # TODO why this import??
135
+ from . import sources # noqa: F401
124
136
  module = importlib.import_module(f'promnesia.sources.{name}')
125
137
  return getattr(module, 'index')
126
138
  return inner
@@ -145,7 +157,7 @@ def do_demo(
145
157
  config_file: Path | None,
146
158
  dry: bool=False,
147
159
  name: str='demo',
148
- sources_subset: Iterable[Union[str, int]]=(),
160
+ sources_subset: Iterable[str | int]=(),
149
161
  overwrite_db: bool=False,
150
162
  ) -> None:
151
163
  with TemporaryDirectory() as tdir:
@@ -219,9 +231,10 @@ def _config_check(cfg: Path) -> Iterable[Exception]:
219
231
  logger.info('config: %s', cfg)
220
232
 
221
233
  def check(cmd: list[str | Path], **kwargs) -> Iterable[Exception]:
222
- logger.debug(' '.join(map(str, cmd)))
223
- res = run(cmd, **kwargs)
234
+ logger.debug(shlex.join(map(str, cmd)))
235
+ res = run(cmd, **kwargs) # noqa: PLW1510
224
236
  if res.returncode > 0:
237
+ # TODO what's up with empty exception??
225
238
  yield Exception()
226
239
 
227
240
  logger.info('Checking syntax...')
@@ -239,7 +252,7 @@ def _config_check(cfg: Path) -> Iterable[Exception]:
239
252
  # todo not sure if should be more defensive than check_call here
240
253
  logger.info('Checking type safety...')
241
254
  try:
242
- import mypy
255
+ import mypy # noqa: F401
243
256
  except ImportError:
244
257
  logger.warning("mypy not found, can't use it to check config!")
245
258
  else:
@@ -291,7 +304,7 @@ def cli_doctor_server(args: argparse.Namespace) -> None:
291
304
  logger.info('You should see the database path and version above!')
292
305
 
293
306
 
294
- def _ordinal_or_name(s: str) -> Union[str, int]:
307
+ def _ordinal_or_name(s: str) -> str | int:
295
308
  try:
296
309
  s = int(s) # type: ignore
297
310
  except ValueError:
@@ -328,7 +341,7 @@ def main() -> None:
328
341
 
329
342
  F = lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, width=120)
330
343
  p = argparse.ArgumentParser(formatter_class=F)
331
- subp = p.add_subparsers(dest='mode', )
344
+ subp = p.add_subparsers(dest='mode' )
332
345
  ep = subp.add_parser('index', help='Create/update the link database', formatter_class=F)
333
346
  add_index_args(ep, default_config_path())
334
347
  # TODO use some way to override or provide config only via cmdline?
@@ -348,7 +361,7 @@ def main() -> None:
348
361
  ap.add_argument('--no-serve', action='store_const', const=None, dest='port', help='Pass to only index without running server')
349
362
  ap.add_argument(
350
363
  '--as',
351
- choices=list(sorted(demo_sources().keys())),
364
+ choices=sorted(demo_sources().keys()),
352
365
  default='guess',
353
366
  help='Promnesia source to index as (see https://github.com/karlicoss/promnesia/tree/master/src/promnesia/sources for the full list)',
354
367
  )
@@ -359,7 +372,7 @@ def main() -> None:
359
372
  install_server.setup_parser(isp)
360
373
 
361
374
  cp = subp.add_parser('config', help='Config management')
362
- cp.set_defaults(func=lambda *args: cp.print_help())
375
+ cp.set_defaults(func=lambda *_args: cp.print_help())
363
376
  scp = cp.add_subparsers()
364
377
  ccp = scp.add_parser('check', help='Check config')
365
378
  ccp.set_defaults(func=config_check)
@@ -373,7 +386,7 @@ def main() -> None:
373
386
 
374
387
  dp = subp.add_parser('doctor', help='Troubleshooting assistant')
375
388
  dp.add_argument('--config', type=Path, default=default_config_path(), help='Config path')
376
- dp.set_defaults(func=lambda *args: dp.print_help())
389
+ dp.set_defaults(func=lambda *_args: dp.print_help())
377
390
  sdp = dp.add_subparsers()
378
391
  sdp.add_parser('config' , help='Check config' ).set_defaults(func=config_check )
379
392
  sdp.add_parser('database', help='Inspect database').set_defaults(func=cli_doctor_db)
promnesia/cannon.py CHANGED
@@ -9,16 +9,17 @@ are same content, but you can't tell that by URL equality. Even canonical urls a
9
9
 
10
10
  Also some experiments to establish 'URL hierarchy'.
11
11
  """
12
- # TODO eh?? they fixed mobile.twitter.com?
12
+ from __future__ import annotations
13
13
 
14
- from itertools import chain
15
14
  import re
16
15
  import typing
17
- from typing import Iterable, NamedTuple, Set, Optional, List, Sequence, Union, Tuple, Dict, Any, Collection
18
-
19
16
  import urllib.parse
20
- from urllib.parse import urlsplit, parse_qsl, urlunsplit, parse_qs, urlencode, SplitResult
17
+ from collections.abc import Collection, Iterable, Sequence
21
18
 
19
+ # TODO eh?? they fixed mobile.twitter.com?
20
+ from itertools import chain
21
+ from typing import Any, NamedTuple, Union
22
+ from urllib.parse import SplitResult, parse_qsl, urlencode, urlsplit, urlunsplit
22
23
 
23
24
  # this has some benchmark, but quite a few librarires seem unmaintained, sadly
24
25
  # I guess i'll stick to default for now, until it's a critical bottleneck
@@ -108,11 +109,11 @@ default_qkeep = [
108
109
 
109
110
  # TODO perhaps, decide if fragment is meaningful (e.g. wiki) or random sequence of letters?
110
111
  class Spec(NamedTuple):
111
- qkeep : Optional[Union[Collection[str], bool]] = None
112
- qremove: Optional[Set[str]] = None
112
+ qkeep : Collection[str] | bool | None = None
113
+ qremove: set[str] | None = None
113
114
  fkeep : bool = False
114
115
 
115
- def keep_query(self, q: str) -> Optional[int]: # returns order
116
+ def keep_query(self, q: str) -> int | None: # returns order
116
117
  if self.qkeep is True:
117
118
  return 1
118
119
  qkeep = {
@@ -134,13 +135,13 @@ class Spec(NamedTuple):
134
135
  return None
135
136
 
136
137
  @classmethod
137
- def make(cls, **kwargs) -> 'Spec':
138
+ def make(cls, **kwargs) -> Spec:
138
139
  return cls(**kwargs)
139
140
 
140
141
  S = Spec
141
142
 
142
143
  # TODO perhaps these can be machine learnt from large set of urls?
143
- specs: Dict[str, Spec] = {
144
+ specs: dict[str, Spec] = {
144
145
  'youtube.com': S(
145
146
  # TODO search_query?
146
147
  qkeep=[ # note: experimental.. order matters here
@@ -178,7 +179,6 @@ specs: Dict[str, Spec] = {
178
179
 
179
180
  'source', 'tsid', 'refsrc', 'pnref', 'rc', '_rdr', 'src', 'hc_location', 'section', 'permPage', 'soft', 'pn_ref', 'action',
180
181
  'ti', 'aref', 'event_time_id', 'action_history', 'filter', 'ref_notif_type', 'has_source', 'source_newsfeed_story_type',
181
- 'ref_notif_type',
182
182
  },
183
183
  ),
184
184
  'physicstravelguide.com': S(fkeep=True), # TODO instead, pass fkeep marker object for shorter spec?
@@ -218,10 +218,10 @@ Spec2 = Any # TODO
218
218
 
219
219
  # TODO this should be a map
220
220
  Frag = Any
221
- Parts = Sequence[Tuple[str, str]]
221
+ Parts = Sequence[tuple[str, str]]
222
222
 
223
223
 
224
- def _yc(domain: str, path: str, qq: Parts, frag: Frag) -> Tuple[Any, Any, Parts, Frag]:
224
+ def _yc(domain: str, path: str, qq: Parts, frag: Frag) -> tuple[Any, Any, Parts, Frag]:
225
225
  if path[:5] == '/from':
226
226
  site = dict(qq).get('site')
227
227
  if site is not None:
@@ -232,7 +232,7 @@ def _yc(domain: str, path: str, qq: Parts, frag: Frag) -> Tuple[Any, Any, Parts,
232
232
  # TODO this should be in-place? for brevity?
233
233
  return (domain, path, qq, frag)
234
234
 
235
- def get_spec2(dom: str) -> Optional[Spec2]:
235
+ def get_spec2(dom: str) -> Spec2 | None:
236
236
  return {
237
237
  'news.ycombinator.com': _yc,
238
238
  }.get(dom)
@@ -285,10 +285,10 @@ def transform_split(split: SplitResult):
285
285
  REST = r'(?P<rest>.*)'
286
286
 
287
287
  Left = Union[str, Sequence[str]]
288
- Right = Tuple[str, str, str]
288
+ Right = tuple[str, str, str]
289
289
  # the idea is that we can unify certain URLs here and map them to the 'canonical' one
290
290
  # this is a dict only for grouping but should be a list really.. todo
291
- rules: Dict[Left, Right] = {
291
+ rules: dict[Left, Right] = {
292
292
  # TODO m. handling might be quite common
293
293
  # f'm.youtube.com/{REST}': ('youtube.com', '{rest}'),
294
294
  (
@@ -322,9 +322,9 @@ def transform_split(split: SplitResult):
322
322
  continue
323
323
  gd = m.groupdict()
324
324
  if len(to) == 2:
325
- to = to + ('', )
325
+ to = (*to, '')
326
326
 
327
- (netloc, path, qq) = [t.format(**gd) for t in to]
327
+ (netloc, path, qq) = (t.format(**gd) for t in to)
328
328
  qparts.extend(parse_qsl(qq, keep_blank_values=True)) # TODO hacky..
329
329
  # TODO eh, qparts should really be a map or something...
330
330
  break
@@ -361,7 +361,7 @@ def myunsplit(domain: str, path: str, query: str, fragment: str) -> str:
361
361
  # ]
362
362
  # for re in regexes:
363
363
 
364
- def handle_archive_org(url: str) -> Optional[str]:
364
+ def handle_archive_org(url: str) -> str | None:
365
365
  are = r'web.archive.org/web/(?P<timestamp>\d+)/(?P<rest>.*)'
366
366
  m = re.fullmatch(are, url)
367
367
  if m is None:
@@ -697,8 +697,8 @@ def groups(it, args): # pragma: no cover
697
697
  all_pats = get_patterns()
698
698
 
699
699
  from collections import Counter
700
- c: typing.Counter[Optional[str]] = Counter()
701
- unmatched: List[str] = []
700
+ c: typing.Counter[str | None] = Counter()
701
+ unmatched: list[str] = []
702
702
 
703
703
  def dump():
704
704
  print(c)
@@ -756,10 +756,10 @@ def groups(it, args): # pragma: no cover
756
756
  def display(it, args) -> None: # pragma: no cover
757
757
  # TODO better name?
758
758
  import difflib
759
- # pylint: disable=import-error
760
- from termcolor import colored as C # type: ignore
761
759
  from sys import stdout
762
760
 
761
+ from termcolor import colored as C # type: ignore
762
+
763
763
  for line in it:
764
764
  line = line.strip()
765
765
  if args.human:
promnesia/common.py CHANGED
@@ -1,26 +1,28 @@
1
1
  from __future__ import annotations
2
2
 
3
- from contextlib import contextmanager
4
- from datetime import datetime, date
5
- from functools import lru_cache
6
- from glob import glob
7
3
  import itertools
8
4
  import logging
9
5
  import os
10
- from pathlib import Path
6
+ import re
11
7
  import shutil
12
- from subprocess import run, PIPE, Popen
8
+ import tempfile
9
+ import warnings
10
+ from collections.abc import Iterable, Sequence
11
+ from contextlib import contextmanager
12
+ from copy import copy
13
+ from datetime import date, datetime
14
+ from functools import lru_cache
15
+ from glob import glob
16
+ from pathlib import Path
17
+ from subprocess import PIPE, Popen, run
13
18
  from timeit import default_timer as timer
14
19
  from types import ModuleType
15
- from typing import NamedTuple, Iterable, TypeVar, Callable, List, Optional, Union, TypeVar
16
- import warnings
20
+ from typing import TYPE_CHECKING, Callable, NamedTuple, Optional, TypeVar, Union
17
21
 
18
- from more_itertools import intersperse
19
22
  import pytz
23
+ from more_itertools import intersperse
20
24
 
21
25
  from .cannon import canonify
22
- from .compat import removeprefix
23
-
24
26
 
25
27
  _is_windows = os.name == 'nt'
26
28
 
@@ -38,14 +40,14 @@ Second = int
38
40
  # TODO hmm. arguably, source and context are almost same things...
39
41
  class Loc(NamedTuple):
40
42
  title: str
41
- href: Optional[str]=None
43
+ href: Optional[str] = None # noqa: UP007 # looks like hypothesis doesn't like in on python <= 3.9
42
44
 
43
45
  @classmethod
44
- def make(cls, title: str, href: Optional[str]=None) -> 'Loc':
46
+ def make(cls, title: str, href: str | None=None) -> Loc:
45
47
  return cls(title=title, href=href)
46
48
 
47
49
  @classmethod
48
- def file(cls, path: PathIsh, line: Optional[int]=None, relative_to: Optional[Path]=None) -> 'Loc':
50
+ def file(cls, path: PathIsh, line: int | None=None, relative_to: Path | None=None) -> Loc:
49
51
  lstr = '' if line is None else f':{line}'
50
52
  # todo loc should be url encoded? dunno.
51
53
  # or use line=? eh. I don't know. Just ask in issues.
@@ -94,7 +96,7 @@ def _warn_no_xdg_mime() -> None:
94
96
  def _detect_mime_handler() -> str:
95
97
  def exists(what: str) -> bool:
96
98
  try:
97
- r = run(f'xdg-mime query default x-scheme-handler/{what}'.split(), stdout=PIPE)
99
+ r = run(f'xdg-mime query default x-scheme-handler/{what}'.split(), stdout=PIPE, check=False)
98
100
  except (FileNotFoundError, NotADirectoryError): # ugh seems that osx might throw NotADirectory for some reason
99
101
  _warn_no_xdg_mime()
100
102
  return False
@@ -139,12 +141,12 @@ class Visit(NamedTuple):
139
141
  # TODO back to DatetimeIsh, but somehow make compatible to dbcache?
140
142
  dt: datetime
141
143
  locator: Loc
142
- context: Optional[Context] = None
143
- duration: Optional[Second] = None
144
+ context: Context | None = None
145
+ duration: Second | None = None
144
146
  # TODO shit. I need to insert it in chrome db....
145
147
  # TODO gonna be hard to fill retroactively.
146
148
  # spent: Optional[Second] = None
147
- debug: Optional[str] = None
149
+ debug: str | None = None
148
150
 
149
151
  Result = Union[Visit, Exception]
150
152
  Results = Iterable[Result]
@@ -157,12 +159,12 @@ class DbVisit(NamedTuple):
157
159
  orig_url: Url
158
160
  dt: datetime
159
161
  locator: Loc
160
- src: Optional[SourceName] = None
161
- context: Optional[Context] = None
162
- duration: Optional[Second] = None
162
+ src: Optional[SourceName] = None # noqa: UP007 # looks like hypothesis doesn't like in on python <= 3.9
163
+ context: Optional[Context] = None # noqa: UP007 # looks like hypothesis doesn't like in on python <= 3.9
164
+ duration: Optional[Second] = None # noqa: UP007 # looks like hypothesis doesn't like in on python <= 3.9
163
165
 
164
166
  @staticmethod
165
- def make(p: Visit, src: SourceName) -> Res['DbVisit']:
167
+ def make(p: Visit, src: SourceName) -> Res[DbVisit]:
166
168
  try:
167
169
  # hmm, mypy gets a bit confused here.. presumably because datetime is always datetime (but date is not datetime)
168
170
  if isinstance(p.dt, datetime):
@@ -171,7 +173,7 @@ class DbVisit(NamedTuple):
171
173
  # TODO that won't be with timezone..
172
174
  dt = datetime.combine(p.dt, datetime.min.time()) # meh..
173
175
  else:
174
- raise AssertionError(f'unexpected date: {p.dt}, {type(p.dt)}')
176
+ raise AssertionError(f'unexpected date: {p.dt}, {type(p.dt)}') # noqa: TRY301
175
177
  except Exception as e:
176
178
  return e
177
179
 
@@ -196,6 +198,7 @@ Filter = Callable[[Url], bool]
196
198
 
197
199
 
198
200
  from .logging import LazyLogger
201
+
199
202
  logger = LazyLogger('promnesia', level='DEBUG')
200
203
 
201
204
  def get_logger() -> logging.Logger:
@@ -204,7 +207,6 @@ def get_logger() -> logging.Logger:
204
207
 
205
208
 
206
209
 
207
- import tempfile
208
210
  # kinda singleton
209
211
  @lru_cache(1)
210
212
  def get_tmpdir() -> tempfile.TemporaryDirectory[str]:
@@ -218,7 +220,7 @@ Syntax = str
218
220
 
219
221
  @lru_cache(None)
220
222
  def _get_urlextractor(syntax: Syntax):
221
- from urlextract import URLExtract # type: ignore
223
+ from urlextract import URLExtract # type: ignore
222
224
  u = URLExtract()
223
225
  # https://github.com/lipoja/URLExtract/issues/13
224
226
  if syntax in {'org', 'orgmode', 'org-mode'}: # TODO remove hardcoding..
@@ -249,7 +251,7 @@ def iter_urls(s: str, *, syntax: Syntax='') -> Iterable[Url]:
249
251
  yield _sanitize(u)
250
252
 
251
253
 
252
- def extract_urls(s: str, *, syntax: Syntax='') -> List[Url]:
254
+ def extract_urls(s: str, *, syntax: Syntax='') -> list[Url]:
253
255
  return list(iter_urls(s=s, syntax=syntax))
254
256
 
255
257
 
@@ -274,7 +276,7 @@ class PathWithMtime(NamedTuple):
274
276
  mtime: float
275
277
 
276
278
  @classmethod
277
- def make(cls, p: Path) -> 'PathWithMtime':
279
+ def make(cls, p: Path) -> PathWithMtime:
278
280
  return cls(
279
281
  path=p,
280
282
  mtime=p.stat().st_mtime,
@@ -300,7 +302,7 @@ def _guess_name(thing: PreSource) -> str:
300
302
  guess = thing.__module__
301
303
 
302
304
  dflt = 'promnesia.sources.'
303
- guess = removeprefix(guess, prefix=dflt)
305
+ guess = guess.removeprefix(dflt)
304
306
  if guess == 'config':
305
307
  # this happens when we define a lambda in config or something without properly wrapping in Source
306
308
  logger.warning(f'Inferred source name "config" for {thing}. This might be misleading TODO')
@@ -362,13 +364,14 @@ Indexer = Source
362
364
  # NOTE: used in configs...
363
365
  def last(path: PathIsh, *parts: str) -> Path:
364
366
  import os.path
365
- pp = os.path.join(str(path), *parts)
366
- return Path(max(glob(pp, recursive=True)))
367
+ pp = os.path.join(str(path), *parts) # noqa: PTH118
368
+ return Path(max(glob(pp, recursive=True))) # noqa: PTH207
367
369
 
368
370
 
369
- from .logging import setup_logger
371
+ from .logging import setup_logger # noqa: F401
370
372
 
371
- from copy import copy
373
+
374
+ # TODO get rid of this? not sure if still necessary
372
375
  def echain(ex: Exception, cause: Exception) -> Exception:
373
376
  e = copy(ex)
374
377
  e.__cause__ = cause
@@ -382,7 +385,6 @@ def echain(ex: Exception, cause: Exception) -> Exception:
382
385
 
383
386
  def slugify(x: str) -> str:
384
387
  # https://stackoverflow.com/a/38766141/706389
385
- import re
386
388
  valid_file_name = re.sub(r'[^\w_.)( -]', '', x)
387
389
  return valid_file_name
388
390
 
@@ -392,7 +394,7 @@ def appdirs():
392
394
  under_test = os.environ.get('PYTEST_CURRENT_TEST') is not None
393
395
  # todo actually use test name?
394
396
  name = 'promnesia-test' if under_test else 'promnesia'
395
- import appdirs as ad # type: ignore[import-untyped]
397
+ import appdirs as ad # type: ignore[import-untyped]
396
398
  return ad.AppDirs(appname=name)
397
399
 
398
400
 
@@ -409,13 +411,13 @@ def default_cache_dir() -> Path:
409
411
  # make it lazy, otherwise it might crash on module import (e.g. on Windows)
410
412
  # ideally would be nice to fix it properly https://github.com/ahupp/python-magic#windows
411
413
  @lru_cache(1)
412
- def _magic() -> Callable[[PathIsh], Optional[str]]:
414
+ def _magic() -> Callable[[PathIsh], str | None]:
413
415
  logger = get_logger()
414
416
  try:
415
- import magic # type: ignore
417
+ import magic # type: ignore
416
418
  except Exception as e:
417
419
  logger.exception(e)
418
- defensive_msg: Optional[str] = None
420
+ defensive_msg: str | None = None
419
421
  if isinstance(e, ModuleNotFoundError) and e.name == 'magic':
420
422
  defensive_msg = "python-magic is not detected. It's recommended for better file type detection (pip3 install --user python-magic). See https://github.com/ahupp/python-magic#installation"
421
423
  elif isinstance(e, ImportError):
@@ -425,7 +427,7 @@ def _magic() -> Callable[[PathIsh], Optional[str]]:
425
427
  if defensive_msg is not None:
426
428
  logger.warning(defensive_msg)
427
429
  warnings.warn(defensive_msg)
428
- return lambda path: None # stub
430
+ return lambda path: None # stub # noqa: ARG005
429
431
  else:
430
432
  raise e
431
433
  else:
@@ -441,7 +443,7 @@ def _mimetypes():
441
443
  return mimetypes
442
444
 
443
445
 
444
- def mime(path: PathIsh) -> Optional[str]:
446
+ def mime(path: PathIsh) -> str | None:
445
447
  ps = str(path)
446
448
  mimetypes = _mimetypes()
447
449
  # first try mimetypes, it's only using the filename without opening the file
@@ -453,7 +455,7 @@ def mime(path: PathIsh) -> Optional[str]:
453
455
  return magic(ps)
454
456
 
455
457
 
456
- def find_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
458
+ def find_args(root: Path, *, follow: bool, ignore: Sequence[str] = ()) -> list[str]:
457
459
  prune_dir_args = []
458
460
  ignore_file_args = []
459
461
  if ignore:
@@ -476,7 +478,7 @@ def find_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
476
478
  ]
477
479
 
478
480
 
479
- def fdfind_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
481
+ def fdfind_args(root: Path, *, follow: bool, ignore: Sequence[str] = ()) -> list[str]:
480
482
  from .config import extra_fd_args
481
483
 
482
484
  ignore_args = []
@@ -496,7 +498,7 @@ def fdfind_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
496
498
  ]
497
499
 
498
500
 
499
- def traverse(root: Path, *, follow: bool=True, ignore: List[str]=[]) -> Iterable[Path]:
501
+ def traverse(root: Path, *, follow: bool=True, ignore: Sequence[str] = ()) -> Iterable[Path]:
500
502
  if not root.is_dir():
501
503
  yield root
502
504
  return
@@ -605,3 +607,8 @@ def is_sqlite_db(x: Path) -> bool:
605
607
  'application/vnd.sqlite3',
606
608
  # TODO this mime can also match wal files/journals, not sure
607
609
  }
610
+
611
+
612
+ if not TYPE_CHECKING:
613
+ # todo deprecate properly --just backwards compat
614
+ from .compat import removeprefix # noqa: F401
promnesia/compare.py CHANGED
@@ -1,13 +1,14 @@
1
- #!/usr/bin/env python3
1
+ from __future__ import annotations
2
+
2
3
  # TODO perhaps make it external script?
3
4
  import argparse
4
- from pathlib import Path
5
5
  import logging
6
6
  import sys
7
- from typing import Dict, List, Any, NamedTuple, Optional, Iterator, Set, Tuple
8
-
7
+ from collections.abc import Iterator, Sequence
8
+ from pathlib import Path
9
+ from typing import TypeVar
9
10
 
10
- from .common import DbVisit, Url, PathWithMtime # TODO ugh. figure out pythonpath
11
+ from .common import DbVisit, PathWithMtime, Url
11
12
  from .database.load import row_to_db_visit
12
13
 
13
14
  # TODO include latest too?
@@ -19,14 +20,11 @@ def get_logger():
19
20
  # TODO return error depending on severity?
20
21
 
21
22
 
22
- from typing import TypeVar, Sequence
23
-
24
-
25
23
  T = TypeVar('T')
26
24
 
27
25
  def eliminate_by(sa: Sequence[T], sb: Sequence[T], key):
28
- def make_dict(s: Sequence[T]) -> Dict[str, List[T]]:
29
- res: Dict[str, List[T]] = {}
26
+ def make_dict(s: Sequence[T]) -> dict[str, list[T]]:
27
+ res: dict[str, list[T]] = {}
30
28
  for a in s:
31
29
  k = key(a)
32
30
  ll = res.get(k, None)
@@ -39,9 +37,9 @@ def eliminate_by(sa: Sequence[T], sb: Sequence[T], key):
39
37
  db = make_dict(sb)
40
38
  ka = set(da.keys())
41
39
  kb = set(db.keys())
42
- onlya: Set[T] = set()
43
- common: Set[T] = set()
44
- onlyb: Set[T] = set()
40
+ onlya: set[T] = set()
41
+ common: set[T] = set()
42
+ onlyb: set[T] = set()
45
43
  for k in ka.union(kb):
46
44
  la = da.get(k, [])
47
45
  lb = db.get(k, [])
@@ -54,13 +52,13 @@ def eliminate_by(sa: Sequence[T], sb: Sequence[T], key):
54
52
  return onlya, common, onlyb
55
53
 
56
54
 
57
- def compare(before: List[DbVisit], after: List[DbVisit], between: str, *, log=True) -> List[DbVisit]:
55
+ def compare(before: list[DbVisit], after: list[DbVisit], between: str, *, log=True) -> list[DbVisit]:
58
56
  logger = get_logger()
59
57
  logger.info('comparing between: %s', between)
60
58
 
61
- errors: List[DbVisit] = []
59
+ errors: list[DbVisit] = []
62
60
 
63
- umap: Dict[Url, List[DbVisit]] = {}
61
+ umap: dict[Url, list[DbVisit]] = {}
64
62
  for a in after:
65
63
  url = a.norm_url
66
64
  xx = umap.get(url, []) # TODO canonify here?
@@ -71,7 +69,7 @@ def compare(before: List[DbVisit], after: List[DbVisit], between: str, *, log=Tr
71
69
  errors.append(b)
72
70
  if log:
73
71
  logger.error('between %s missing %s', between, b)
74
- print('ignoreline "%s", # %s %s' % ('exid', b.norm_url, b.src), file=sys.stderr)
72
+ print('ignoreline "{}", # {} {}'.format('exid', b.norm_url, b.src), file=sys.stderr)
75
73
 
76
74
 
77
75
  # the idea is that we eliminate items simultaneously from both sets
@@ -108,7 +106,7 @@ def get_files(args):
108
106
  if len(args.paths) == 0:
109
107
  int_dir = args.intermediate_dir
110
108
  assert int_dir.exists()
111
- files = list(sorted(int_dir.glob('*.sqlite*')))
109
+ files = sorted(int_dir.glob('*.sqlite*'))
112
110
  files = files[-args.last:]
113
111
  else:
114
112
  files = [Path(p) for p in args.paths]
@@ -126,7 +124,7 @@ def main():
126
124
  sys.exit(1)
127
125
 
128
126
 
129
- def compare_files(*files: Path, log=True) -> Iterator[Tuple[str, DbVisit]]:
127
+ def compare_files(*files: Path, log=True) -> Iterator[tuple[str, DbVisit]]:
130
128
  assert len(files) > 0
131
129
 
132
130
  logger = get_logger()
@@ -139,7 +137,7 @@ def compare_files(*files: Path, log=True) -> Iterator[Tuple[str, DbVisit]]:
139
137
  name = f.name
140
138
  this_dts = name[0: name.index('.')] # can't use stem due to multiple extensions..
141
139
 
142
- from promnesia.server import _get_stuff # TODO ugh
140
+ from promnesia.server import _get_stuff # TODO ugh
143
141
  engine, table = _get_stuff(PathWithMtime.make(f))
144
142
 
145
143
  with engine.connect() as conn: