promnesia 1.2.20230515__py3-none-any.whl → 1.3.20241021__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. promnesia/__init__.py +14 -3
  2. promnesia/__main__.py +60 -35
  3. promnesia/cannon.py +27 -27
  4. promnesia/common.py +85 -67
  5. promnesia/compare.py +21 -22
  6. promnesia/compat.py +10 -10
  7. promnesia/config.py +23 -23
  8. promnesia/database/common.py +67 -0
  9. promnesia/database/dump.py +188 -0
  10. promnesia/{read_db.py → database/load.py} +16 -17
  11. promnesia/extract.py +14 -11
  12. promnesia/kjson.py +12 -11
  13. promnesia/logging.py +4 -4
  14. promnesia/misc/__init__.pyi +0 -0
  15. promnesia/misc/config_example.py +1 -2
  16. promnesia/misc/install_server.py +7 -9
  17. promnesia/server.py +57 -47
  18. promnesia/sources/__init__.pyi +0 -0
  19. promnesia/sources/auto.py +50 -35
  20. promnesia/sources/auto_logseq.py +6 -5
  21. promnesia/sources/auto_obsidian.py +2 -2
  22. promnesia/sources/browser.py +14 -9
  23. promnesia/sources/browser_legacy.py +26 -16
  24. promnesia/sources/demo.py +19 -3
  25. promnesia/sources/fbmessenger.py +3 -2
  26. promnesia/sources/filetypes.py +16 -7
  27. promnesia/sources/github.py +7 -9
  28. promnesia/sources/guess.py +2 -1
  29. promnesia/sources/hackernews.py +2 -2
  30. promnesia/sources/hpi.py +2 -2
  31. promnesia/sources/html.py +7 -5
  32. promnesia/sources/hypothesis.py +4 -3
  33. promnesia/sources/instapaper.py +2 -2
  34. promnesia/sources/markdown.py +31 -21
  35. promnesia/sources/org.py +27 -13
  36. promnesia/sources/plaintext.py +30 -29
  37. promnesia/sources/pocket.py +3 -2
  38. promnesia/sources/reddit.py +20 -19
  39. promnesia/sources/roamresearch.py +2 -1
  40. promnesia/sources/rss.py +4 -5
  41. promnesia/sources/shellcmd.py +19 -6
  42. promnesia/sources/signal.py +33 -24
  43. promnesia/sources/smscalls.py +2 -2
  44. promnesia/sources/stackexchange.py +4 -3
  45. promnesia/sources/takeout.py +76 -9
  46. promnesia/sources/takeout_legacy.py +24 -12
  47. promnesia/sources/telegram.py +13 -11
  48. promnesia/sources/telegram_legacy.py +18 -7
  49. promnesia/sources/twitter.py +6 -5
  50. promnesia/sources/vcs.py +5 -3
  51. promnesia/sources/viber.py +10 -9
  52. promnesia/sources/website.py +4 -4
  53. promnesia/sources/zulip.py +3 -2
  54. promnesia/sqlite.py +7 -4
  55. promnesia/tests/__init__.py +0 -0
  56. promnesia/tests/common.py +140 -0
  57. promnesia/tests/server_helper.py +67 -0
  58. promnesia/tests/sources/__init__.py +0 -0
  59. promnesia/tests/sources/test_auto.py +65 -0
  60. promnesia/tests/sources/test_filetypes.py +43 -0
  61. promnesia/tests/sources/test_hypothesis.py +39 -0
  62. promnesia/tests/sources/test_org.py +64 -0
  63. promnesia/tests/sources/test_plaintext.py +25 -0
  64. promnesia/tests/sources/test_shellcmd.py +21 -0
  65. promnesia/tests/sources/test_takeout.py +56 -0
  66. promnesia/tests/test_cannon.py +325 -0
  67. promnesia/tests/test_cli.py +40 -0
  68. promnesia/tests/test_compare.py +30 -0
  69. promnesia/tests/test_config.py +289 -0
  70. promnesia/tests/test_db_dump.py +222 -0
  71. promnesia/tests/test_extract.py +65 -0
  72. promnesia/tests/test_extract_urls.py +43 -0
  73. promnesia/tests/test_indexer.py +251 -0
  74. promnesia/tests/test_server.py +291 -0
  75. promnesia/tests/test_traverse.py +39 -0
  76. promnesia/tests/utils.py +35 -0
  77. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/METADATA +15 -18
  78. promnesia-1.3.20241021.dist-info/RECORD +83 -0
  79. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/WHEEL +1 -1
  80. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/entry_points.txt +0 -1
  81. promnesia/dump.py +0 -105
  82. promnesia-1.2.20230515.dist-info/RECORD +0 -58
  83. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/LICENSE +0 -0
  84. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/top_level.txt +0 -0
promnesia/common.py CHANGED
@@ -1,26 +1,29 @@
1
1
  from __future__ import annotations
2
2
 
3
- from contextlib import contextmanager
4
- from datetime import datetime, date
5
- from functools import lru_cache
6
- from glob import glob
7
3
  import itertools
8
4
  import logging
9
5
  import os
10
- from pathlib import Path
6
+ import re
11
7
  import shutil
12
- from subprocess import run, PIPE, Popen
8
+ import tempfile
9
+ import warnings
10
+ from collections.abc import Iterable, Sequence
11
+ from contextlib import contextmanager
12
+ from copy import copy
13
+ from datetime import date, datetime
14
+ from functools import lru_cache
15
+ from glob import glob
16
+ from pathlib import Path
17
+ from subprocess import PIPE, Popen, run
13
18
  from timeit import default_timer as timer
14
19
  from types import ModuleType
15
- from typing import NamedTuple, Iterable, TypeVar, Callable, List, Optional, Union, TypeVar
16
- import warnings
20
+ from typing import TYPE_CHECKING, Callable, NamedTuple, Optional, TypeVar, Union
17
21
 
18
- from more_itertools import intersperse
19
22
  import pytz
23
+ from more_itertools import intersperse
20
24
 
21
25
  from .cannon import canonify
22
26
 
23
-
24
27
  _is_windows = os.name == 'nt'
25
28
 
26
29
  T = TypeVar('T')
@@ -37,14 +40,14 @@ Second = int
37
40
  # TODO hmm. arguably, source and context are almost same things...
38
41
  class Loc(NamedTuple):
39
42
  title: str
40
- href: Optional[str]=None
43
+ href: Optional[str] = None # noqa: UP007 # looks like hypothesis doesn't like in on python <= 3.9
41
44
 
42
45
  @classmethod
43
- def make(cls, title: str, href: Optional[str]=None) -> 'Loc':
46
+ def make(cls, title: str, href: str | None=None) -> Loc:
44
47
  return cls(title=title, href=href)
45
48
 
46
49
  @classmethod
47
- def file(cls, path: PathIsh, line: Optional[int]=None, relative_to: Optional[Path]=None) -> 'Loc':
50
+ def file(cls, path: PathIsh, line: int | None=None, relative_to: Path | None=None) -> Loc:
48
51
  lstr = '' if line is None else f':{line}'
49
52
  # todo loc should be url encoded? dunno.
50
53
  # or use line=? eh. I don't know. Just ask in issues.
@@ -76,13 +79,26 @@ class Loc(NamedTuple):
76
79
  # but generally, it will be
77
80
  # (url|file)(linenumber|json_path|anchor)
78
81
 
82
+
83
+ @lru_cache(None)
84
+ def warn_once(message: str) -> None:
85
+ # you'd think that warnings module already logs warnings only once per line..
86
+ # but sadly it's not the case
87
+ # see https://github.com/karlicoss/python_duplicate_warnings_investigation/blob/master/test.py
88
+ warnings.warn(message, stacklevel=2)
89
+
90
+
91
+ def _warn_no_xdg_mime() -> None:
92
+ warn_once("No xdg-mime on your OS! If you're on OSX, perhaps you can help me! https://github.com/karlicoss/open-in-editor/issues/1")
93
+
94
+
79
95
  @lru_cache(1)
80
96
  def _detect_mime_handler() -> str:
81
97
  def exists(what: str) -> bool:
82
98
  try:
83
- r = run(f'xdg-mime query default x-scheme-handler/{what}'.split(), stdout=PIPE)
84
- except FileNotFoundError:
85
- warnings.warn("No xdg-mime on your OS! If you're on OSX, perhaps you can help me! https://github.com/karlicoss/open-in-editor/issues/1")
99
+ r = run(f'xdg-mime query default x-scheme-handler/{what}'.split(), stdout=PIPE, check=False)
100
+ except (FileNotFoundError, NotADirectoryError): # ugh seems that osx might throw NotADirectory for some reason
101
+ _warn_no_xdg_mime()
86
102
  return False
87
103
  if r.returncode > 0:
88
104
  warnings.warn('xdg-mime failed') # hopefully rest is in stderr
@@ -102,6 +118,7 @@ def _detect_mime_handler() -> str:
102
118
  result = 'emacs:'
103
119
 
104
120
  # 2. now try to use newer editor:// thing
121
+ # TODO flip order here? should rely on editor:// first?
105
122
 
106
123
  # TODO would be nice to collect warnings and display at the end
107
124
  if not exists('editor'):
@@ -124,12 +141,12 @@ class Visit(NamedTuple):
124
141
  # TODO back to DatetimeIsh, but somehow make compatible to dbcache?
125
142
  dt: datetime
126
143
  locator: Loc
127
- context: Optional[Context] = None
128
- duration: Optional[Second] = None
144
+ context: Context | None = None
145
+ duration: Second | None = None
129
146
  # TODO shit. I need to insert it in chrome db....
130
147
  # TODO gonna be hard to fill retroactively.
131
148
  # spent: Optional[Second] = None
132
- debug: Optional[str] = None
149
+ debug: str | None = None
133
150
 
134
151
  Result = Union[Visit, Exception]
135
152
  Results = Iterable[Result]
@@ -142,12 +159,12 @@ class DbVisit(NamedTuple):
142
159
  orig_url: Url
143
160
  dt: datetime
144
161
  locator: Loc
145
- src: Optional[SourceName] = None
146
- context: Optional[Context] = None
147
- duration: Optional[Second] = None
162
+ src: Optional[SourceName] = None # noqa: UP007 # looks like hypothesis doesn't like in on python <= 3.9
163
+ context: Optional[Context] = None # noqa: UP007 # looks like hypothesis doesn't like in on python <= 3.9
164
+ duration: Optional[Second] = None # noqa: UP007 # looks like hypothesis doesn't like in on python <= 3.9
148
165
 
149
166
  @staticmethod
150
- def make(p: Visit, src: SourceName) -> Res['DbVisit']:
167
+ def make(p: Visit, src: SourceName) -> Res[DbVisit]:
151
168
  try:
152
169
  # hmm, mypy gets a bit confused here.. presumably because datetime is always datetime (but date is not datetime)
153
170
  if isinstance(p.dt, datetime):
@@ -156,7 +173,7 @@ class DbVisit(NamedTuple):
156
173
  # TODO that won't be with timezone..
157
174
  dt = datetime.combine(p.dt, datetime.min.time()) # meh..
158
175
  else:
159
- raise AssertionError(f'unexpected date: {p.dt}, {type(p.dt)}')
176
+ raise AssertionError(f'unexpected date: {p.dt}, {type(p.dt)}') # noqa: TRY301
160
177
  except Exception as e:
161
178
  return e
162
179
 
@@ -181,6 +198,7 @@ Filter = Callable[[Url], bool]
181
198
 
182
199
 
183
200
  from .logging import LazyLogger
201
+
184
202
  logger = LazyLogger('promnesia', level='DEBUG')
185
203
 
186
204
  def get_logger() -> logging.Logger:
@@ -189,7 +207,6 @@ def get_logger() -> logging.Logger:
189
207
 
190
208
 
191
209
 
192
- import tempfile
193
210
  # kinda singleton
194
211
  @lru_cache(1)
195
212
  def get_tmpdir() -> tempfile.TemporaryDirectory[str]:
@@ -203,7 +220,7 @@ Syntax = str
203
220
 
204
221
  @lru_cache(None)
205
222
  def _get_urlextractor(syntax: Syntax):
206
- from urlextract import URLExtract # type: ignore
223
+ from urlextract import URLExtract # type: ignore
207
224
  u = URLExtract()
208
225
  # https://github.com/lipoja/URLExtract/issues/13
209
226
  if syntax in {'org', 'orgmode', 'org-mode'}: # TODO remove hardcoding..
@@ -234,7 +251,7 @@ def iter_urls(s: str, *, syntax: Syntax='') -> Iterable[Url]:
234
251
  yield _sanitize(u)
235
252
 
236
253
 
237
- def extract_urls(s: str, *, syntax: Syntax='') -> List[Url]:
254
+ def extract_urls(s: str, *, syntax: Syntax='') -> list[Url]:
238
255
  return list(iter_urls(s=s, syntax=syntax))
239
256
 
240
257
 
@@ -259,7 +276,7 @@ class PathWithMtime(NamedTuple):
259
276
  mtime: float
260
277
 
261
278
  @classmethod
262
- def make(cls, p: Path) -> 'PathWithMtime':
279
+ def make(cls, p: Path) -> PathWithMtime:
263
280
  return cls(
264
281
  path=p,
265
282
  mtime=p.stat().st_mtime,
@@ -285,9 +302,10 @@ def _guess_name(thing: PreSource) -> str:
285
302
  guess = thing.__module__
286
303
 
287
304
  dflt = 'promnesia.sources.'
288
- if guess.startswith(dflt):
289
- # meh
290
- guess = guess[len(dflt):]
305
+ guess = guess.removeprefix(dflt)
306
+ if guess == 'config':
307
+ # this happens when we define a lambda in config or something without properly wrapping in Source
308
+ logger.warning(f'Inferred source name "config" for {thing}. This might be misleading TODO')
291
309
  return guess
292
310
 
293
311
 
@@ -297,7 +315,7 @@ def _get_index_function(sourceish: PreSource) -> PreExtractor:
297
315
  if hasattr(sourceish, 'index'): # must be a module
298
316
  res = getattr(sourceish, 'index')
299
317
  else:
300
- res = sourceish # type: ignore[assignment]
318
+ res = sourceish
301
319
  return res
302
320
 
303
321
 
@@ -317,12 +335,17 @@ class Source:
317
335
  self.extractor: Extractor = lambda: self.ff(*self.args, **self.kwargs)
318
336
  if src is not None:
319
337
  warnings.warn("'src' argument is deprecated, please use 'name' instead", DeprecationWarning)
320
- try:
321
- name_guess = _guess_name(ff)
322
- except:
323
- # todo warn?
324
- name_guess = ''
325
- self.name = name or src or name_guess
338
+ if name != '':
339
+ self.name = name
340
+ elif src != '':
341
+ self.name = src
342
+ else:
343
+ try:
344
+ name_guess = _guess_name(ff)
345
+ except:
346
+ # todo warn?
347
+ name_guess = ''
348
+ self.name = name_guess
326
349
 
327
350
  @property
328
351
  def description(self) -> str:
@@ -341,13 +364,14 @@ Indexer = Source
341
364
  # NOTE: used in configs...
342
365
  def last(path: PathIsh, *parts: str) -> Path:
343
366
  import os.path
344
- pp = os.path.join(str(path), *parts)
345
- return Path(max(glob(pp, recursive=True)))
367
+ pp = os.path.join(str(path), *parts) # noqa: PTH118
368
+ return Path(max(glob(pp, recursive=True))) # noqa: PTH207
346
369
 
347
370
 
348
- from .logging import setup_logger
371
+ from .logging import setup_logger # noqa: F401
349
372
 
350
- from copy import copy
373
+
374
+ # TODO get rid of this? not sure if still necessary
351
375
  def echain(ex: Exception, cause: Exception) -> Exception:
352
376
  e = copy(ex)
353
377
  e.__cause__ = cause
@@ -361,7 +385,6 @@ def echain(ex: Exception, cause: Exception) -> Exception:
361
385
 
362
386
  def slugify(x: str) -> str:
363
387
  # https://stackoverflow.com/a/38766141/706389
364
- import re
365
388
  valid_file_name = re.sub(r'[^\w_.)( -]', '', x)
366
389
  return valid_file_name
367
390
 
@@ -371,7 +394,7 @@ def appdirs():
371
394
  under_test = os.environ.get('PYTEST_CURRENT_TEST') is not None
372
395
  # todo actually use test name?
373
396
  name = 'promnesia-test' if under_test else 'promnesia'
374
- import appdirs as ad # type: ignore[import]
397
+ import appdirs as ad # type: ignore[import-untyped]
375
398
  return ad.AppDirs(appname=name)
376
399
 
377
400
 
@@ -388,13 +411,13 @@ def default_cache_dir() -> Path:
388
411
  # make it lazy, otherwise it might crash on module import (e.g. on Windows)
389
412
  # ideally would be nice to fix it properly https://github.com/ahupp/python-magic#windows
390
413
  @lru_cache(1)
391
- def _magic() -> Callable[[PathIsh], Optional[str]]:
414
+ def _magic() -> Callable[[PathIsh], str | None]:
392
415
  logger = get_logger()
393
416
  try:
394
- import magic # type: ignore
417
+ import magic # type: ignore
395
418
  except Exception as e:
396
419
  logger.exception(e)
397
- defensive_msg: Optional[str] = None
420
+ defensive_msg: str | None = None
398
421
  if isinstance(e, ModuleNotFoundError) and e.name == 'magic':
399
422
  defensive_msg = "python-magic is not detected. It's recommended for better file type detection (pip3 install --user python-magic). See https://github.com/ahupp/python-magic#installation"
400
423
  elif isinstance(e, ImportError):
@@ -404,7 +427,7 @@ def _magic() -> Callable[[PathIsh], Optional[str]]:
404
427
  if defensive_msg is not None:
405
428
  logger.warning(defensive_msg)
406
429
  warnings.warn(defensive_msg)
407
- return lambda path: None # stub
430
+ return lambda path: None # stub # noqa: ARG005
408
431
  else:
409
432
  raise e
410
433
  else:
@@ -420,7 +443,7 @@ def _mimetypes():
420
443
  return mimetypes
421
444
 
422
445
 
423
- def mime(path: PathIsh) -> Optional[str]:
446
+ def mime(path: PathIsh) -> str | None:
424
447
  ps = str(path)
425
448
  mimetypes = _mimetypes()
426
449
  # first try mimetypes, it's only using the filename without opening the file
@@ -432,7 +455,7 @@ def mime(path: PathIsh) -> Optional[str]:
432
455
  return magic(ps)
433
456
 
434
457
 
435
- def find_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
458
+ def find_args(root: Path, *, follow: bool, ignore: Sequence[str] = ()) -> list[str]:
436
459
  prune_dir_args = []
437
460
  ignore_file_args = []
438
461
  if ignore:
@@ -455,19 +478,19 @@ def find_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
455
478
  ]
456
479
 
457
480
 
458
- def fdfind_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
481
+ def fdfind_args(root: Path, *, follow: bool, ignore: Sequence[str] = ()) -> list[str]:
459
482
  from .config import extra_fd_args
460
483
 
461
484
  ignore_args = []
462
485
  if ignore:
463
486
  # Add a statement that excludes the folder
464
- ignore_args = [['--exclude', f'{n}'] for n in ignore]
487
+ _ignore_args = [['--exclude', f'{n}'] for n in ignore]
465
488
  # Flatten the list of lists
466
- ignore_args_l = list(itertools.chain(*ignore_args))
489
+ ignore_args = list(itertools.chain(*_ignore_args))
467
490
 
468
491
  return [
469
492
  *extra_fd_args(),
470
- *ignore_args_l,
493
+ *ignore_args,
471
494
  *(['--follow'] if follow else []),
472
495
  '--type', 'f',
473
496
  '.',
@@ -475,7 +498,7 @@ def fdfind_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
475
498
  ]
476
499
 
477
500
 
478
- def traverse(root: Path, *, follow: bool=True, ignore: List[str]=[]) -> Iterable[Path]:
501
+ def traverse(root: Path, *, follow: bool=True, ignore: Sequence[str] = ()) -> Iterable[Path]:
479
502
  if not root.is_dir():
480
503
  yield root
481
504
  return
@@ -516,17 +539,7 @@ def traverse(root: Path, *, follow: bool=True, ignore: List[str]=[]) -> Iterable
516
539
  def get_system_zone() -> str:
517
540
  try:
518
541
  import tzlocal
519
- # note: tzlocal mypy stubs aren't aware of api change yet (see https://github.com/python/typeshed/issues/6038)
520
- try:
521
- # 4.0 way
522
- return tzlocal.get_localzone_name() # type: ignore[attr-defined]
523
- except AttributeError as e:
524
- # 2.0 way
525
- zone = tzlocal.get_localzone().zone # type: ignore[attr-defined]
526
- # see https://github.com/python/typeshed/blame/968fd6d01d23470e0c8368e7ee7c43f54aaedc0e/stubs/pytz/pytz/tzinfo.pyi#L6
527
- # it says all concrete instances should not be None
528
- assert zone is not None
529
- return zone
542
+ return tzlocal.get_localzone_name()
530
543
  except Exception as e:
531
544
  logger.exception(e)
532
545
  logger.error("Couldn't determine system timezone. Falling back to UTC. Please report this as a bug!")
@@ -540,7 +553,7 @@ def get_system_tz() -> pytz.BaseTzInfo:
540
553
  return pytz.timezone(zone)
541
554
  except Exception as e:
542
555
  logger.exception(e)
543
- logger.error(f"Unknown time zone %s. Falling back to UTC. Please report this as a bug!", zone)
556
+ logger.error("Unknown time zone %s. Falling back to UTC. Please report this as a bug!", zone)
544
557
  return pytz.utc
545
558
 
546
559
  # used in misc/install_server.py
@@ -594,3 +607,8 @@ def is_sqlite_db(x: Path) -> bool:
594
607
  'application/vnd.sqlite3',
595
608
  # TODO this mime can also match wal files/journals, not sure
596
609
  }
610
+
611
+
612
+ if not TYPE_CHECKING:
613
+ # todo deprecate properly --just backwards compat
614
+ from .compat import removeprefix # noqa: F401
promnesia/compare.py CHANGED
@@ -1,13 +1,15 @@
1
- #!/usr/bin/env python3
1
+ from __future__ import annotations
2
+
2
3
  # TODO perhaps make it external script?
3
4
  import argparse
4
- from pathlib import Path
5
5
  import logging
6
6
  import sys
7
- from typing import Dict, List, Any, NamedTuple, Optional, Iterator, Set, Tuple
8
-
7
+ from collections.abc import Iterator, Sequence
8
+ from pathlib import Path
9
+ from typing import TypeVar
9
10
 
10
- from .common import DbVisit, Url, PathWithMtime # TODO ugh. figure out pythonpath
11
+ from .common import DbVisit, PathWithMtime, Url
12
+ from .database.load import row_to_db_visit
11
13
 
12
14
  # TODO include latest too?
13
15
  # from cconfig import ignore, filtered
@@ -18,14 +20,11 @@ def get_logger():
18
20
  # TODO return error depending on severity?
19
21
 
20
22
 
21
- from typing import TypeVar, Sequence
22
-
23
-
24
23
  T = TypeVar('T')
25
24
 
26
25
  def eliminate_by(sa: Sequence[T], sb: Sequence[T], key):
27
- def make_dict(s: Sequence[T]) -> Dict[str, List[T]]:
28
- res: Dict[str, List[T]] = {}
26
+ def make_dict(s: Sequence[T]) -> dict[str, list[T]]:
27
+ res: dict[str, list[T]] = {}
29
28
  for a in s:
30
29
  k = key(a)
31
30
  ll = res.get(k, None)
@@ -38,9 +37,9 @@ def eliminate_by(sa: Sequence[T], sb: Sequence[T], key):
38
37
  db = make_dict(sb)
39
38
  ka = set(da.keys())
40
39
  kb = set(db.keys())
41
- onlya: Set[T] = set()
42
- common: Set[T] = set()
43
- onlyb: Set[T] = set()
40
+ onlya: set[T] = set()
41
+ common: set[T] = set()
42
+ onlyb: set[T] = set()
44
43
  for k in ka.union(kb):
45
44
  la = da.get(k, [])
46
45
  lb = db.get(k, [])
@@ -53,13 +52,13 @@ def eliminate_by(sa: Sequence[T], sb: Sequence[T], key):
53
52
  return onlya, common, onlyb
54
53
 
55
54
 
56
- def compare(before: List[DbVisit], after: List[DbVisit], between: str, *, log=True) -> List[DbVisit]:
55
+ def compare(before: list[DbVisit], after: list[DbVisit], between: str, *, log=True) -> list[DbVisit]:
57
56
  logger = get_logger()
58
57
  logger.info('comparing between: %s', between)
59
58
 
60
- errors: List[DbVisit] = []
59
+ errors: list[DbVisit] = []
61
60
 
62
- umap: Dict[Url, List[DbVisit]] = {}
61
+ umap: dict[Url, list[DbVisit]] = {}
63
62
  for a in after:
64
63
  url = a.norm_url
65
64
  xx = umap.get(url, []) # TODO canonify here?
@@ -70,7 +69,7 @@ def compare(before: List[DbVisit], after: List[DbVisit], between: str, *, log=Tr
70
69
  errors.append(b)
71
70
  if log:
72
71
  logger.error('between %s missing %s', between, b)
73
- print('ignoreline "%s", # %s %s' % ('exid', b.norm_url, b.src), file=sys.stderr)
72
+ print('ignoreline "{}", # {} {}'.format('exid', b.norm_url, b.src), file=sys.stderr)
74
73
 
75
74
 
76
75
  # the idea is that we eliminate items simultaneously from both sets
@@ -107,7 +106,7 @@ def get_files(args):
107
106
  if len(args.paths) == 0:
108
107
  int_dir = args.intermediate_dir
109
108
  assert int_dir.exists()
110
- files = list(sorted(int_dir.glob('*.sqlite*')))
109
+ files = sorted(int_dir.glob('*.sqlite*'))
111
110
  files = files[-args.last:]
112
111
  else:
113
112
  files = [Path(p) for p in args.paths]
@@ -125,7 +124,7 @@ def main():
125
124
  sys.exit(1)
126
125
 
127
126
 
128
- def compare_files(*files: Path, log=True) -> Iterator[Tuple[str, DbVisit]]:
127
+ def compare_files(*files: Path, log=True) -> Iterator[tuple[str, DbVisit]]:
129
128
  assert len(files) > 0
130
129
 
131
130
  logger = get_logger()
@@ -138,11 +137,11 @@ def compare_files(*files: Path, log=True) -> Iterator[Tuple[str, DbVisit]]:
138
137
  name = f.name
139
138
  this_dts = name[0: name.index('.')] # can't use stem due to multiple extensions..
140
139
 
141
- from promnesia.server import _get_stuff # TODO ugh
142
- engine, binder, table = _get_stuff(PathWithMtime.make(f))
140
+ from promnesia.server import _get_stuff # TODO ugh
141
+ engine, table = _get_stuff(PathWithMtime.make(f))
143
142
 
144
143
  with engine.connect() as conn:
145
- vis = [binder.from_row(row) for row in conn.execute(table.select())] # type: ignore[var-annotated]
144
+ vis = [row_to_db_visit(row) for row in conn.execute(table.select())]
146
145
 
147
146
  if last is not None:
148
147
  between = f'{last_dts}:{this_dts}'
promnesia/compat.py CHANGED
@@ -1,12 +1,12 @@
1
- ## we used to have compat fixes here for these for python3.7
2
- ## keeping in case any sources depended on compat functions
3
- from subprocess import PIPE, run, check_call, check_output, Popen
4
- from typing import Protocol, Literal
5
- ##
1
+ from typing import TYPE_CHECKING
6
2
 
3
+ if not TYPE_CHECKING:
4
+ ## we used to have compat fixes here for these for python3.7
5
+ ## keeping in case any sources depended on compat functions
6
+ from subprocess import PIPE, Popen, check_call, check_output, run # noqa: F401
7
+ from typing import Literal, Protocol # noqa: F401
8
+ ##
7
9
 
8
- # can remove after python3.9
9
- def removeprefix(text: str, prefix: str) -> str:
10
- if text.startswith(prefix):
11
- return text[len(prefix):]
12
- return text
10
+ # todo deprecate properly
11
+ def removeprefix(text: str, prefix: str) -> str:
12
+ return text.removeprefix(prefix)
promnesia/config.py CHANGED
@@ -1,21 +1,19 @@
1
- from pathlib import Path
2
- import os
3
- from types import ModuleType
4
- from typing import List, Optional, Union, NamedTuple, Iterable, Callable
1
+ from __future__ import annotations
2
+
5
3
  import importlib
6
4
  import importlib.util
5
+ import os
7
6
  import warnings
7
+ from collections.abc import Iterable
8
+ from pathlib import Path
9
+ from types import ModuleType
10
+ from typing import Callable, NamedTuple, Union
8
11
 
9
- from .common import PathIsh, get_tmpdir, appdirs, default_output_dir, default_cache_dir, user_config_file
10
- from .common import Res, Source, DbVisit
11
-
12
+ from .common import DbVisit, PathIsh, Res, Source, default_cache_dir, default_output_dir
12
13
 
13
14
  HookT = Callable[[Res[DbVisit]], Iterable[Res[DbVisit]]]
14
15
 
15
16
 
16
- from typing import Any
17
-
18
-
19
17
  ModuleName = str
20
18
 
21
19
  # something that can be converted into a proper Source
@@ -24,19 +22,19 @@ ConfigSource = Union[Source, ModuleName, ModuleType]
24
22
 
25
23
  class Config(NamedTuple):
26
24
  # TODO remove default from sources once migrated
27
- SOURCES: List[ConfigSource] = []
25
+ SOURCES: list[ConfigSource] = []
28
26
 
29
27
  # if not specified, uses user data dir
30
- OUTPUT_DIR: Optional[PathIsh] = None
28
+ OUTPUT_DIR: PathIsh | None = None
31
29
 
32
- CACHE_DIR: Optional[PathIsh] = ''
33
- FILTERS: List[str] = []
30
+ CACHE_DIR: PathIsh | None = ''
31
+ FILTERS: list[str] = []
34
32
 
35
- HOOK: Optional[HookT] = None
33
+ HOOK: HookT | None = None
36
34
 
37
35
  #
38
36
  # NOTE: INDEXERS is deprecated, use SOURCES instead
39
- INDEXERS: List[ConfigSource] = []
37
+ INDEXERS: list[ConfigSource] = []
40
38
  #MIME_HANDLER: Optional[str] = None # TODO
41
39
 
42
40
  @property
@@ -68,9 +66,11 @@ class Config(NamedTuple):
68
66
  yield Source(r)
69
67
 
70
68
  @property
71
- def cache_dir(self) -> Optional[Path]:
69
+ def cache_dir(self) -> Path | None:
70
+ # TODO we used to use this for cachew, but it's best to rely on HPI modules etc to configure this
71
+ # keeping just in case for now
72
72
  cd = self.CACHE_DIR
73
- cpath: Optional[Path]
73
+ cpath: Path | None
74
74
  if cd is None:
75
75
  cpath = None # means 'disabled' in cachew
76
76
  elif cd == '': # meh.. but need to make it None friendly..
@@ -94,10 +94,10 @@ class Config(NamedTuple):
94
94
  return self.output_dir / 'promnesia.sqlite'
95
95
 
96
96
  @property
97
- def hook(self) -> Optional[HookT]:
97
+ def hook(self) -> HookT | None:
98
98
  return self.HOOK
99
99
 
100
- instance: Optional[Config] = None
100
+ instance: Config | None = None
101
101
 
102
102
 
103
103
  def has() -> bool:
@@ -127,7 +127,7 @@ def import_config(config_file: PathIsh) -> Config:
127
127
  spec = importlib.util.spec_from_file_location(name, p); assert spec is not None
128
128
  mod = importlib.util.module_from_spec(spec); assert mod is not None
129
129
  loader = spec.loader; assert loader is not None
130
- loader.exec_module(mod) # type: ignore[attr-defined]
130
+ loader.exec_module(mod)
131
131
 
132
132
  d = {}
133
133
  for f in Config._fields:
@@ -137,7 +137,7 @@ def import_config(config_file: PathIsh) -> Config:
137
137
 
138
138
 
139
139
  # TODO: ugh. this causes warnings to be repeated multiple times... need to reuse the pool or something..
140
- def use_cores() -> Optional[int]:
140
+ def use_cores() -> int | None:
141
141
  '''
142
142
  Somewhat experimental.
143
143
  For now only used in sources.auto, perhaps later will be shared among the other indexers.
@@ -152,7 +152,7 @@ def use_cores() -> Optional[int]:
152
152
  return 0
153
153
 
154
154
 
155
- def extra_fd_args() -> List[str]:
155
+ def extra_fd_args() -> list[str]:
156
156
  '''
157
157
  Not sure where it belongs yet... so via env variable for now
158
158
  Can be used to pass --ignore-file parameter
@@ -0,0 +1,67 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Sequence
4
+ from datetime import datetime
5
+
6
+ from sqlalchemy import (
7
+ Column,
8
+ Integer,
9
+ String,
10
+ )
11
+
12
+ # TODO maybe later move DbVisit here completely?
13
+ # kinda an issue that it's technically an "api" because hook in config can patch up DbVisit
14
+ from ..common import DbVisit, Loc
15
+
16
+
17
+ def get_columns() -> Sequence[Column]:
18
+ # fmt: off
19
+ res: Sequence[Column] = [
20
+ Column('norm_url' , String()),
21
+ Column('orig_url' , String()),
22
+ Column('dt' , String()),
23
+ Column('locator_title', String()),
24
+ Column('locator_href' , String()),
25
+ Column('src' , String()),
26
+ Column('context' , String()),
27
+ Column('duration' , Integer())
28
+ ]
29
+ # fmt: on
30
+ assert len(res) == len(DbVisit._fields) + 1 # +1 because Locator is 'flattened'
31
+ return res
32
+
33
+
34
+ def db_visit_to_row(v: DbVisit) -> tuple:
35
+ # ugh, very hacky...
36
+ # we want to make sure the resulting tuple only consists of simple types
37
+ # so we can use dbengine directly
38
+ dt_s = v.dt.isoformat()
39
+ row = (
40
+ v.norm_url,
41
+ v.orig_url,
42
+ dt_s,
43
+ v.locator.title,
44
+ v.locator.href,
45
+ v.src,
46
+ v.context,
47
+ v.duration,
48
+ )
49
+ return row
50
+
51
+
52
+ def row_to_db_visit(row: Sequence) -> DbVisit:
53
+ (norm_url, orig_url, dt_s, locator_title, locator_href, src, context, duration) = row
54
+ dt_s = dt_s.split()[0] # backwards compatibility: previously it could be a string separated with tz name
55
+ dt = datetime.fromisoformat(dt_s)
56
+ return DbVisit(
57
+ norm_url=norm_url,
58
+ orig_url=orig_url,
59
+ dt=dt,
60
+ locator=Loc(
61
+ title=locator_title,
62
+ href=locator_href,
63
+ ),
64
+ src=src,
65
+ context=context,
66
+ duration=duration,
67
+ )