promnesia 1.1.20230129__py3-none-any.whl → 1.2.20240810__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. promnesia/__main__.py +58 -50
  2. promnesia/cannon.py +4 -4
  3. promnesia/common.py +57 -38
  4. promnesia/compare.py +3 -2
  5. promnesia/compat.py +6 -65
  6. promnesia/config.py +4 -2
  7. promnesia/database/common.py +66 -0
  8. promnesia/database/dump.py +187 -0
  9. promnesia/{read_db.py → database/load.py} +10 -11
  10. promnesia/extract.py +1 -0
  11. promnesia/kjson.py +1 -1
  12. promnesia/logging.py +14 -14
  13. promnesia/misc/__init__.pyi +0 -0
  14. promnesia/misc/config_example.py +1 -2
  15. promnesia/misc/install_server.py +5 -4
  16. promnesia/server.py +24 -24
  17. promnesia/sources/__init__.pyi +0 -0
  18. promnesia/sources/auto.py +12 -7
  19. promnesia/sources/browser.py +80 -293
  20. promnesia/sources/browser_legacy.py +298 -0
  21. promnesia/sources/demo.py +18 -2
  22. promnesia/sources/filetypes.py +8 -0
  23. promnesia/sources/github.py +2 -2
  24. promnesia/sources/hackernews.py +1 -2
  25. promnesia/sources/hypothesis.py +1 -1
  26. promnesia/sources/markdown.py +15 -15
  27. promnesia/sources/org.py +7 -3
  28. promnesia/sources/plaintext.py +3 -1
  29. promnesia/sources/reddit.py +2 -2
  30. promnesia/sources/rss.py +5 -1
  31. promnesia/sources/shellcmd.py +6 -2
  32. promnesia/sources/signal.py +29 -20
  33. promnesia/sources/smscalls.py +8 -1
  34. promnesia/sources/stackexchange.py +2 -2
  35. promnesia/sources/takeout.py +132 -12
  36. promnesia/sources/takeout_legacy.py +10 -2
  37. promnesia/sources/telegram.py +79 -123
  38. promnesia/sources/telegram_legacy.py +117 -0
  39. promnesia/sources/vcs.py +1 -1
  40. promnesia/sources/viber.py +6 -15
  41. promnesia/sources/website.py +1 -1
  42. promnesia/sqlite.py +42 -0
  43. promnesia/tests/__init__.py +0 -0
  44. promnesia/tests/common.py +137 -0
  45. promnesia/tests/server_helper.py +64 -0
  46. promnesia/tests/sources/__init__.py +0 -0
  47. promnesia/tests/sources/test_auto.py +66 -0
  48. promnesia/tests/sources/test_filetypes.py +42 -0
  49. promnesia/tests/sources/test_hypothesis.py +39 -0
  50. promnesia/tests/sources/test_org.py +65 -0
  51. promnesia/tests/sources/test_plaintext.py +26 -0
  52. promnesia/tests/sources/test_shellcmd.py +22 -0
  53. promnesia/tests/sources/test_takeout.py +58 -0
  54. promnesia/tests/test_cannon.py +325 -0
  55. promnesia/tests/test_cli.py +42 -0
  56. promnesia/tests/test_compare.py +30 -0
  57. promnesia/tests/test_config.py +290 -0
  58. promnesia/tests/test_db_dump.py +223 -0
  59. promnesia/tests/test_extract.py +61 -0
  60. promnesia/tests/test_extract_urls.py +43 -0
  61. promnesia/tests/test_indexer.py +245 -0
  62. promnesia/tests/test_server.py +292 -0
  63. promnesia/tests/test_traverse.py +41 -0
  64. promnesia/tests/utils.py +35 -0
  65. {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/METADATA +14 -19
  66. promnesia-1.2.20240810.dist-info/RECORD +83 -0
  67. {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/WHEEL +1 -1
  68. {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/entry_points.txt +0 -1
  69. promnesia/dump.py +0 -105
  70. promnesia-1.1.20230129.dist-info/RECORD +0 -55
  71. {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/LICENSE +0 -0
  72. {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/top_level.txt +0 -0
promnesia/__main__.py CHANGED
@@ -1,21 +1,25 @@
1
+ from __future__ import annotations
2
+
1
3
  import argparse
2
- import logging
4
+ import ast
5
+ import importlib
3
6
  import inspect
4
- import sys
5
- from typing import List, Tuple, Optional, Dict, Sequence, Iterable, Iterator, Union
7
+ import os
6
8
  from pathlib import Path
7
- from datetime import datetime
8
- from .compat import check_call, register_argparse_extend_action_in_pre_py38
9
- from tempfile import TemporaryDirectory
9
+ import shutil
10
+ from subprocess import run, check_call, Popen
11
+ import sys
12
+ from tempfile import TemporaryDirectory, gettempdir
13
+ from typing import Callable, Sequence, Iterable, Iterator, Union
10
14
 
11
15
 
12
16
  from . import config
13
17
  from . import server
14
18
  from .misc import install_server
15
- from .common import PathIsh, logger, get_tmpdir, DbVisit, Res
19
+ from .common import Extractor, PathIsh, logger, get_tmpdir, DbVisit, Res
16
20
  from .common import Source, get_system_tz, user_config_file, default_config_path
17
- from .dump import visits_to_sqlite
18
- from .extract import extract_visits, make_filter
21
+ from .database.dump import visits_to_sqlite
22
+ from .extract import extract_visits
19
23
 
20
24
 
21
25
  def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Res[DbVisit]]:
@@ -36,7 +40,7 @@ def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Re
36
40
 
37
41
  for i, source in enumerate(sources):
38
42
  # TODO why would it not be present??
39
- name = getattr(source, "name", None)
43
+ name: str | None = getattr(source, "name", None)
40
44
  if name and is_subset_sources:
41
45
  matched = name in sources_subset or i in sources_subset
42
46
  if matched:
@@ -72,7 +76,7 @@ def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Re
72
76
 
73
77
  def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=(), overwrite_db: bool=False) -> Iterable[Exception]:
74
78
  # also keep & return errors for further display
75
- errors: List[Exception] = []
79
+ errors: list[Exception] = []
76
80
  def it() -> Iterable[Res[DbVisit]]:
77
81
  for v in iter_all_visits(sources_subset):
78
82
  if isinstance(v, Exception):
@@ -93,38 +97,38 @@ def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=(), ove
93
97
 
94
98
 
95
99
  def do_index(
96
- config_file: Path,
97
- dry: bool=False,
98
- sources_subset: Iterable[Union[str, int]]=(),
99
- overwrite_db: bool=False,
100
- ) -> None:
100
+ config_file: Path,
101
+ dry: bool=False,
102
+ sources_subset: Iterable[Union[str, int]]=(),
103
+ overwrite_db: bool=False,
104
+ ) -> Sequence[Exception]:
101
105
  config.load_from(config_file) # meh.. should be cleaner
102
106
  try:
103
107
  errors = list(_do_index(dry=dry, sources_subset=sources_subset, overwrite_db=overwrite_db))
104
108
  finally:
109
+ # this reset is mainly for tests, so we don't end up reusing the same config by accident
105
110
  config.reset()
106
111
  if len(errors) > 0:
107
112
  logger.error('%d errors, printing them out:', len(errors))
108
113
  for e in errors:
109
114
  logger.exception(e)
110
115
  logger.error('%d errors, exit code 1', len(errors))
111
- sys.exit(1)
116
+ return errors
112
117
 
113
118
 
114
- def demo_sources():
115
- def lazy(name: str):
119
+ def demo_sources() -> dict[str, Callable[[], Extractor]]:
120
+ def lazy(name: str) -> Callable[[], Extractor]:
116
121
  # helper to avoid failed imports etc, since people might be lacking necessary dependencies
117
- def inner(*args, **kwargs):
122
+ def inner() -> Extractor:
118
123
  from . import sources
119
- import importlib
120
- module = importlib.import_module('promnesia.sources' + '.' + name)
124
+ module = importlib.import_module(f'promnesia.sources.{name}')
121
125
  return getattr(module, 'index')
122
126
  return inner
123
127
 
124
128
  res = {}
125
- import ast
126
129
  import promnesia.sources
127
- for p in promnesia.sources.__path__: # type: ignore[attr-defined] # should be present
130
+ path: list[str] = getattr(promnesia.sources, '__path__') # should be present
131
+ for p in path:
128
132
  for x in sorted(Path(p).glob('*.py')):
129
133
  a = ast.parse(x.read_text())
130
134
  candidates = [c for c in a.body if getattr(c, 'name', None) == 'index']
@@ -137,14 +141,13 @@ def do_demo(
137
141
  *,
138
142
  index_as: str,
139
143
  params: Sequence[str],
140
- port: Optional[str],
141
- config_file: Optional[Path],
144
+ port: str | None,
145
+ config_file: Path | None,
142
146
  dry: bool=False,
143
147
  name: str='demo',
144
148
  sources_subset: Iterable[Union[str, int]]=(),
145
149
  overwrite_db: bool=False,
146
150
  ) -> None:
147
- from pprint import pprint
148
151
  with TemporaryDirectory() as tdir:
149
152
  outdir = Path(tdir)
150
153
 
@@ -185,7 +188,6 @@ def do_demo(
185
188
 
186
189
 
187
190
  def read_example_config() -> str:
188
- import inspect
189
191
  from .misc import config_example
190
192
  return inspect.getsource(config_example)
191
193
 
@@ -214,19 +216,25 @@ def config_check(args: argparse.Namespace) -> None:
214
216
 
215
217
 
216
218
  def _config_check(cfg: Path) -> Iterable[Exception]:
217
- from .compat import run
218
-
219
219
  logger.info('config: %s', cfg)
220
220
 
221
- def check(cmd) -> Iterable[Exception]:
221
+ def check(cmd: list[str | Path], **kwargs) -> Iterable[Exception]:
222
222
  logger.debug(' '.join(map(str, cmd)))
223
- res = run(cmd)
223
+ res = run(cmd, **kwargs)
224
224
  if res.returncode > 0:
225
225
  yield Exception()
226
226
 
227
227
  logger.info('Checking syntax...')
228
- cmd = [sys.executable, '-m', 'compileall', cfg]
229
- yield from check(cmd)
228
+ cmd: list[str | Path] = [sys.executable, '-m', 'compileall', cfg]
229
+ yield from check(
230
+ cmd,
231
+ env={
232
+ **os.environ,
233
+ # if config is on read only partition, the command would fail due to generated bytecode
234
+ # so put it in the temporary directory instead
235
+ 'PYTHONPYCACHEPREFIX': gettempdir()
236
+ },
237
+ )
230
238
 
231
239
  # todo not sure if should be more defensive than check_call here
232
240
  logger.info('Checking type safety...')
@@ -265,18 +273,16 @@ def cli_doctor_db(args: argparse.Namespace) -> None:
265
273
  check_call(cmd)
266
274
 
267
275
  bro = 'sqlitebrowser'
268
- import shutil
269
276
  if not shutil.which(bro):
270
277
  logger.error(f'Install {bro} to inspect the database!')
271
278
  sys.exit(1)
272
279
  cmd = [bro, str(db)]
273
280
  logger.debug(f'Running {cmd}')
274
- from .compat import Popen
275
281
  Popen(cmd)
276
282
 
277
283
 
278
284
  def cli_doctor_server(args: argparse.Namespace) -> None:
279
- port = args.port
285
+ port: str = args.port
280
286
  endpoint = f'http://localhost:{port}/status'
281
287
  cmd = ['curl', endpoint]
282
288
  logger.info(f'Running {cmd}')
@@ -296,12 +302,11 @@ def _ordinal_or_name(s: str) -> Union[str, int]:
296
302
  def main() -> None:
297
303
  # TODO longer, literate description?
298
304
 
299
- def add_index_args(parser: argparse.ArgumentParser, default_config_path: Optional[PathIsh]=None) -> None:
305
+ def add_index_args(parser: argparse.ArgumentParser, default_config_path: PathIsh | None = None) -> None:
300
306
  """
301
307
  :param default_config_path:
302
308
  if not given, all :func:`demo_sources()` are run
303
309
  """
304
- register_argparse_extend_action_in_pre_py38(parser)
305
310
  parser.add_argument('--config', type=Path, default=default_config_path, help='Config path')
306
311
  parser.add_argument('--dry', action='store_true', help="Dry run, won't touch the database, only print the results out")
307
312
  parser.add_argument(
@@ -322,14 +327,14 @@ def main() -> None:
322
327
  )
323
328
 
324
329
  F = lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, width=120)
325
- p = argparse.ArgumentParser(formatter_class=F) # type: ignore
330
+ p = argparse.ArgumentParser(formatter_class=F)
326
331
  subp = p.add_subparsers(dest='mode', )
327
332
  ep = subp.add_parser('index', help='Create/update the link database', formatter_class=F)
328
333
  add_index_args(ep, default_config_path())
329
334
  # TODO use some way to override or provide config only via cmdline?
330
335
  ep.add_argument('--intermediate', required=False, help="Used for development, you don't need it")
331
336
 
332
- sp = subp.add_parser('serve', help='Serve a link database', formatter_class=F) # type: ignore
337
+ sp = subp.add_parser('serve', help='Serve a link database', formatter_class=F)
333
338
  server.setup_parser(sp)
334
339
 
335
340
  ap = subp.add_parser('demo', help='Demo mode: index and serve a directory in single command', formatter_class=F)
@@ -378,8 +383,9 @@ def main() -> None:
378
383
 
379
384
  args = p.parse_args()
380
385
 
386
+ mode: str | None = args.mode
381
387
  # TODO is there a way to print full help? i.e. for all subparsers
382
- if args.mode is None:
388
+ if mode is None:
383
389
  print('ERROR: Please specify a mode', file=sys.stderr)
384
390
  p.print_help(sys.stderr)
385
391
  sys.exit(1)
@@ -391,16 +397,18 @@ def main() -> None:
391
397
  # worst case -- could use database?
392
398
 
393
399
  with get_tmpdir() as tdir: # TODO??
394
- if args.mode == 'index':
395
- do_index(
400
+ if mode == 'index':
401
+ errors = do_index(
396
402
  config_file=args.config,
397
403
  dry=args.dry,
398
404
  sources_subset=args.sources,
399
405
  overwrite_db=args.overwrite,
400
406
  )
401
- elif args.mode == 'serve':
407
+ if len(errors) > 0:
408
+ sys.exit(1)
409
+ elif mode == 'serve':
402
410
  server.run(args)
403
- elif args.mode == 'demo':
411
+ elif mode == 'demo':
404
412
  # TODO not sure if 'as' is that useful
405
413
  # something like Telegram/Takeout is too hard to setup to justify adhoc mode like this?
406
414
  do_demo(
@@ -413,14 +421,14 @@ def main() -> None:
413
421
  sources_subset=args.sources,
414
422
  overwrite_db=args.overwrite,
415
423
  )
416
- elif args.mode == 'install-server': # todo rename to 'autostart' or something?
424
+ elif mode == 'install-server': # todo rename to 'autostart' or something?
417
425
  install_server.install(args)
418
- elif args.mode == 'config':
426
+ elif mode == 'config':
419
427
  args.func(args)
420
- elif args.mode == 'doctor':
428
+ elif mode == 'doctor':
421
429
  args.func(args)
422
430
  else:
423
- raise AssertionError(f'unexpected mode {args.mode}')
431
+ raise AssertionError(f'unexpected mode {mode}')
424
432
 
425
433
  if __name__ == '__main__':
426
434
  main()
promnesia/cannon.py CHANGED
@@ -422,7 +422,7 @@ def canonify(url: str) -> str:
422
422
  qq = [(k, v) for i, k, v in sorted(iqq)]
423
423
  # TODO still not sure what we should do..
424
424
  # quote_plus replaces %20 with +, not sure if we want it...
425
- query = urlencode(qq, quote_via=quote_via) # type: ignore[type-var]
425
+ query = urlencode(qq, quote_via=quote_via)
426
426
 
427
427
  path = _quote_path(path)
428
428
 
@@ -683,7 +683,7 @@ def domains(it): # pragma: no cover
683
683
  try:
684
684
  nurl = canonify(url)
685
685
  except CanonifyException as e:
686
- print(f"ERROR while normalising! {nurl} {e}")
686
+ print(f"ERROR while normalising! {url} {e}")
687
687
  c['ERROR'] += 1
688
688
  continue
689
689
  else:
@@ -718,7 +718,7 @@ def groups(it, args): # pragma: no cover
718
718
  try:
719
719
  nurl = canonify(url)
720
720
  except CanonifyException as e:
721
- print(f"ERROR while normalising! {nurl} {e}")
721
+ print(f"ERROR while normalising! {url} {e}")
722
722
  continue
723
723
  udom = nurl[:nurl.find('/')]
724
724
  usplit = udom.split('.')
@@ -818,7 +818,7 @@ def main() -> None: # pragma: no cover
818
818
 
819
819
  - running comparison
820
820
  sqlite3 promnesia.sqlite 'select distinct orig_url from visits where norm_url like "%twitter%" order by orig_url' | src/promnesia/cannon.py
821
- ''', formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=100) # type: ignore
821
+ ''', formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=100)
822
822
  )
823
823
  p.add_argument('input', nargs='?')
824
824
  p.add_argument('--human', action='store_true')
promnesia/common.py CHANGED
@@ -1,22 +1,25 @@
1
- from collections.abc import Sized
1
+ from __future__ import annotations
2
+
2
3
  from contextlib import contextmanager
3
4
  from datetime import datetime, date
4
- import os
5
- from typing import NamedTuple, Set, Iterable, Dict, TypeVar, Callable, List, Optional, Union, Any, Collection, Sequence, Tuple, TypeVar, TYPE_CHECKING
6
- from pathlib import Path
5
+ from functools import lru_cache
7
6
  from glob import glob
8
7
  import itertools
9
- from more_itertools import intersperse
10
8
  import logging
11
- from functools import lru_cache
9
+ import os
10
+ from pathlib import Path
12
11
  import shutil
12
+ from subprocess import run, PIPE, Popen
13
13
  from timeit import default_timer as timer
14
14
  from types import ModuleType
15
+ from typing import NamedTuple, Iterable, TypeVar, Callable, List, Optional, Union, TypeVar
15
16
  import warnings
16
17
 
18
+ from more_itertools import intersperse
17
19
  import pytz
18
20
 
19
21
  from .cannon import canonify
22
+ from .compat import removeprefix
20
23
 
21
24
 
22
25
  _is_windows = os.name == 'nt'
@@ -74,14 +77,26 @@ class Loc(NamedTuple):
74
77
  # but generally, it will be
75
78
  # (url|file)(linenumber|json_path|anchor)
76
79
 
80
+
81
+ @lru_cache(None)
82
+ def warn_once(message: str) -> None:
83
+ # you'd think that warnings module already logs warnings only once per line..
84
+ # but sadly it's not the case
85
+ # see https://github.com/karlicoss/python_duplicate_warnings_investigation/blob/master/test.py
86
+ warnings.warn(message, stacklevel=2)
87
+
88
+
89
+ def _warn_no_xdg_mime() -> None:
90
+ warn_once("No xdg-mime on your OS! If you're on OSX, perhaps you can help me! https://github.com/karlicoss/open-in-editor/issues/1")
91
+
92
+
77
93
  @lru_cache(1)
78
94
  def _detect_mime_handler() -> str:
79
95
  def exists(what: str) -> bool:
80
- from .compat import run, PIPE
81
96
  try:
82
97
  r = run(f'xdg-mime query default x-scheme-handler/{what}'.split(), stdout=PIPE)
83
- except FileNotFoundError:
84
- warnings.warn("No xdg-mime on your OS! If you're on OSX, perhaps you can help me! https://github.com/karlicoss/open-in-editor/issues/1")
98
+ except (FileNotFoundError, NotADirectoryError): # ugh seems that osx might throw NotADirectory for some reason
99
+ _warn_no_xdg_mime()
85
100
  return False
86
101
  if r.returncode > 0:
87
102
  warnings.warn('xdg-mime failed') # hopefully rest is in stderr
@@ -101,6 +116,7 @@ def _detect_mime_handler() -> str:
101
116
  result = 'emacs:'
102
117
 
103
118
  # 2. now try to use newer editor:// thing
119
+ # TODO flip order here? should rely on editor:// first?
104
120
 
105
121
  # TODO would be nice to collect warnings and display at the end
106
122
  if not exists('editor'):
@@ -191,7 +207,7 @@ def get_logger() -> logging.Logger:
191
207
  import tempfile
192
208
  # kinda singleton
193
209
  @lru_cache(1)
194
- def get_tmpdir() -> tempfile.TemporaryDirectory:
210
+ def get_tmpdir() -> tempfile.TemporaryDirectory[str]:
195
211
  # todo use appdirs?
196
212
  tdir = tempfile.TemporaryDirectory(suffix="promnesia")
197
213
  return tdir
@@ -284,9 +300,10 @@ def _guess_name(thing: PreSource) -> str:
284
300
  guess = thing.__module__
285
301
 
286
302
  dflt = 'promnesia.sources.'
287
- if guess.startswith(dflt):
288
- # meh
289
- guess = guess[len(dflt):]
303
+ guess = removeprefix(guess, prefix=dflt)
304
+ if guess == 'config':
305
+ # this happens when we define a lambda in config or something without properly wrapping in Source
306
+ logger.warning(f'Inferred source name "config" for {thing}. This might be misleading TODO')
290
307
  return guess
291
308
 
292
309
 
@@ -296,7 +313,7 @@ def _get_index_function(sourceish: PreSource) -> PreExtractor:
296
313
  if hasattr(sourceish, 'index'): # must be a module
297
314
  res = getattr(sourceish, 'index')
298
315
  else:
299
- res = sourceish # type: ignore[assignment]
316
+ res = sourceish
300
317
  return res
301
318
 
302
319
 
@@ -316,12 +333,17 @@ class Source:
316
333
  self.extractor: Extractor = lambda: self.ff(*self.args, **self.kwargs)
317
334
  if src is not None:
318
335
  warnings.warn("'src' argument is deprecated, please use 'name' instead", DeprecationWarning)
319
- try:
320
- name_guess = _guess_name(ff)
321
- except:
322
- # todo warn?
323
- name_guess = ''
324
- self.name = name or src or name_guess
336
+ if name != '':
337
+ self.name = name
338
+ elif src != '':
339
+ self.name = src
340
+ else:
341
+ try:
342
+ name_guess = _guess_name(ff)
343
+ except:
344
+ # todo warn?
345
+ name_guess = ''
346
+ self.name = name_guess
325
347
 
326
348
  @property
327
349
  def description(self) -> str:
@@ -370,7 +392,7 @@ def appdirs():
370
392
  under_test = os.environ.get('PYTEST_CURRENT_TEST') is not None
371
393
  # todo actually use test name?
372
394
  name = 'promnesia-test' if under_test else 'promnesia'
373
- import appdirs as ad # type: ignore[import]
395
+ import appdirs as ad # type: ignore[import-untyped]
374
396
  return ad.AppDirs(appname=name)
375
397
 
376
398
 
@@ -460,13 +482,13 @@ def fdfind_args(root: Path, follow: bool, ignore: List[str]=[]) -> List[str]:
460
482
  ignore_args = []
461
483
  if ignore:
462
484
  # Add a statement that excludes the folder
463
- ignore_args = [['--exclude', f'{n}'] for n in ignore]
485
+ _ignore_args = [['--exclude', f'{n}'] for n in ignore]
464
486
  # Flatten the list of lists
465
- ignore_args_l = list(itertools.chain(*ignore_args))
487
+ ignore_args = list(itertools.chain(*_ignore_args))
466
488
 
467
489
  return [
468
490
  *extra_fd_args(),
469
- *ignore_args_l,
491
+ *ignore_args,
470
492
  *(['--follow'] if follow else []),
471
493
  '--type', 'f',
472
494
  '.',
@@ -491,7 +513,6 @@ def traverse(root: Path, *, follow: bool=True, ignore: List[str]=[]) -> Iterable
491
513
  yield from (Path(r) / f for f in files if f not in ignore)
492
514
  return
493
515
 
494
- from .compat import Popen, PIPE
495
516
  cmd = ['find', *find_args(root, follow=follow, ignore=ignore)]
496
517
  # try to use fd.. it cooperates well with gitignore etc, also faster than find
497
518
  for x in ('fd', 'fd-find', 'fdfind'): # has different names on different dists..
@@ -516,17 +537,7 @@ def traverse(root: Path, *, follow: bool=True, ignore: List[str]=[]) -> Iterable
516
537
  def get_system_zone() -> str:
517
538
  try:
518
539
  import tzlocal
519
- # note: tzlocal mypy stubs aren't aware of api change yet (see https://github.com/python/typeshed/issues/6038)
520
- try:
521
- # 4.0 way
522
- return tzlocal.get_localzone_name() # type: ignore[attr-defined]
523
- except AttributeError as e:
524
- # 2.0 way
525
- zone = tzlocal.get_localzone().zone # type: ignore[attr-defined]
526
- # see https://github.com/python/typeshed/blame/968fd6d01d23470e0c8368e7ee7c43f54aaedc0e/stubs/pytz/pytz/tzinfo.pyi#L6
527
- # it says all concrete instances should not be None
528
- assert zone is not None
529
- return zone
540
+ return tzlocal.get_localzone_name()
530
541
  except Exception as e:
531
542
  logger.exception(e)
532
543
  logger.error("Couldn't determine system timezone. Falling back to UTC. Please report this as a bug!")
@@ -540,7 +551,7 @@ def get_system_tz() -> pytz.BaseTzInfo:
540
551
  return pytz.timezone(zone)
541
552
  except Exception as e:
542
553
  logger.exception(e)
543
- logger.error(f"Unknown time zone %s. Falling back to UTC. Please report this as a bug!", zone)
554
+ logger.error("Unknown time zone %s. Falling back to UTC. Please report this as a bug!", zone)
544
555
  return pytz.utc
545
556
 
546
557
  # used in misc/install_server.py
@@ -578,7 +589,7 @@ def default_config_path() -> Path:
578
589
 
579
590
 
580
591
  @contextmanager
581
- def measure(tag: str='', *, logger, unit: str='ms'):
592
+ def measure(tag: str='', *, logger: logging.Logger, unit: str='ms'):
582
593
  before = timer()
583
594
  yield lambda: timer() - before
584
595
  after = timer()
@@ -586,3 +597,11 @@ def measure(tag: str='', *, logger, unit: str='ms'):
586
597
  mult = {'s': 1, 'ms': 10**3, 'us': 10**6}[unit]
587
598
  xx = secs * mult
588
599
  logger.debug(f'[{tag}]: {xx:.1f}{unit} elapsed')
600
+
601
+
602
+ def is_sqlite_db(x: Path) -> bool:
603
+ return x.is_file() and mime(x) in {
604
+ 'application/x-sqlite3',
605
+ 'application/vnd.sqlite3',
606
+ # TODO this mime can also match wal files/journals, not sure
607
+ }
promnesia/compare.py CHANGED
@@ -8,6 +8,7 @@ from typing import Dict, List, Any, NamedTuple, Optional, Iterator, Set, Tuple
8
8
 
9
9
 
10
10
  from .common import DbVisit, Url, PathWithMtime # TODO ugh. figure out pythonpath
11
+ from .database.load import row_to_db_visit
11
12
 
12
13
  # TODO include latest too?
13
14
  # from cconfig import ignore, filtered
@@ -139,10 +140,10 @@ def compare_files(*files: Path, log=True) -> Iterator[Tuple[str, DbVisit]]:
139
140
  this_dts = name[0: name.index('.')] # can't use stem due to multiple extensions..
140
141
 
141
142
  from promnesia.server import _get_stuff # TODO ugh
142
- engine, binder, table = _get_stuff(PathWithMtime.make(f))
143
+ engine, table = _get_stuff(PathWithMtime.make(f))
143
144
 
144
145
  with engine.connect() as conn:
145
- vis = [binder.from_row(row) for row in conn.execute(table.select())] # type: ignore[var-annotated]
146
+ vis = [row_to_db_visit(row) for row in conn.execute(table.select())]
146
147
 
147
148
  if last is not None:
148
149
  between = f'{last_dts}:{this_dts}'
promnesia/compat.py CHANGED
@@ -1,71 +1,12 @@
1
- from pathlib import Path
2
- import sys
3
- from typing import Union, Sequence, List, TYPE_CHECKING
4
-
5
-
6
- PathIsh = Union[Path, str]
7
- Paths = Sequence[PathIsh]
8
-
9
- # TLDR: py37 on windows has an annoying bug.. https://github.com/karlicoss/promnesia/issues/91#issuecomment-701051074
10
- def _fix(args: Paths) -> List[str]:
11
- assert not isinstance(args, str), args # just to prevent shell=True calls...
12
- return list(map(str, args))
13
-
14
-
15
- import argparse
16
-
17
- def register_argparse_extend_action_in_pre_py38(parser: argparse.ArgumentParser):
18
- import sys
19
-
20
- if sys.version_info < (3, 8):
21
-
22
- class ExtendAction(argparse.Action):
23
-
24
- def __call__(self, parser, namespace, values, option_string=None):
25
- items = getattr(namespace, self.dest) or []
26
- items.extend(values)
27
- setattr(namespace, self.dest, items)
28
-
29
-
30
- parser.register('action', 'extend', ExtendAction)
31
-
32
-
33
- import subprocess
34
- from subprocess import PIPE # for convenience?
35
-
36
-
37
- if TYPE_CHECKING:
38
- from subprocess import run, check_call, check_output, Popen
39
- else:
40
- def run(args: Paths, **kwargs) -> subprocess.CompletedProcess:
41
- return subprocess.run(_fix(args), **kwargs)
42
-
43
- def check_call(args: Paths, **kwargs) -> None:
44
- subprocess.check_call(_fix(args), **kwargs)
45
-
46
- def check_output(args: Paths, **kwargs) -> bytes:
47
- return subprocess.check_output(_fix(args), **kwargs)
48
-
49
- def Popen(args: Paths, **kwargs) -> subprocess.Popen:
50
- return subprocess.Popen(_fix(args), **kwargs)
1
+ ## we used to have compat fixes here for these for python3.7
2
+ ## keeping in case any sources depended on compat functions
3
+ from subprocess import PIPE, run, check_call, check_output, Popen
4
+ from typing import Protocol, Literal
5
+ ##
51
6
 
52
7
 
53
8
  # can remove after python3.9
54
9
  def removeprefix(text: str, prefix: str) -> str:
55
10
  if text.startswith(prefix):
56
11
  return text[len(prefix):]
57
- return text
58
-
59
-
60
- # TODO Deprecate instead, they shouldn't be exported form this module
61
- # del PathIsh
62
- # del Paths
63
-
64
- if sys.version_info[:2] >= (3, 8):
65
- from typing import Protocol
66
- else:
67
- if TYPE_CHECKING:
68
- from typing_extensions import Protocol # type: ignore[misc]
69
- else:
70
- # todo could also use NamedTuple?
71
- Protocol = object
12
+ return text
promnesia/config.py CHANGED
@@ -6,7 +6,7 @@ import importlib
6
6
  import importlib.util
7
7
  import warnings
8
8
 
9
- from .common import PathIsh, get_tmpdir, appdirs, default_output_dir, default_cache_dir, user_config_file
9
+ from .common import PathIsh, default_output_dir, default_cache_dir
10
10
  from .common import Res, Source, DbVisit
11
11
 
12
12
 
@@ -69,6 +69,8 @@ class Config(NamedTuple):
69
69
 
70
70
  @property
71
71
  def cache_dir(self) -> Optional[Path]:
72
+ # TODO we used to use this for cachew, but it's best to rely on HPI modules etc to cofigure this
73
+ # keeping just in case for now
72
74
  cd = self.CACHE_DIR
73
75
  cpath: Optional[Path]
74
76
  if cd is None:
@@ -127,7 +129,7 @@ def import_config(config_file: PathIsh) -> Config:
127
129
  spec = importlib.util.spec_from_file_location(name, p); assert spec is not None
128
130
  mod = importlib.util.module_from_spec(spec); assert mod is not None
129
131
  loader = spec.loader; assert loader is not None
130
- loader.exec_module(mod) # type: ignore[attr-defined]
132
+ loader.exec_module(mod)
131
133
 
132
134
  d = {}
133
135
  for f in Config._fields: