promnesia 1.2.20240810__py3-none-any.whl → 1.3.20241021__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. promnesia/__init__.py +14 -3
  2. promnesia/__main__.py +38 -25
  3. promnesia/cannon.py +23 -23
  4. promnesia/common.py +49 -42
  5. promnesia/compare.py +18 -20
  6. promnesia/compat.py +10 -10
  7. promnesia/config.py +20 -22
  8. promnesia/database/common.py +4 -3
  9. promnesia/database/dump.py +14 -13
  10. promnesia/database/load.py +7 -7
  11. promnesia/extract.py +13 -11
  12. promnesia/kjson.py +11 -10
  13. promnesia/logging.py +1 -1
  14. promnesia/misc/install_server.py +7 -8
  15. promnesia/server.py +42 -31
  16. promnesia/sources/auto.py +43 -30
  17. promnesia/sources/auto_logseq.py +6 -5
  18. promnesia/sources/auto_obsidian.py +2 -2
  19. promnesia/sources/browser.py +14 -9
  20. promnesia/sources/browser_legacy.py +17 -13
  21. promnesia/sources/demo.py +7 -7
  22. promnesia/sources/fbmessenger.py +3 -2
  23. promnesia/sources/filetypes.py +9 -7
  24. promnesia/sources/github.py +5 -7
  25. promnesia/sources/guess.py +2 -1
  26. promnesia/sources/hackernews.py +2 -2
  27. promnesia/sources/hpi.py +2 -2
  28. promnesia/sources/html.py +7 -5
  29. promnesia/sources/hypothesis.py +3 -2
  30. promnesia/sources/instapaper.py +2 -2
  31. promnesia/sources/markdown.py +17 -7
  32. promnesia/sources/org.py +20 -10
  33. promnesia/sources/plaintext.py +30 -31
  34. promnesia/sources/pocket.py +3 -2
  35. promnesia/sources/reddit.py +19 -18
  36. promnesia/sources/roamresearch.py +2 -1
  37. promnesia/sources/rss.py +3 -4
  38. promnesia/sources/shellcmd.py +19 -6
  39. promnesia/sources/signal.py +14 -13
  40. promnesia/sources/smscalls.py +2 -2
  41. promnesia/sources/stackexchange.py +3 -2
  42. promnesia/sources/takeout.py +23 -13
  43. promnesia/sources/takeout_legacy.py +15 -11
  44. promnesia/sources/telegram.py +13 -11
  45. promnesia/sources/telegram_legacy.py +18 -7
  46. promnesia/sources/twitter.py +6 -5
  47. promnesia/sources/vcs.py +5 -3
  48. promnesia/sources/viber.py +10 -9
  49. promnesia/sources/website.py +4 -4
  50. promnesia/sources/zulip.py +3 -2
  51. promnesia/sqlite.py +7 -4
  52. promnesia/tests/common.py +8 -5
  53. promnesia/tests/server_helper.py +11 -8
  54. promnesia/tests/sources/test_auto.py +2 -3
  55. promnesia/tests/sources/test_filetypes.py +2 -1
  56. promnesia/tests/sources/test_hypothesis.py +3 -3
  57. promnesia/tests/sources/test_org.py +2 -3
  58. promnesia/tests/sources/test_plaintext.py +0 -1
  59. promnesia/tests/sources/test_shellcmd.py +3 -4
  60. promnesia/tests/sources/test_takeout.py +3 -5
  61. promnesia/tests/test_cannon.py +5 -5
  62. promnesia/tests/test_cli.py +4 -6
  63. promnesia/tests/test_compare.py +1 -1
  64. promnesia/tests/test_config.py +7 -8
  65. promnesia/tests/test_db_dump.py +11 -12
  66. promnesia/tests/test_extract.py +10 -6
  67. promnesia/tests/test_indexer.py +14 -8
  68. promnesia/tests/test_server.py +2 -3
  69. promnesia/tests/test_traverse.py +0 -2
  70. promnesia/tests/utils.py +4 -4
  71. {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/METADATA +3 -2
  72. promnesia-1.3.20241021.dist-info/RECORD +83 -0
  73. {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/WHEEL +1 -1
  74. promnesia-1.2.20240810.dist-info/RECORD +0 -83
  75. {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/LICENSE +0 -0
  76. {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/entry_points.txt +0 -0
  77. {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/top_level.txt +0 -0
promnesia/compat.py CHANGED
@@ -1,12 +1,12 @@
-## we used to have compat fixes here for these for python3.7
-## keeping in case any sources depended on compat functions
-from subprocess import PIPE, run, check_call, check_output, Popen
-from typing import Protocol, Literal
-##
+from typing import TYPE_CHECKING
 
+if not TYPE_CHECKING:
+    ## we used to have compat fixes here for these for python3.7
+    ## keeping in case any sources depended on compat functions
+    from subprocess import PIPE, Popen, check_call, check_output, run # noqa: F401
+    from typing import Literal, Protocol # noqa: F401
+    ##
 
 
-# can remove after python3.9
-def removeprefix(text: str, prefix: str) -> str:
-    if text.startswith(prefix):
-        return text[len(prefix):]
-    return text
+# todo deprecate properly
+def removeprefix(text: str, prefix: str) -> str:
+    return text.removeprefix(prefix)
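
Note for consumers of promnesia.compat: the shim now simply delegates to str.removeprefix (a built-in since Python 3.9), so behaviour is unchanged. A quick illustrative check, assuming the package is installed:

    from promnesia.compat import removeprefix

    # delegates to str.removeprefix: strips the prefix if present, otherwise returns the input unchanged
    assert removeprefix('https://example.com', 'https://') == 'example.com'
    assert removeprefix('example.com', 'https://') == 'example.com'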
promnesia/config.py CHANGED
@@ -1,21 +1,19 @@
-from pathlib import Path
-import os
-from types import ModuleType
-from typing import List, Optional, Union, NamedTuple, Iterable, Callable
+from __future__ import annotations
+
 import importlib
 import importlib.util
+import os
 import warnings
+from collections.abc import Iterable
+from pathlib import Path
+from types import ModuleType
+from typing import Callable, NamedTuple, Union
 
-from .common import PathIsh, default_output_dir, default_cache_dir
-from .common import Res, Source, DbVisit
-
+from .common import DbVisit, PathIsh, Res, Source, default_cache_dir, default_output_dir
 
 HookT = Callable[[Res[DbVisit]], Iterable[Res[DbVisit]]]
 
 
-from typing import Any
-
-
 ModuleName = str
 
 # something that can be converted into a proper Source
@@ -24,19 +22,19 @@ ConfigSource = Union[Source, ModuleName, ModuleType]
 
 class Config(NamedTuple):
     # TODO remove default from sources once migrated
-    SOURCES: List[ConfigSource] = []
+    SOURCES: list[ConfigSource] = []
 
     # if not specified, uses user data dir
-    OUTPUT_DIR: Optional[PathIsh] = None
+    OUTPUT_DIR: PathIsh | None = None
 
-    CACHE_DIR: Optional[PathIsh] = ''
-    FILTERS: List[str] = []
+    CACHE_DIR: PathIsh | None = ''
+    FILTERS: list[str] = []
 
-    HOOK: Optional[HookT] = None
+    HOOK: HookT | None = None
 
     #
     # NOTE: INDEXERS is deprecated, use SOURCES instead
-    INDEXERS: List[ConfigSource] = []
+    INDEXERS: list[ConfigSource] = []
     #MIME_HANDLER: Optional[str] = None # TODO
 
     @property
@@ -68,11 +66,11 @@ class Config(NamedTuple):
             yield Source(r)
 
     @property
-    def cache_dir(self) -> Optional[Path]:
+    def cache_dir(self) -> Path | None:
         # TODO we used to use this for cachew, but it's best to rely on HPI modules etc to cofigure this
         # keeping just in case for now
         cd = self.CACHE_DIR
-        cpath: Optional[Path]
+        cpath: Path | None
         if cd is None:
             cpath = None # means 'disabled' in cachew
         elif cd == '': # meh.. but need to make it None friendly..
@@ -96,10 +94,10 @@ class Config(NamedTuple):
         return self.output_dir / 'promnesia.sqlite'
 
     @property
-    def hook(self) -> Optional[HookT]:
+    def hook(self) -> HookT | None:
         return self.HOOK
 
-instance: Optional[Config] = None
+instance: Config | None = None
 
 
 def has() -> bool:
@@ -139,7 +137,7 @@ def import_config(config_file: PathIsh) -> Config:
 
 
 # TODO: ugh. this causes warnings to be repeated multiple times... need to reuse the pool or something..
-def use_cores() -> Optional[int]:
+def use_cores() -> int | None:
     '''
     Somewhat experimental.
     For now only used in sources.auto, perhaps later will be shared among the other indexers.
@@ -154,7 +152,7 @@ def use_cores() -> Optional[int]:
     return 0
 
 
-def extra_fd_args() -> List[str]:
+def extra_fd_args() -> list[str]:
     '''
     Not sure where it belongs yet... so via env variable for now
    Can be used to pass --ignore-file parameter
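
Most of the annotation changes in this release (here and in the files below) follow one pattern: with from __future__ import annotations, annotations are stored as strings and evaluated lazily, so the PEP 604 "X | None" and PEP 585 "list[...]" / "dict[...]" forms can be used in signatures without requiring the newest interpreter at runtime. An illustrative sketch with hypothetical names, not code from the package:

    from __future__ import annotations  # annotations are no longer evaluated at definition time

    from pathlib import Path


    # before: def resolve(paths: Optional[List[str]]) -> Dict[str, Path]:
    def resolve(paths: list[str] | None = None) -> dict[str, Path]:
        return {p: Path(p) for p in (paths or [])}

Runtime expressions are a different story: a module-level alias such as Stats = dict[Optional[SourceName], int] in database/dump.py is still evaluated when the module loads, which is why Optional remains imported there.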
promnesia/database/common.py CHANGED
@@ -1,10 +1,11 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
 from datetime import datetime
-from typing import Sequence, Tuple
 
 from sqlalchemy import (
     Column,
     Integer,
-    Row,
     String,
 )
 
@@ -30,7 +31,7 @@ def get_columns() -> Sequence[Column]:
     return res
 
 
-def db_visit_to_row(v: DbVisit) -> Tuple:
+def db_visit_to_row(v: DbVisit) -> tuple:
     # ugh, very hacky...
     # we want to make sure the resulting tuple only consists of simple types
     # so we can use dbengine directly
promnesia/database/dump.py CHANGED
@@ -1,9 +1,11 @@
-from pathlib import Path
+from __future__ import annotations
+
 import sqlite3
-from typing import Dict, Iterable, List, Optional, Set
+from collections.abc import Iterable
+from pathlib import Path
+from typing import Optional
 
 from more_itertools import chunked
-
 from sqlalchemy import (
     Engine,
     MetaData,
@@ -16,6 +18,7 @@ from sqlalchemy import (
 )
 from sqlalchemy.dialects import sqlite as dialect_sqlite
 
+from .. import config
 from ..common import (
     DbVisit,
     Loc,
@@ -24,9 +27,7 @@ from ..common import (
     get_logger,
     now_tz,
 )
-from .common import get_columns, db_visit_to_row
-from .. import config
-
+from .common import db_visit_to_row, get_columns
 
 # NOTE: I guess the main performance benefit from this is not creating too many tmp lists and avoiding overhead
 # since as far as sql is concerned it should all be in the same transaction. only a guess
@@ -50,7 +51,7 @@ def begin_immediate_transaction(conn):
     conn.exec_driver_sql('BEGIN IMMEDIATE')
 
 
-Stats = Dict[Optional[SourceName], int]
+Stats = dict[Optional[SourceName], int]
 
 
 # returns critical warnings
@@ -58,8 +59,8 @@ def visits_to_sqlite(
     vit: Iterable[Res[DbVisit]],
     *,
     overwrite_db: bool,
-    _db_path: Optional[Path] = None, # only used in tests
-) -> List[Exception]:
+    _db_path: Path | None = None, # only used in tests
+) -> list[Exception]:
     if _db_path is None:
         db_path = config.get().db
     else:
@@ -95,7 +96,7 @@ def visits_to_sqlite(
 
     def query_total_stats(conn) -> Stats:
         query = select(table.c.src, func.count(table.c.src)).select_from(table).group_by(table.c.src)
-        return {src: cnt for (src, cnt) in conn.execute(query).all()}
+        return dict(conn.execute(query).all())
 
     def get_engine(*args, **kwargs) -> Engine:
         # kwargs['echo'] = True # useful for debugging
@@ -122,7 +123,7 @@ def visits_to_sqlite(
     # (note that this also requires WAL mode)
     engine = get_engine(f'sqlite:///{db_path}', connect_args={'timeout': _CONNECTION_TIMEOUT_SECONDS})
 
-    cleared: Set[str] = set()
+    cleared: set[str] = set()
 
     # by default, sqlalchemy does some sort of BEGIN (implicit) transaction, which doesn't provide proper isolation??
     # see https://docs.sqlalchemy.org/en/20/dialects/sqlite.html#serializable-isolation-savepoints-transactional-ddl
@@ -144,7 +145,7 @@ def visits_to_sqlite(
     insert_stmt_raw = str(insert_stmt.compile(dialect=dialect_sqlite.dialect(paramstyle='qmark')))
 
     for chunk in chunked(vit_ok(), n=_CHUNK_BY):
-        srcs = set(v.src or '' for v in chunk)
+        srcs = {v.src or '' for v in chunk}
         new = srcs.difference(cleared)
 
         for src in new:
@@ -181,7 +182,7 @@ def visits_to_sqlite(
     for k, v in stats_changes.items():
         logger.info(f'database stats changes: {k} {v}')
 
-    res: List[Exception] = []
+    res: list[Exception] = []
    if total_ok == 0:
        res.append(RuntimeError('No visits were indexed, something is probably wrong!'))
    return res
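
For context on the write path touched above: visits_to_sqlite keeps batching inserts with more_itertools.chunked, so only one chunk of rows is materialised at a time. A standalone sketch of that pattern (the table name, columns and chunk size here are illustrative, not the package's actual schema):

    from more_itertools import chunked
    from sqlalchemy import Column, MetaData, String, Table, create_engine

    engine = create_engine('sqlite:///:memory:')
    meta = MetaData()
    visits = Table('visits', meta, Column('norm_url', String), Column('src', String))
    meta.create_all(engine)

    rows = ({'norm_url': f'example.com/{i}', 'src': 'demo'} for i in range(10_000))

    with engine.begin() as conn:  # single transaction around all chunks
        for chunk in chunked(rows, n=1000):  # insert in fixed-size batches
            conn.execute(visits.insert(), chunk)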
promnesia/database/load.py CHANGED
@@ -1,19 +1,19 @@
+from __future__ import annotations
+
 from pathlib import Path
-from typing import Tuple, List
 
 from sqlalchemy import (
-    create_engine,
-    exc,
     Engine,
-    MetaData,
     Index,
+    MetaData,
     Table,
+    create_engine,
+    exc,
 )
 
 from .common import DbVisit, get_columns, row_to_db_visit
 
-
-DbStuff = Tuple[Engine, Table]
+DbStuff = tuple[Engine, Table]
 
 
 def get_db_stuff(db_path: Path) -> DbStuff:
@@ -39,7 +39,7 @@ def get_db_stuff(db_path: Path) -> DbStuff:
     return engine, table
 
 
-def get_all_db_visits(db_path: Path) -> List[DbVisit]:
+def get_all_db_visits(db_path: Path) -> list[DbVisit]:
     # NOTE: this is pretty inefficient if the DB is huge
     # mostly intended for tests
     engine, table = get_db_stuff(db_path)
promnesia/extract.py CHANGED
@@ -1,20 +1,22 @@
-from functools import lru_cache
+from __future__ import annotations
+
 import re
-import traceback
-from typing import Set, Iterable, Sequence, Union
+from collections.abc import Iterable, Sequence
+from functools import lru_cache
 
 from .cannon import CanonifyException
 from .common import (
-    logger,
-    DbVisit, Visit,
-    Res,
-    SourceName, Source,
+    DbVisit,
     Filter,
+    Res,
+    Results,
+    Source,
+    SourceName,
     Url,
-    Results, Extractor,
+    Visit,
+    logger,
 )
 
-
 DEFAULT_FILTERS = (
     r'^chrome-\w+://',
     r'chrome://newtab',
@@ -53,7 +55,7 @@ def extract_visits(source: Source, *, src: SourceName) -> Iterable[Res[DbVisit]]
         yield e
         return
 
-    handled: Set[Visit] = set()
+    handled: set[Visit] = set()
     try:
         for p in vit:
             if isinstance(p, Exception):
@@ -94,7 +96,7 @@ def filtered(url: Url) -> bool:
     return any(f(url) for f in filters())
 
 
-def make_filter(thing: Union[str, Filter]) -> Filter:
+def make_filter(thing: str | Filter) -> Filter:
     if isinstance(thing, str):
         rc = re.compile(thing)
         def filter_(u: str) -> bool:
promnesia/kjson.py CHANGED
@@ -3,19 +3,19 @@ Some experimental ideas on JSON processing.
 This is a bit overengineered and I admit it!
 I'll make it more readable, but in the meantime feel free to open an issue if you're confused about something.
 """
+from __future__ import annotations
 
-from typing import Any, Dict, List, Union, Tuple, cast
+from typing import Any, Union, cast
 
-
-JDict = Dict[str, Any] # TODO not sure if we can do recursive..
-JList = List[Any]
+JDict = dict[str, Any] # TODO not sure if we can do recursive..
+JList = list[Any]
 JPrim = Union[str, int, float] # , type(None)]
 
 Json = Union[JDict, JList, JPrim]
 
-JPathPart = Tuple[Json, Union[str, int]]
+JPathPart = tuple[Json, Union[str, int]]
 
-JPath = Tuple[JPathPart, ...]
+JPath = tuple[JPathPart, ...]
 
 
 class JsonProcessor:
@@ -36,7 +36,7 @@ class JsonProcessor:
         if res is self.SKIP:
             return
         for k, v in js.items():
-            path = cast(JPath, jp + ((js, k), ))
+            path = cast(JPath, jp + ((js, k), )) # noqa: RUF005
             self._do(v, path)
 
     def do_list(self, js: JList, jp: JPath) -> None:
@@ -45,7 +45,7 @@
         if res is self.SKIP:
             return
         for i, x in enumerate(js):
-            path = cast(JPath, jp + ((js, i), ))
+            path = cast(JPath, jp + ((js, i), )) # noqa: RUF005
             self._do(x, path)
 
     def _do(self, js: Json, path: JPath) -> None:
@@ -65,7 +65,7 @@ class JsonProcessor:
         self._do(js, path)
 
     @classmethod
-    def kpath(cls, path: JPath) -> Tuple[JPathPart, ...]:
+    def kpath(cls, path: JPath) -> tuple[JPathPart, ...]:
         return tuple(x[1] for x in path) # type: ignore
 
 # TODO path is a sequence of jsons and keys?
@@ -73,9 +73,10 @@ class JsonProcessor:
 def test_json_processor():
     handled = []
     class Proc(JsonProcessor):
-        def handle_dict(self, value: JDict, path):
+        def handle_dict(self, value: JDict, path): # noqa: ARG002
             if 'skipme' in self.kpath(path): # type: ignore[comparison-overlap]
                 return JsonProcessor.SKIP
+            return None
 
         def handle_str(self, value: str, path):
             if 'http' in value:
promnesia/logging.py CHANGED
@@ -29,9 +29,9 @@ def test() -> None:
 
 
 import logging
-from typing import Union, Optional, cast
 import os
 import warnings
+from typing import Optional, Union, cast
 
 Level = int
 LevelIsh = Optional[Union[Level, str]]
promnesia/misc/install_server.py CHANGED
@@ -1,15 +1,12 @@
-#!/usr/bin/env python3
 from __future__ import annotations
 
 import argparse
 import os
+import platform
 import sys
 import time
 from pathlib import Path
-import platform
-import shutil
 from subprocess import check_call, run
-from typing import List
 
 SYSTEM = platform.system()
 UNSUPPORTED_SYSTEM = RuntimeError(f'Platform {SYSTEM} is not supported yet!')
@@ -59,7 +56,7 @@ def systemd(*args: str | Path, method=check_call) -> None:
     ])
 
 
-def install_systemd(name: str, out: Path, launcher: str, largs: List[str]) -> None:
+def install_systemd(name: str, out: Path, launcher: str, largs: list[str]) -> None:
     unit_name = name
 
     import shlex
@@ -81,7 +78,7 @@ def install_systemd(name: str, out: Path, launcher: str, largs: List[str]) -> No
         raise e
 
 
-def install_launchd(name: str, out: Path, launcher: str, largs: List[str]) -> None:
+def install_launchd(name: str, out: Path, launcher: str, largs: list[str]) -> None:
     service_name = name
     arguments = '\n'.join(f'<string>{a}</string>' for a in [launcher, *largs])
     out.write_text(LAUNCHD_TEMPLATE.format(
@@ -116,14 +113,16 @@ def install(args: argparse.Namespace) -> None:
     print(f"Writing launch script to {out}", file=sys.stderr)
 
     # ugh. we want to know whether we're invoked 'properly' as an executable or ad-hoc via scripts/promnesia
+    extra_exe: list[str] = []
     if os.environ.get('DIRTY_RUN') is not None:
         launcher = str(root() / 'scripts/promnesia')
     else:
-        exe = shutil.which('promnesia'); assert exe is not None
-        launcher = exe # older systemd wants absolute paths..
+        launcher = sys.executable
+        extra_exe = ['-m', 'promnesia']
 
     db = args.db
     largs = [
+        *extra_exe,
         'serve',
         *([] if db is None else ['--db', str(db)]),
        '--timezone', args.timezone,
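
The install step above no longer resolves a promnesia executable via shutil.which; it launches the current interpreter with -m promnesia, which keeps working inside virtualenvs and still gives systemd the absolute path it wants. Roughly, the generated command now looks like this (the serve arguments shown are illustrative):

    import sys

    launcher = sys.executable  # absolute path to the running interpreter
    largs = ['-m', 'promnesia', 'serve', '--timezone', 'UTC']  # timezone value is illustrative
    print([launcher, *largs])
    # e.g. ['/usr/bin/python3', '-m', 'promnesia', 'serve', '--timezone', 'UTC']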
promnesia/server.py CHANGED
@@ -1,35 +1,45 @@
-#!/usr/bin/python3
 from __future__ import annotations
 
 import argparse
-from dataclasses import dataclass
-from datetime import timedelta
-from functools import lru_cache
 import importlib.metadata
 import json
 import logging
 import os
+from dataclasses import dataclass
+from datetime import timedelta
+from functools import lru_cache
 from pathlib import Path
-from typing import List, NamedTuple, Dict, Optional, Any, Tuple, Protocol
-
+from typing import Any, NamedTuple, Optional, Protocol
 
+import fastapi
 import pytz
 from pytz import BaseTzInfo
-
-import fastapi
-
-from sqlalchemy import literal, between, or_, and_, exc, select
-from sqlalchemy import Column, Table, func, types
-from sqlalchemy.sql.elements import ColumnElement
+from sqlalchemy import (
+    Column,
+    Table,
+    and_,
+    between,
+    exc,
+    func,
+    literal,
+    or_,
+    select,
+    types,
+)
 from sqlalchemy.sql import text
+from sqlalchemy.sql.elements import ColumnElement
 
-
-from .common import PathWithMtime, DbVisit, Url, setup_logger, default_output_dir, get_system_tz
 from .cannon import canonify
+from .common import (
+    DbVisit,
+    PathWithMtime,
+    default_output_dir,
+    get_system_tz,
+    setup_logger,
+)
 from .database.load import DbStuff, get_db_stuff, row_to_db_visit
 
-
-Json = Dict[str, Any]
+Json = dict[str, Any]
 
 app = fastapi.FastAPI()
 
@@ -65,7 +75,7 @@ class ServerConfig(NamedTuple):
         })
 
     @classmethod
-    def from_str(cls, cfgs: str) -> 'ServerConfig':
+    def from_str(cls, cfgs: str) -> ServerConfig:
         d = json.loads(cfgs)
         return cls(
             db =Path (d['db']),
@@ -111,7 +121,7 @@ def as_json(v: DbVisit) -> Json:
     }
 
 
-def get_db_path(check: bool=True) -> Path:
+def get_db_path(*, check: bool=True) -> Path:
     db = EnvConfig.get().db
     if check:
         assert db.exists(), db
@@ -125,7 +135,7 @@ def _get_stuff(db_path: PathWithMtime) -> DbStuff:
     return get_db_stuff(db_path=db_path.path)
 
 
-def get_stuff(db_path: Optional[Path]=None) -> DbStuff: # TODO better name
+def get_stuff(db_path: Path | None=None) -> DbStuff: # TODO better name
     # ok, it will always load from the same db file; but intermediate would be kinda an optional dump.
     if db_path is None:
         db_path = get_db_path()
@@ -136,7 +146,7 @@ def db_stats(db_path: Path) -> Json:
     engine, table = get_stuff(db_path)
     query = select(func.count()).select_from(table)
     with engine.connect() as conn:
-        total = list(conn.execute(query))[0][0]
+        [(total,)] = conn.execute(query)
     return {
         'total_visits': total,
     }
@@ -172,17 +182,17 @@ def search_common(url: str, where: Where) -> VisitsResponse:
     with engine.connect() as conn:
         try:
             # TODO make more defensive here
-            visits: List[DbVisit] = [row_to_db_visit(row) for row in conn.execute(query)]
+            visits: list[DbVisit] = [row_to_db_visit(row) for row in conn.execute(query)]
         except exc.OperationalError as e:
             if getattr(e, 'msg', None) == 'no such table: visits':
-                logger.warn('you may have to run indexer first!')
+                logger.warning('you may have to run indexer first!')
                 #result['visits'] = [{an error with a msg}] # TODO
                 #return result
             raise
 
     logger.debug('got %d visits from db', len(visits))
 
-    vlist: List[DbVisit] = []
+    vlist: list[DbVisit] = []
     for vis in visits:
         dt = vis.dt
         if dt.tzinfo is None: # FIXME need this for /visits endpoint as well?
@@ -225,7 +235,7 @@ def status() -> Json:
         logger.exception(e)
         stats = {'ERROR': str(e)}
 
-    version: Optional[str]
+    version: str | None
     try:
         version = get_version()
     except Exception as e:
@@ -299,7 +309,7 @@ def search_around(request: SearchAroundRequest) -> VisitsResponse:
 
     return search_common(
         url='http://dummy.org', # NOTE: not used in the where query (below).. perhaps need to get rid of this
-        where=lambda table, url: between(
+        where=lambda table, url: between( # noqa: ARG005
            func.strftime(
                '%s', # NOTE: it's tz aware, e.g. would distinguish +05:00 vs -03:00
                # this is a bit fragile, relies on cachew internal timestamp format, e.g.
@@ -322,25 +332,26 @@ def search_around(request: SearchAroundRequest) -> VisitsResponse:
 _NO_VERSION = (0, 11, 14)
 _LATEST = (9999, 9999, 9999)
 
-def as_version(version: str) -> Tuple[int, int, int]:
+def as_version(version: str) -> tuple[int, int, int]:
     if version == '':
         return _NO_VERSION
     try:
         [v1, v2, v3] = map(int, version.split('.'))
-        return (v1, v2, v3)
     except Exception as e:
         logger = get_logger()
         logger.error('error while parsing version %s', version)
         logger.exception(e)
         return _LATEST
+    else:
+        return (v1, v2, v3)
 
 
 @dataclass
 class VisitedRequest:
-    urls: List[str]
+    urls: list[str]
     client_version: str = ''
 
-VisitedResponse = List[Optional[Json]]
+VisitedResponse = list[Optional[Json]]
 
 @app.get ('/visited', response_model=VisitedResponse)
 @app.post('/visited', response_model=VisitedResponse)
@@ -355,7 +366,7 @@ def visited(request: VisitedRequest) -> VisitedResponse:
     version = as_version(client_version)
 
     nurls = [canonify(u) for u in urls]
-    snurls = list(sorted(set(nurls)))
+    snurls = sorted(set(nurls))
 
     if len(snurls) == 0:
         return []
@@ -388,7 +399,7 @@ SELECT queried, visits.*
     # brings down large queries to 50ms...
     with engine.connect() as conn:
         res = list(conn.execute(query))
-        present: Dict[str, Any] = {row[0]: row_to_db_visit(row[1:]) for row in res}
+        present: dict[str, Any] = {row[0]: row_to_db_visit(row[1:]) for row in res}
     results = []
     for nu in nurls:
        r = present.get(nu, None)
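
One behavioural note on the server changes: as_version moves its success return into an else branch (behaviour is unchanged; any parse failure still falls back to _LATEST). A minimal standalone sketch mirroring that control flow, with the package's logging setup omitted:

    _NO_VERSION = (0, 11, 14)
    _LATEST = (9999, 9999, 9999)

    def as_version(version: str) -> tuple[int, int, int]:
        if version == '':
            return _NO_VERSION
        try:
            [v1, v2, v3] = map(int, version.split('.'))
        except Exception:
            # the real code logs the error before falling back
            return _LATEST
        else:
            return (v1, v2, v3)

    assert as_version('1.2.3') == (1, 2, 3)
    assert as_version('') == (0, 11, 14)
    assert as_version('garbage') == (9999, 9999, 9999)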