promnesia 1.2.20240810__py3-none-any.whl → 1.4.20250909__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. promnesia/__init__.py +18 -4
  2. promnesia/__main__.py +104 -78
  3. promnesia/cannon.py +108 -107
  4. promnesia/common.py +107 -88
  5. promnesia/compare.py +33 -30
  6. promnesia/compat.py +10 -10
  7. promnesia/config.py +37 -34
  8. promnesia/database/common.py +4 -3
  9. promnesia/database/dump.py +13 -13
  10. promnesia/database/load.py +7 -7
  11. promnesia/extract.py +19 -17
  12. promnesia/logging.py +27 -15
  13. promnesia/misc/install_server.py +32 -27
  14. promnesia/server.py +106 -79
  15. promnesia/sources/auto.py +104 -77
  16. promnesia/sources/auto_logseq.py +6 -5
  17. promnesia/sources/auto_obsidian.py +2 -2
  18. promnesia/sources/browser.py +20 -10
  19. promnesia/sources/browser_legacy.py +65 -50
  20. promnesia/sources/demo.py +7 -8
  21. promnesia/sources/fbmessenger.py +3 -3
  22. promnesia/sources/filetypes.py +22 -16
  23. promnesia/sources/github.py +9 -8
  24. promnesia/sources/guess.py +6 -2
  25. promnesia/sources/hackernews.py +7 -9
  26. promnesia/sources/hpi.py +5 -3
  27. promnesia/sources/html.py +11 -7
  28. promnesia/sources/hypothesis.py +3 -2
  29. promnesia/sources/instapaper.py +3 -2
  30. promnesia/sources/markdown.py +22 -12
  31. promnesia/sources/org.py +36 -17
  32. promnesia/sources/plaintext.py +41 -39
  33. promnesia/sources/pocket.py +5 -3
  34. promnesia/sources/reddit.py +24 -26
  35. promnesia/sources/roamresearch.py +5 -2
  36. promnesia/sources/rss.py +6 -8
  37. promnesia/sources/shellcmd.py +21 -11
  38. promnesia/sources/signal.py +27 -26
  39. promnesia/sources/smscalls.py +2 -3
  40. promnesia/sources/stackexchange.py +5 -4
  41. promnesia/sources/takeout.py +37 -34
  42. promnesia/sources/takeout_legacy.py +29 -19
  43. promnesia/sources/telegram.py +18 -12
  44. promnesia/sources/telegram_legacy.py +22 -11
  45. promnesia/sources/twitter.py +7 -6
  46. promnesia/sources/vcs.py +11 -6
  47. promnesia/sources/viber.py +11 -10
  48. promnesia/sources/website.py +8 -7
  49. promnesia/sources/zulip.py +3 -2
  50. promnesia/sqlite.py +13 -7
  51. promnesia/tests/common.py +10 -5
  52. promnesia/tests/server_helper.py +13 -10
  53. promnesia/tests/sources/test_auto.py +2 -3
  54. promnesia/tests/sources/test_filetypes.py +11 -8
  55. promnesia/tests/sources/test_hypothesis.py +10 -6
  56. promnesia/tests/sources/test_org.py +9 -5
  57. promnesia/tests/sources/test_plaintext.py +9 -8
  58. promnesia/tests/sources/test_shellcmd.py +13 -13
  59. promnesia/tests/sources/test_takeout.py +3 -5
  60. promnesia/tests/test_cannon.py +256 -239
  61. promnesia/tests/test_cli.py +12 -8
  62. promnesia/tests/test_compare.py +17 -13
  63. promnesia/tests/test_config.py +7 -8
  64. promnesia/tests/test_db_dump.py +15 -15
  65. promnesia/tests/test_extract.py +17 -10
  66. promnesia/tests/test_indexer.py +24 -18
  67. promnesia/tests/test_server.py +12 -13
  68. promnesia/tests/test_traverse.py +0 -2
  69. promnesia/tests/utils.py +3 -7
  70. promnesia-1.4.20250909.dist-info/METADATA +66 -0
  71. promnesia-1.4.20250909.dist-info/RECORD +80 -0
  72. {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/WHEEL +1 -2
  73. promnesia/kjson.py +0 -121
  74. promnesia/sources/__init__.pyi +0 -0
  75. promnesia-1.2.20240810.dist-info/METADATA +0 -54
  76. promnesia-1.2.20240810.dist-info/RECORD +0 -83
  77. promnesia-1.2.20240810.dist-info/top_level.txt +0 -1
  78. {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/entry_points.txt +0 -0
  79. {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info/licenses}/LICENSE +0 -0
promnesia/sources/signal.py

@@ -2,34 +2,33 @@
  Collects visits from Signal Desktop's encrypted SQLIite db(s).
  """

+ from __future__ import annotations
+
  # Functions get their defaults from module-data.
  #
  # * Open-ciphered-db adapted from:
  # https://github.com/carderne/signal-export/commit/2284c8f4
  # * Copyright (c) 2019 Chris Arderne, 2020 Kostis Anagnostopoulos
-
-
  import json
  import logging
  import platform
  import sqlite3
  import subprocess as sbp
+ from collections.abc import Iterable, Iterator, Mapping
  from contextlib import contextmanager
  from pathlib import Path
  from textwrap import dedent, indent
- from typing import Any, Iterable, Iterator, Mapping, Union, Optional
+ from typing import Any

  from ..common import Loc, PathIsh, Results, Visit, extract_urls, from_epoch

- PathIshes = Union[PathIsh, Iterable[PathIsh]]
-

  def index(
  *db_paths: PathIsh,
  http_only: bool = False,
- locator_schema: str="editor",
+ locator_schema: str = "editor",
  append_platform_path: bool = False,
- override_key: Optional[str] = None,
+ override_key: str | None = None,
  ) -> Results:
  """
  :param db_paths:
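
The typing changes in this hunk follow the usual annotations-future migration: with `from __future__ import annotations` at the top of a module, annotations are stored as strings instead of being evaluated, so PEP 604 unions like `str | None` can replace `typing.Optional`/`typing.Union` even on interpreter versions where those unions are not runtime types. A minimal sketch of the pattern (the function and parameter names here are illustrative, not promnesia's API):

    from __future__ import annotations


    def index(*, override_key: str | None = None, schemas: list[str] | None = None) -> list[str]:
        # Under the future import these annotations are never evaluated at runtime,
        # so `str | None` and `list[str]` parse fine on older Python 3 as well.
        return list(schemas or []) + ([] if override_key is None else [override_key])
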
@@ -51,8 +50,7 @@ def index(
  otherwise, this same key is used for harvesting all db-files.
  """
  logger.debug(
- "http_only?(%s), locator_schema?(%s), append_platform_path?(%s), "
- "overide_key given?(%s), db_paths: %s",
+ "http_only?(%s), locator_schema?(%s), append_platform_path?(%s), overide_key given?(%s), db_paths: %s",
  http_only,
  locator_schema,
  append_platform_path,
@@ -109,10 +107,10 @@ messages_query = dedent(
  id,
  type,
  coalesce(
- profileFullName,
- profileName,
+ profileFullName,
+ profileName,
  name,
- profileFamilyName,
+ profileFamilyName,
  e164
  ) as aname,
  name,
@@ -171,7 +169,10 @@ def _expand_path(path_pattern: PathIsh) -> Iterable[Path]:

  Expansion code adapted from https://stackoverflow.com/a/51108375/548792
  to handle also degenerate cases (``'', '.', '/'``):
+ """

+ # NOTE: suppressing doctest from github actions
+ """
  >>> str(next(iter(_get_files('/'))))
  '/'

@@ -194,7 +195,7 @@ def _expand_path(path_pattern: PathIsh) -> Iterable[Path]:
  return path.glob(str(Path(*parts))) if parts else [path]


- def _expand_paths(paths: PathIshes) -> Iterable[Path]:
+ def _expand_paths(paths: PathIsh | Iterable[PathIsh]) -> Iterable[Path]:
  if _is_pathish(paths):
  paths = [paths] # type: ignore[list-item]
  return [pp.resolve() for p in paths for pp in _expand_path(p)] # type: ignore[union-attr]
@@ -214,7 +215,10 @@ def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path]
  one or more pathish

  Note: needed `append` here, to resolve paths.
+ """

+ # NOTE: suppressing doctest from running on Github actions
+ """
  >>> bool(collect_db_paths()) # my home-path
  True
  >>> collect_db_paths(None)
@@ -237,11 +241,10 @@ def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path]
  platform_name = platform.system()
  try:
  plat_paths = platform_db_paths[platform_name]
- except LookupError:
+ except LookupError as le:
  raise ValueError(
- f"Unknown platform({platform_name}!"
- f"\n Expected one of {list(platform_db_paths.keys())}."
- )
+ f"Unknown platform({platform_name}!\n Expected one of {list(platform_db_paths.keys())}."
+ ) from le

  if db_paths and append:
  db_paths = [ # type: ignore[assignment]
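
The `except LookupError as le: ... raise ValueError(...) from le` change above is explicit exception chaining: the original lookup failure is attached as `__cause__`, so the traceback reports it as the direct cause rather than as a secondary error raised during handling. A small self-contained sketch of the idiom (the mapping values are illustrative, not the paths promnesia actually uses):

    import platform

    platform_db_paths = {
        "Linux": "~/.config/Signal",
        "Darwin": "~/Library/Application Support/Signal",
    }


    def db_path_for(platform_name: str) -> str:
        try:
            return platform_db_paths[platform_name]
        except LookupError as le:
            # `from le` chains the KeyError as __cause__, keeping the real failure visible.
            raise ValueError(
                f"Unknown platform {platform_name!r}! Expected one of {list(platform_db_paths)}."
            ) from le


    print(db_path_for(platform.system()))  # raises ValueError on unlisted platforms
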
@@ -261,7 +264,7 @@ def _config_for_dbfile(db_path: Path, default_key=None) -> Path:


  def _key_from_config(signal_desktop_config_path: PathIsh) -> str:
- with open(signal_desktop_config_path, "r") as conf:
+ with Path(signal_desktop_config_path).open() as conf:
  return json.load(conf)["key"]


@@ -269,6 +272,7 @@ def _key_from_config(signal_desktop_config_path: PathIsh) -> str:
  def connect_db(
  db_path: Path,
  key,
+ *,
  decrypt_db: bool = False,
  sqlcipher_exe: PathIsh = "sqlcipher",
  **decryption_pragmas: Mapping[str, Any],
@@ -324,16 +328,14 @@ def connect_db(
  )
  sql = "\n".join(sql_cmds)
  cmd = [sqlcipher_exe, str(db_path)]
- logger.debug(
- "Decrypting db '%s' with cmd: %s <<<EOF\n%s\nEOF", db_path, cmd, sql
- )
+ logger.debug("Decrypting db '%s' with cmd: %s <<<EOF\n%s\nEOF", db_path, cmd, sql)
  try:
  sbp.run(
  cmd,
  check=True,
  input=sql,
  capture_output=True,
- universal_newlines=True,
+ text=True,
  )
  except sbp.CalledProcessError as ex:
  prefix = " " * 4
@@ -357,12 +359,11 @@ def connect_db(
  yield db
  finally:
  try:
- if db:
+ if db is not None:
  db.close()
  finally:
  if decrypted_file and decrypted_file.exists():
  try:
-
  logger.debug("Deleting temporary decrypted db: %s", decrypted_file)
  decrypted_file.unlink()
  except Exception as ex:
@@ -380,7 +381,7 @@ def _handle_row(row: tuple, db_path: PathIsh, locator_schema: str) -> Results:
  if not urls:
  return

- assert (
+ assert ( # noqa: PT018
  text and mid and sender and chatname
  ), f"should have eliminated messages without 'http' or missing ids: {row}"

@@ -400,7 +401,7 @@ def _harvest_db(
  db_path: Path,
  messages_query: str,
  *,
- override_key: Optional[str] = None,
+ override_key: str | None = None,
  locator_schema: str = "editor",
  decrypt_db: bool = False,
  **decryption_pragmas,
promnesia/sources/smscalls.py

@@ -2,15 +2,14 @@
  Uses [[https://github.com/karlicoss/HPI][HPI]] smscalls module
  '''

- from promnesia.common import Visit, Loc, Results, extract_urls
+ from promnesia.common import Loc, Results, Visit, extract_urls


  def index() -> Results:
- from . import hpi
+ from . import hpi # noqa: F401,I001
  from my.smscalls import messages

  for m in messages():
-
  if isinstance(m, Exception):
  yield m
  continue
promnesia/sources/stackexchange.py

@@ -2,12 +2,13 @@
  Uses [[https://github.com/karlicoss/HPI][HPI]] for Stackexchange data.
  '''

- from ..common import Results, Visit, Loc
+ from promnesia.common import Loc, Results, Visit


  def index() -> Results:
- from . import hpi
+ from . import hpi # noqa: F401,I001
  import my.stackexchange.gdpr as G
+
  for v in G.votes():
  if isinstance(v, Exception):
  yield v
@@ -15,7 +16,7 @@ def index() -> Results:
  yield Visit(
  url=v.link,
  dt=v.when,
- context='voted', # todo use the votetype? although maybe worth ignoring downvotes
+ context='voted',  # todo use the votetype? although maybe worth ignoring downvotes
  # or, downvotes could have 'negative' ranking or something
- locator=Loc.make(title='voted', href=v.link)
+ locator=Loc.make(title='voted', href=v.link),
  )
promnesia/sources/takeout.py

@@ -1,11 +1,14 @@
  '''
  Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#mygoogletakeoutpaths][google.takeout]] module
  '''
- from typing import Iterable, Set, Any, NamedTuple
+
+ from __future__ import annotations
+
  import warnings
+ from collections.abc import Iterable
+ from typing import Any, NamedTuple

- from ..common import Visit, Loc, Results, logger
- from ..compat import removeprefix
+ from promnesia.common import Loc, Results, Visit, logger


  # incase user is using an old version of google_takeout_parser
@@ -14,25 +17,34 @@ class YoutubeCSVStub(NamedTuple):


  def index() -> Results:
- from . import hpi
- import json
+ from . import hpi # noqa: F401

  try:
+ from google_takeout_parser.models import (
+ Activity,
+ ChromeHistory,
+ LikedYoutubeVideo,
+ YoutubeComment,
+ )
+ from google_takeout_parser.parse_csv import (
+ extract_comment_links,
+ reconstruct_comment_content,
+ )
  from my.google.takeout.parser import events
- from google_takeout_parser.models import Activity, YoutubeComment, LikedYoutubeVideo, ChromeHistory
- from google_takeout_parser.parse_csv import reconstruct_comment_content, extract_comment_links
  except ModuleNotFoundError as ex:
  logger.exception(ex)
  yield ex

- warnings.warn("Please set up my.google.takeout.parser module for better takeout support. Falling back to legacy implementation.")
+ warnings.warn(
+ "Please set up my.google.takeout.parser module for better takeout support. Falling back to legacy implementation."
+ )

  from . import takeout_legacy
+
  yield from takeout_legacy.index()
  return

-
- _seen: Set[str] = {
+ _seen: set[str] = {
  # these are definitely not useful for promnesia
  'Location',
  'PlaceVisit',
@@ -42,10 +54,13 @@ def index() -> Results:
  imported_yt_csv_models = False
  try:
  from google_takeout_parser.models import CSVYoutubeComment, CSVYoutubeLiveChat
+
  imported_yt_csv_models = True
  except ImportError:
  # warn user to upgrade google_takeout_parser
- warnings.warn("Please upgrade google_takeout_parser (`pip install -U google_takeout_parser`) to support the new format for youtube comments")
+ warnings.warn(
+ "Please upgrade google_takeout_parser (`pip install -U google_takeout_parser`) to support the new format for youtube comments"
+ )
  CSVYoutubeComment = YoutubeCSVStub # type: ignore[misc,assignment]
  CSVYoutubeLiveChat = YoutubeCSVStub # type: ignore[misc,assignment]

@@ -54,7 +69,7 @@ def index() -> Results:
  if et_name in _seen:
  return
  _seen.add(et_name)
- yield RuntimeError(f"Unhandled event {repr(type(e))}: {e}")
+ yield RuntimeError(f"Unhandled event {type(e)!r}: {e}")

  for e in events():
  if isinstance(e, Exception):
@@ -67,13 +82,13 @@ def index() -> Results:
  # when you follow something from search the actual url goes after this
  # e.g. https://www.google.com/url?q=https://en.wikipedia.org/wiki/Clapham
  # note: also title usually starts with 'Visited ', in such case but perhaps fine to keep it
- url = removeprefix(url, "https://www.google.com/url?q=")
+ url = url.removeprefix("https://www.google.com/url?q=")
  title = e.title

  if e.header == 'Chrome':
  # title contains 'Visited <page title>' in this case
  context = None
- title = removeprefix(title, 'Visited ')
+ title = title.removeprefix('Visited ')
  elif e.header in _CLEAR_CONTEXT_FOR_HEADERS:
  # todo perhaps could add to some sort of metadata?
  # only useful for debugging really
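
The two `removeprefix` changes above drop promnesia's `compat` shim in favour of the built-in `str.removeprefix`, available since Python 3.9; it strips the prefix only when it is actually present and otherwise returns the string unchanged. For example:

    url = "https://www.google.com/url?q=https://en.wikipedia.org/wiki/Clapham"
    print(url.removeprefix("https://www.google.com/url?q="))  # prefix stripped
    # https://en.wikipedia.org/wiki/Clapham

    title = "Some page title"
    print(title.removeprefix('Visited '))  # no prefix present, returned unchanged
    # Some page title
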
@@ -120,18 +135,14 @@ def index() -> Results:
  elif isinstance(e, LikedYoutubeVideo):
  # TODO not sure if desc makes sense here since it's not user produced data
  # it's just a part of video meta?
- yield Visit(
- url=e.link, dt=e.dt, context=e.desc, locator=Loc(title=e.title, href=e.link)
- )
+ yield Visit(url=e.link, dt=e.dt, context=e.desc, locator=Loc(title=e.title, href=e.link))
  elif isinstance(e, YoutubeComment):
  for url in e.urls:
  # todo: use url_metadata to improve locator?
  # or maybe just extract first sentence?
- yield Visit(
- url=url, dt=e.dt, context=e.content, locator=Loc(title=e.content, href=url)
- )
+ yield Visit(url=url, dt=e.dt, context=e.content, locator=Loc(title=e.content, href=url))
  elif imported_yt_csv_models and isinstance(e, CSVYoutubeComment):
- contentJSON = json.loads(e.contentJSON)
+ contentJSON = e.contentJSON
  content = reconstruct_comment_content(contentJSON, format='text')
  if isinstance(content, Exception):
  yield content
@@ -142,14 +153,10 @@ def index() -> Results:
  continue
  context = f"Commented on {e.video_url}"
  for url in links:
- yield Visit(
- url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url)
- )
- yield Visit(
- url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url)
- )
+ yield Visit(url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url))
+ yield Visit(url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url))
  elif imported_yt_csv_models and isinstance(e, CSVYoutubeLiveChat):
- contentJSON = json.loads(e.contentJSON)
+ contentJSON = e.contentJSON
  content = reconstruct_comment_content(contentJSON, format='text')
  if isinstance(content, Exception):
  yield content
@@ -160,12 +167,8 @@ def index() -> Results:
  continue
  context = f"Commented on livestream {e.video_url}"
  for url in links:
- yield Visit(
- url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url)
- )
- yield Visit(
- url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url)
- )
+ yield Visit(url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url))
+ yield Visit(url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url))
  else:
  yield from warn_once_if_not_seen(e)

promnesia/sources/takeout_legacy.py

@@ -1,17 +1,23 @@
- from ..common import Visit, logger, PathIsh, Url, Loc, Results
+ from __future__ import annotations
+
+ from promnesia.common import Loc, Results, Visit, logger
+

  # TODO make an iterator, insert in db as we go? handle errors gracefully?
  def index() -> Results:
- from . import hpi
+ from . import hpi # noqa: F401,I001
  from my.google.takeout.paths import get_takeouts
+
  takeouts = list(get_takeouts())
  # TODO if no takeouts, raise?
  # although could raise a warning on top level, when source emitted no takeouts

  # TODO youtube?
+ # fmt: off
  google_activities = [read_google_activity(t) for t in takeouts]
  search_activities = [read_search_activity(t) for t in takeouts]
  browser_histories = [read_browser_history_json(t) for t in takeouts]
+ # fmt: on

  key = lambda v: (v.dt, v.url)
  return chain(
@@ -21,25 +27,22 @@ def index() -> Results:
  )


-
- import pytz
+ import json
+ from collections.abc import Iterable
+ from datetime import datetime, timezone
  from itertools import chain
- from datetime import datetime
- from typing import List, Optional, Iterable, TYPE_CHECKING
  from pathlib import Path
- import json
-
-
- from .. import config
-

  from more_itertools import unique_everseen

+ from promnesia import config
+
  try:
  from cachew import cachew
  except ModuleNotFoundError as me:
  if me.name != 'cachew':
  raise me
+
  # this module is legacy anyway, so just make it defensive
  def cachew(*args, **kwargs): # type: ignore[no-redef]
  return lambda f: f
@@ -50,7 +53,9 @@ TakeoutPath = Path


  def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
- from my.core.kompress import kexists
+ # FIXME switch to actual kompress? and use CPath?
+ from my.core.kompress import kexists # type: ignore[attr-defined]
+
  # TODO glob
  # TODO not sure about windows path separators??
  spath = 'Takeout/My Activity/' + kind
@@ -61,7 +66,8 @@ def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:

  locator = Loc.file(spath)
  from my.google.takeout.html import read_html
- for dt, url, title in read_html(takeout, spath):
+
+ for dt, url, _title in read_html(takeout, spath):
  yield Visit(
  url=url,
  dt=dt,
@@ -69,6 +75,7 @@ def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
  debug=kind,
  )

+
  def _cpath(suffix: str):
  def fun(takeout: TakeoutPath):
  cache_dir = config.get().cache_dir
@@ -76,23 +83,27 @@ def _cpath(suffix: str):
  return None
  # doesn't need a nontrivial hash function, timestsamp is encoded in name
  return cache_dir / (takeout.name + '_' + suffix + '.cache')
+
  return fun


  # todo caching should this be HPI responsibility?
  # todo set global cachew logging on init?
- @cachew(cache_path=_cpath('google_activity') , logger=logger)
+ @cachew(cache_path=_cpath('google_activity'), logger=logger)
  def read_google_activity(takeout: TakeoutPath) -> Iterable[Visit]:
  return _read_myactivity_html(takeout, 'Chrome/MyActivity.html')

- @cachew(cache_path=_cpath('search_activity') , logger=logger)
+
+ @cachew(cache_path=_cpath('search_activity'), logger=logger)
  def read_search_activity(takeout: TakeoutPath) -> Iterable[Visit]:
  return _read_myactivity_html(takeout, 'Search/MyActivity.html')

+
  # TODO add this to tests?
  @cachew(cache_path=_cpath('browser_activity'), logger=logger)
  def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
- from my.core.kompress import kexists, kopen
+ from my.core.kompress import kexists, kopen # type: ignore[attr-defined]
+
  # not sure if this deserves moving to HPI? it's pretty trivial for now
  spath = 'Takeout/Chrome/BrowserHistory.json'

@@ -107,13 +118,13 @@ def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
  # TODO this should be supported by HPI now?

  j = None
- with kopen(takeout, spath) as fo: # TODO iterative parser?
+ with kopen(takeout, spath) as fo:  # TODO iterative parser?
  j = json.load(fo)

  hist = j['Browser History']
  for item in hist:
  url = item['url']
- time = datetime.fromtimestamp(item['time_usec'] / 10 ** 6, tz=pytz.utc)
+ time = datetime.fromtimestamp(item['time_usec'] / 10**6, tz=timezone.utc)
  # TODO any more interesitng info?
  yield Visit(
  url=url,
@@ -121,4 +132,3 @@ def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
  locator=locator,
  debug='Chrome/BrowserHistory.json',
  )
-
promnesia/sources/telegram.py

@@ -1,11 +1,12 @@
- from typing import Optional
- from urllib.parse import unquote # TODO mm, make it easier to rememember to use...
+ from __future__ import annotations
+
  import warnings
+ from urllib.parse import unquote  # TODO mm, make it easier to rememember to use...

- from promnesia.common import Results, logger, extract_urls, Visit, Loc, PathIsh
+ from promnesia.common import Loc, PathIsh, Results, Visit, extract_urls, logger


- def index(database: Optional[PathIsh]=None, *, http_only: bool=False, with_extra_media_info: bool=False) -> Results:
+ def index(database: PathIsh | None = None, *, http_only: bool = False, with_extra_media_info: bool = False) -> Results:
  if database is None:
  # fully relying on HPI
  yield from _index_new(http_only=http_only, with_extra_media_info=with_extra_media_info)
@@ -16,11 +17,14 @@ def index(database: Optional[PathIsh]=None, *, http_only: bool=False, with_extra
  f'Will try to hack database path {database} into HPI config.'
  )
  try:
- yield from _index_new_with_adhoc_config(database=database, http_only=http_only, with_extra_media_info=with_extra_media_info)
- return
+ yield from _index_new_with_adhoc_config(
+ database=database, http_only=http_only, with_extra_media_info=with_extra_media_info
+ )
  except Exception as e:
  logger.exception(e)
  warnings.warn("Hacking my.config.telegram.telegram_backup didn't work. You probably need to update HPI.")
+ else:
+ return

  logger.warning("Falling back onto promnesia.sources.telegram_legacy module")
  yield from _index_legacy(database=database, http_only=http_only)
@@ -28,11 +32,12 @@ def index(database: Optional[PathIsh]=None, *, http_only: bool=False, with_extra

  def _index_legacy(*, database: PathIsh, http_only: bool) -> Results:
  from . import telegram_legacy
+
  yield from telegram_legacy.index(database=database, http_only=http_only)


  def _index_new_with_adhoc_config(*, database: PathIsh, http_only: bool, with_extra_media_info: bool) -> Results:
- from . import hpi
+ from . import hpi # noqa: F401

  class config:
  class telegram:
@@ -40,19 +45,20 @@ def _index_new_with_adhoc_config(*, database: PathIsh, http_only: bool, with_ext
  export_path: PathIsh = database

  from my.core.cfg import tmp_config
+
  with tmp_config(modules='my.telegram.telegram_backup', config=config):
  yield from _index_new(http_only=http_only, with_extra_media_info=with_extra_media_info)


  def _index_new(*, http_only: bool, with_extra_media_info: bool) -> Results:
- from . import hpi
+ from . import hpi # noqa: F401,I001
  from my.telegram.telegram_backup import messages

  extra_where = "(has_media == 1 OR text LIKE '%http%')" if http_only else None
- for i, m in enumerate(messages(
- with_extra_media_info=with_extra_media_info,
- extra_where=extra_where,
- )):
+ for m in messages(
+ with_extra_media_info=with_extra_media_info,
+ extra_where=extra_where,
+ ):
  text = m.text

  urls = extract_urls(text)
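
Moving the `return` into an `else:` branch in `index()` above is the standard `try`/`except`/`else` shape: the `else` block runs only when the `try` body raised nothing, which keeps the happy-path `return` out of the guarded region and lets the legacy fallback run whenever the adhoc-config path fails. A compact, self-contained sketch of the same control flow (file reading stands in for the indexing call; none of these names are promnesia's):

    def load_or_default(path: str) -> dict:
        try:
            with open(path) as fh:
                raw = fh.read()
        except OSError as e:
            print(f"could not read {path}: {e}; falling back")
        else:
            return {"raw": raw}  # only reached when the read succeeded
        return {}                # fallback branch, analogous to _index_legacy above
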
promnesia/sources/telegram_legacy.py

@@ -2,34 +2,42 @@
  Uses [[https://github.com/fabianonline/telegram_backup#readme][telegram_backup]] database for messages data
  '''

- from pathlib import Path
+ from __future__ import annotations
+
  import sqlite3
+ from pathlib import Path
  from textwrap import dedent
- from typing import Union, TypeVar
- from urllib.parse import unquote # TODO mm, make it easier to rememember to use...
+ from typing import TypeVar
+ from urllib.parse import unquote  # TODO mm, make it easier to rememember to use...
+
+ from promnesia.common import (
+ Loc,
+ PathIsh,
+ Results,
+ Visit,
+ echain,
+ extract_urls,
+ from_epoch,
+ )

- from ..common import PathIsh, Visit, get_logger, Loc, extract_urls, from_epoch, Results, echain
  from ..sqlite import sqlite_connection

  T = TypeVar("T")


- def unwrap(res: Union[T, Exception]) -> T:
+ def unwrap(res: T | Exception) -> T:
  if isinstance(res, Exception):
  raise res
- else:
- return res
+ return res


- def index(database: PathIsh, *, http_only: bool=False) -> Results:
+ def index(database: PathIsh, *, http_only: bool = False) -> Results:
  """
  :param database:
  the path of the sqlite generated by the _telegram_backup_ java program
  :param http_only:
  when true, do not collect IP-addresses and `python.py` strings
  """
- logger = get_logger()
-
  path = Path(database)
  assert path.is_file(), path

@@ -66,7 +74,8 @@ def index(database: PathIsh, *, http_only: bool=False) -> Results:
  M.message_type NOT IN ('service_message', 'empty_message')
  {extra_criteria}
  ORDER BY time;
- """)
+ """
+ )

  with sqlite_connection(path, immutable=True, row_factory='row') as db:
  # TODO yield error if chatname or chat or smth else is null?
@@ -94,6 +103,7 @@ def _handle_row(row: sqlite3.Row) -> Results:
  urls = extract_urls(text)
  if len(urls) == 0:
  return
+ # fmt: off
  dt = from_epoch(row['time'])
  mid: str = unwrap(row['mid'])

@@ -101,6 +111,7 @@ def _handle_row(row: sqlite3.Row) -> Results:
  sender: str = unwrap(row['sender'])
  chatname: str = unwrap(row['chatname'])
  chat: str = unwrap(row['chat'])
+ # fmt: on

  in_context = f'https://t.me/{chat}/{mid}'
  for u in urls: