promnesia 1.2.20230515__py3-none-any.whl → 1.3.20241021__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. promnesia/__init__.py +14 -3
  2. promnesia/__main__.py +60 -35
  3. promnesia/cannon.py +27 -27
  4. promnesia/common.py +85 -67
  5. promnesia/compare.py +21 -22
  6. promnesia/compat.py +10 -10
  7. promnesia/config.py +23 -23
  8. promnesia/database/common.py +67 -0
  9. promnesia/database/dump.py +188 -0
  10. promnesia/{read_db.py → database/load.py} +16 -17
  11. promnesia/extract.py +14 -11
  12. promnesia/kjson.py +12 -11
  13. promnesia/logging.py +4 -4
  14. promnesia/misc/__init__.pyi +0 -0
  15. promnesia/misc/config_example.py +1 -2
  16. promnesia/misc/install_server.py +7 -9
  17. promnesia/server.py +57 -47
  18. promnesia/sources/__init__.pyi +0 -0
  19. promnesia/sources/auto.py +50 -35
  20. promnesia/sources/auto_logseq.py +6 -5
  21. promnesia/sources/auto_obsidian.py +2 -2
  22. promnesia/sources/browser.py +14 -9
  23. promnesia/sources/browser_legacy.py +26 -16
  24. promnesia/sources/demo.py +19 -3
  25. promnesia/sources/fbmessenger.py +3 -2
  26. promnesia/sources/filetypes.py +16 -7
  27. promnesia/sources/github.py +7 -9
  28. promnesia/sources/guess.py +2 -1
  29. promnesia/sources/hackernews.py +2 -2
  30. promnesia/sources/hpi.py +2 -2
  31. promnesia/sources/html.py +7 -5
  32. promnesia/sources/hypothesis.py +4 -3
  33. promnesia/sources/instapaper.py +2 -2
  34. promnesia/sources/markdown.py +31 -21
  35. promnesia/sources/org.py +27 -13
  36. promnesia/sources/plaintext.py +30 -29
  37. promnesia/sources/pocket.py +3 -2
  38. promnesia/sources/reddit.py +20 -19
  39. promnesia/sources/roamresearch.py +2 -1
  40. promnesia/sources/rss.py +4 -5
  41. promnesia/sources/shellcmd.py +19 -6
  42. promnesia/sources/signal.py +33 -24
  43. promnesia/sources/smscalls.py +2 -2
  44. promnesia/sources/stackexchange.py +4 -3
  45. promnesia/sources/takeout.py +76 -9
  46. promnesia/sources/takeout_legacy.py +24 -12
  47. promnesia/sources/telegram.py +13 -11
  48. promnesia/sources/telegram_legacy.py +18 -7
  49. promnesia/sources/twitter.py +6 -5
  50. promnesia/sources/vcs.py +5 -3
  51. promnesia/sources/viber.py +10 -9
  52. promnesia/sources/website.py +4 -4
  53. promnesia/sources/zulip.py +3 -2
  54. promnesia/sqlite.py +7 -4
  55. promnesia/tests/__init__.py +0 -0
  56. promnesia/tests/common.py +140 -0
  57. promnesia/tests/server_helper.py +67 -0
  58. promnesia/tests/sources/__init__.py +0 -0
  59. promnesia/tests/sources/test_auto.py +65 -0
  60. promnesia/tests/sources/test_filetypes.py +43 -0
  61. promnesia/tests/sources/test_hypothesis.py +39 -0
  62. promnesia/tests/sources/test_org.py +64 -0
  63. promnesia/tests/sources/test_plaintext.py +25 -0
  64. promnesia/tests/sources/test_shellcmd.py +21 -0
  65. promnesia/tests/sources/test_takeout.py +56 -0
  66. promnesia/tests/test_cannon.py +325 -0
  67. promnesia/tests/test_cli.py +40 -0
  68. promnesia/tests/test_compare.py +30 -0
  69. promnesia/tests/test_config.py +289 -0
  70. promnesia/tests/test_db_dump.py +222 -0
  71. promnesia/tests/test_extract.py +65 -0
  72. promnesia/tests/test_extract_urls.py +43 -0
  73. promnesia/tests/test_indexer.py +251 -0
  74. promnesia/tests/test_server.py +291 -0
  75. promnesia/tests/test_traverse.py +39 -0
  76. promnesia/tests/utils.py +35 -0
  77. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/METADATA +15 -18
  78. promnesia-1.3.20241021.dist-info/RECORD +83 -0
  79. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/WHEEL +1 -1
  80. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/entry_points.txt +0 -1
  81. promnesia/dump.py +0 -105
  82. promnesia-1.2.20230515.dist-info/RECORD +0 -58
  83. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/LICENSE +0 -0
  84. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/top_level.txt +0 -0
promnesia/sources/roamresearch.py CHANGED
@@ -2,7 +2,7 @@
  Uses [[https://github.com/karlicoss/HPI][HPI]] for Roam Research data
  '''
 
- from ..common import Results, Visit, Loc, extract_urls
+ from promnesia.common import Loc, Results, Visit, extract_urls
 
 
  def index() -> Results:
@@ -43,6 +43,7 @@ def _collect(node: 'RoamNode') -> Results:
 
 
  import typing
+
  if typing.TYPE_CHECKING:
  import my.roamresearch as RR
  RoamNode = RR.Node
promnesia/sources/rss.py CHANGED
@@ -2,14 +2,12 @@
  Uses [[https://github.com/karlicoss/HPI][HPI]] for RSS data.
  '''
 
- from itertools import chain
-
- from ..common import Visit, Loc, extract_urls, Results, get_logger
-
  from datetime import datetime
 
  import pytz
 
+ from promnesia.common import Loc, Results, Visit
+
  # arbitrary, 2011-11-04 00:05:23.283+00:00
  default_datetime = datetime.fromtimestamp(1320365123, tz=pytz.utc)
  # TODO FIXME allow for visit not to have datetime?
@@ -17,12 +15,13 @@ default_datetime = datetime.fromtimestamp(1320365123, tz=pytz.utc)
 
  def index() -> Results:
  from my.rss.all import subscriptions
+
  for feed in subscriptions():
  # TODO locator should be optional too? although could use direct link in the rss reader interface
  locator = Loc.make(title='my.rss')
  yield Visit(
  url=feed.url,
  dt=feed.created_at or default_datetime,
- context=f'RSS subscription', # TODO use 'provider', etc?
+ context='RSS subscription', # TODO use 'provider', etc?
  locator=locator,
  )
promnesia/sources/shellcmd.py CHANGED
@@ -2,18 +2,31 @@
  Greps out URLs from an arbitrary shell command results.
  """
 
- from datetime import datetime
+ from __future__ import annotations
+
  import os
  import re
- from subprocess import run, PIPE
- from typing import Union, Sequence
  import warnings
+ from collections.abc import Sequence
+ from datetime import datetime
+ from subprocess import PIPE, run
+
+ from promnesia.common import (
+ Loc,
+ PathIsh,
+ Results,
+ Visit,
+ _is_windows,
+ extract_urls,
+ file_mtime,
+ get_system_tz,
+ now_tz,
+ )
 
- from ..common import Visit, Loc, Results, extract_urls, file_mtime, get_system_tz, now_tz, _is_windows, PathIsh
  from .plaintext import _has_grep
 
 
- def index(command: Union[str, Sequence[PathIsh]]) -> Results:
+ def index(command: str | Sequence[PathIsh]) -> Results:
  cmd: Sequence[PathIsh]
  cmds: str
  if isinstance(command, str):
@@ -71,7 +84,7 @@ def index(command: Union[str, Sequence[PathIsh]]) -> Results:
  context=context,
  )
 
- r = run(cmd, stdout=PIPE)
+ r = run(cmd, stdout=PIPE, check=False)
  if r.returncode > 0:
  if not (cmd[0] in {'grep', 'findstr'} and r.returncode == 1): # ugh. grep returns 1 on no matches...
  r.check_returncode()
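The `check=False` added to `run()` above keeps subprocess from raising on any non-zero exit, because grep exits with 1 when it merely finds no matches; the code then escalates only genuine failures. A minimal standalone sketch of the same pattern (the `grep_lines` helper and the temporary file are illustrative, not part of promnesia):

from __future__ import annotations

import tempfile
from subprocess import PIPE, run

def grep_lines(cmd: list[str]) -> list[str]:
    """Run a grep-like command, treating 'no matches' (exit code 1) as success."""
    r = run(cmd, stdout=PIPE, check=False)  # don't raise automatically
    if r.returncode > 0:
        # grep/findstr exit with 1 when nothing matched -- not an error here;
        # any other non-zero code (unreadable file, bad flag, ...) still raises
        if not (cmd[0] in {'grep', 'findstr'} and r.returncode == 1):
            r.check_returncode()
    return r.stdout.decode(errors='replace').splitlines()

with tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False) as f:
    f.write('no links in this line\n')
# no matches -> empty list instead of CalledProcessError
print(grep_lines(['grep', 'https://', f.name]))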
promnesia/sources/signal.py CHANGED
@@ -1,23 +1,23 @@
  """
  Collects visits from Signal Desktop's encrypted SQLIite db(s).
  """
+ from __future__ import annotations
 
  # Functions get their defaults from module-data.
  #
  # * Open-ciphered-db adapted from:
  # https://github.com/carderne/signal-export/commit/2284c8f4
  # * Copyright (c) 2019 Chris Arderne, 2020 Kostis Anagnostopoulos
-
-
  import json
  import logging
  import platform
  import sqlite3
  import subprocess as sbp
+ from collections.abc import Iterable, Iterator, Mapping
  from contextlib import contextmanager
  from pathlib import Path
  from textwrap import dedent, indent
- from typing import Any, Iterable, Iterator, Mapping, Union, Optional
+ from typing import Any, Union
 
  from ..common import Loc, PathIsh, Results, Visit, extract_urls, from_epoch
 
@@ -29,7 +29,7 @@ def index(
  http_only: bool = False,
  locator_schema: str="editor",
  append_platform_path: bool = False,
- override_key: Optional[str] = None,
+ override_key: str | None = None,
  ) -> Results:
  """
  :param db_paths:
@@ -63,6 +63,8 @@ def index(
  logger.debug("Paths to harvest: %s", db_paths)
  if not http_only:
  sql_query = f"{messages_query}\nWHERE body LIKE '%http%'"
+ else:
+ sql_query = messages_query
 
  for db_path in resolved_db_paths:
  logger.info("Ciphered db to harvest %s", db_path)
@@ -106,12 +108,18 @@ messages_query = dedent(
  SELECT
  id,
  type,
- coalesce(name, profileName, profileFamilyName, e164) as aname,
+ coalesce(
+ profileFullName,
+ profileName,
+ name,
+ profileFamilyName,
+ e164
+ ) as aname,
  name,
  profileName,
  profileFamilyName,
  e164,
- uuid
+ serviceId
  FROM conversations
  ),
  Msgs AS (
@@ -123,8 +131,8 @@ messages_query = dedent(
  M.received_at,
  M.sent_at
  ) AS timestamp,
- IIF(M.type = "outgoing",
- "Me (" || C2.aname || ")",
+ IIF(M.type = 'outgoing',
+ 'Me (' || C2.aname || ')',
  C2.aname
  ) AS sender,
  M.conversationId AS cid,
@@ -138,7 +146,7 @@ messages_query = dedent(
  INNER JOIN Cons AS C1
  ON M.conversationId = C1.id
  INNER JOIN Cons AS C2
- ON M.sourceUuid = C2.uuid
+ ON M.sourceServiceId = C2.serviceId
  )
  SELECT id, timestamp, sender, cid, chatname, body
  FROM Msgs
@@ -188,8 +196,8 @@ def _expand_path(path_pattern: PathIsh) -> Iterable[Path]:
 
  def _expand_paths(paths: PathIshes) -> Iterable[Path]:
  if _is_pathish(paths):
- paths = [paths]  # type: ignore[assignment,list-item]
- return [pp.resolve() for p in paths for pp in _expand_path(p)]  # type: ignore[union-attr,list-item]
+ paths = [paths]  # type: ignore[list-item]
+ return [pp.resolve() for p in paths for pp in _expand_path(p)]  # type: ignore[union-attr]
 
 
  def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path]:
@@ -229,14 +237,14 @@ def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path]
  platform_name = platform.system()
  try:
  plat_paths = platform_db_paths[platform_name]
- except LookupError:
+ except LookupError as le:
  raise ValueError(
  f"Unknown platform({platform_name}!"
  f"\n Expected one of {list(platform_db_paths.keys())}."
- )
+ ) from le
 
  if db_paths and append:
- db_paths = [  # type: ignore[misc,assignment]
+ db_paths = [  # type: ignore[assignment]
  *([db_paths] if _is_pathish(db_paths) else db_paths),
  plat_paths,
  ]
@@ -253,7 +261,7 @@ def _config_for_dbfile(db_path: Path, default_key=None) -> Path:
 
 
  def _key_from_config(signal_desktop_config_path: PathIsh) -> str:
- with open(signal_desktop_config_path, "r") as conf:
+ with Path(signal_desktop_config_path).open() as conf:
  return json.load(conf)["key"]
 
 
@@ -261,6 +269,7 @@ def _key_from_config(signal_desktop_config_path: PathIsh) -> str:
  def connect_db(
  db_path: Path,
  key,
+ *,
  decrypt_db: bool = False,
  sqlcipher_exe: PathIsh = "sqlcipher",
  **decryption_pragmas: Mapping[str, Any],
@@ -310,8 +319,8 @@ def connect_db(
  sql_cmds.extend(
  [
  f"ATTACH DATABASE '{decrypted_file}' AS plaintext KEY '';",
- f"SELECT sqlcipher_export('plaintext');",
- f"DETACH DATABASE plaintext;",
+ "SELECT sqlcipher_export('plaintext');",
+ "DETACH DATABASE plaintext;",
  ]
  )
  sql = "\n".join(sql_cmds)
@@ -320,12 +329,12 @@ def connect_db(
  "Decrypting db '%s' with cmd: %s <<<EOF\n%s\nEOF", db_path, cmd, sql
  )
  try:
- sbp.run(  # type: ignore[call-overload]
+ sbp.run(
  cmd,
  check=True,
  input=sql,
  capture_output=True,
- universal_newlines=True,
+ text=True,
  )
  except sbp.CalledProcessError as ex:
  prefix = " " * 4
@@ -335,7 +344,7 @@ def connect_db(
  ) from None
  db = sqlite3.connect(f"file:{decrypted_file}?mode=ro", uri=True)
  else:
- from sqlcipher3 import dbapi2  # type: ignore[import]
+ from sqlcipher3 import dbapi2  # type: ignore[import-not-found]
 
  db = dbapi2.connect(f"file:{db_path}?mode=ro", uri=True)
  # Param-binding doesn't work for pragmas, so use a direct string concat.
@@ -372,7 +381,7 @@ def _handle_row(row: tuple, db_path: PathIsh, locator_schema: str) -> Results:
  if not urls:
  return
 
- assert (
+ assert (  # noqa: PT018
  text and mid and sender and chatname
  ), f"should have eliminated messages without 'http' or missing ids: {row}"
 
@@ -392,7 +401,7 @@ def _harvest_db(
  db_path: Path,
  messages_query: str,
  *,
- override_key: Optional[str] = None,
+ override_key: str | None = None,
  locator_schema: str = "editor",
  decrypt_db: bool = False,
  **decryption_pragmas,
@@ -419,9 +428,9 @@ def _harvest_db(
 
  with connect_db(db_path, key, decrypt_db=decrypt_db, **decryption_pragmas) as db:
  for mid, tstamp, sender, cid, chatname, text in db.execute(messages_query):
+ tstamp = from_epoch(tstamp / 1000.0)
+ row = (mid, tstamp, sender, cid, chatname, text)
  try:
- tstamp = from_epoch(tstamp / 1000.0)
- row = (mid, tstamp, sender, cid, chatname, text)
  yield from _handle_row(row, db_path, locator_schema)
  except Exception as ex:
  # TODO: also insert errors in db
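One detail in the messages_query change above: the string literals in `IIF(M.type = 'outgoing', ...)` switched from double to single quotes. In SQL, single quotes delimit string literals while double quotes delimit identifiers; SQLite only accepts double-quoted strings as a legacy fallback, and stricter builds disable it, which is presumably why the query now uses single quotes. A small self-contained check with plain sqlite3 (no Signal data involved):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE msgs (type TEXT)")
conn.execute("INSERT INTO msgs VALUES ('outgoing')")

# single quotes: a proper SQL string literal -> matches the row
print(conn.execute("SELECT count(*) FROM msgs WHERE type = 'outgoing'").fetchone())

# double quotes nominally name an identifier; SQLite historically falls back to
# treating "outgoing" as a string, but builds with DQS disabled reject it outright
try:
    print(conn.execute('SELECT count(*) FROM msgs WHERE type = "outgoing"').fetchone())
except sqlite3.OperationalError as e:
    print("rejected:", e)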
promnesia/sources/smscalls.py CHANGED
@@ -2,11 +2,11 @@
  Uses [[https://github.com/karlicoss/HPI][HPI]] smscalls module
  '''
 
- from promnesia.common import Visit, Loc, Results, extract_urls
+ from promnesia.common import Loc, Results, Visit, extract_urls
 
 
  def index() -> Results:
- from . import hpi
+ from . import hpi  # noqa: F401,I001
  from my.smscalls import messages
 
  for m in messages():
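The reordered `promnesia.common` imports in these sources are the same handful of primitives used everywhere: `index()` is a generator yielding one `Visit` per URL, `extract_urls()` pulls URLs out of free text, and `Loc` records where a visit was found. A minimal hypothetical source in that style (the `NOTES` data is invented for illustration):

from datetime import datetime, timezone

from promnesia.common import Loc, Results, Visit, extract_urls

NOTES = [  # stand-in data; a real source would read messages, feeds, files, ...
    ('2024-01-05', 'check out https://github.com/karlicoss/promnesia later'),
]

def index() -> Results:
    locator = Loc.make(title='my notes')
    for day, text in NOTES:
        dt = datetime.fromisoformat(day).replace(tzinfo=timezone.utc)
        for url in extract_urls(text):  # extracts URLs from free-form text
            yield Visit(url=url, dt=dt, context=text, locator=locator)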
promnesia/sources/stackexchange.py CHANGED
@@ -2,12 +2,13 @@
  Uses [[https://github.com/karlicoss/HPI][HPI]] for Stackexchange data.
  '''
 
- from ..common import Results, Visit, Loc, extract_urls
+ from promnesia.common import Loc, Results, Visit
 
 
  def index() -> Results:
- from . import hpi
- import my.stackexchange.gdpr as G  # type: ignore[import] # TODO eh, not sure if should run against pypi or not...
+ from . import hpi  # noqa: F401,I001
+ import my.stackexchange.gdpr as G
+
  for v in G.votes():
  if isinstance(v, Exception):
  yield v
promnesia/sources/takeout.py CHANGED
@@ -1,19 +1,36 @@
  '''
  Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#mygoogletakeoutpaths][google.takeout]] module
  '''
- from typing import Iterable, Set, Any
+
+ from __future__ import annotations
+
  import warnings
+ from collections.abc import Iterable
+ from typing import Any, NamedTuple
+
+ from promnesia.common import Loc, Results, Visit, logger
 
- from ..common import Visit, Loc, Results, logger
- from ..compat import removeprefix
+
+ # incase user is using an old version of google_takeout_parser
+ class YoutubeCSVStub(NamedTuple):
+ contentJSON: str
 
 
  def index() -> Results:
- from . import hpi
+ from . import hpi  # noqa: F401
 
  try:
+ from google_takeout_parser.models import (
+ Activity,
+ ChromeHistory,
+ LikedYoutubeVideo,
+ YoutubeComment,
+ )
+ from google_takeout_parser.parse_csv import (
+ extract_comment_links,
+ reconstruct_comment_content,
+ )
  from my.google.takeout.parser import events
- from google_takeout_parser.models import Activity, YoutubeComment, LikedYoutubeVideo, ChromeHistory
  except ModuleNotFoundError as ex:
  logger.exception(ex)
  yield ex
@@ -24,18 +41,30 @@ def index() -> Results:
  yield from takeout_legacy.index()
  return
 
- _seen: Set[str] = {
+
+ _seen: set[str] = {
  # these are definitely not useful for promnesia
  'Location',
  'PlaceVisit',
  'PlayStoreAppInstall',
  }
+
+ imported_yt_csv_models = False
+ try:
+ from google_takeout_parser.models import CSVYoutubeComment, CSVYoutubeLiveChat
+ imported_yt_csv_models = True
+ except ImportError:
+ # warn user to upgrade google_takeout_parser
+ warnings.warn("Please upgrade google_takeout_parser (`pip install -U google_takeout_parser`) to support the new format for youtube comments")
+ CSVYoutubeComment = YoutubeCSVStub  # type: ignore[misc,assignment]
+ CSVYoutubeLiveChat = YoutubeCSVStub  # type: ignore[misc,assignment]
+
  def warn_once_if_not_seen(e: Any) -> Iterable[Exception]:
  et_name = type(e).__name__
  if et_name in _seen:
  return
  _seen.add(et_name)
- yield RuntimeError(f"Unhandled event {repr(type(e))}: {e}")
+ yield RuntimeError(f"Unhandled event {type(e)!r}: {e}")
 
  for e in events():
  if isinstance(e, Exception):
@@ -48,13 +77,13 @@ def index() -> Results:
  # when you follow something from search the actual url goes after this
  # e.g. https://www.google.com/url?q=https://en.wikipedia.org/wiki/Clapham
  # note: also title usually starts with 'Visited ', in such case but perhaps fine to keep it
- url = removeprefix(url, "https://www.google.com/url?q=")
+ url = url.removeprefix("https://www.google.com/url?q=")
  title = e.title
 
  if e.header == 'Chrome':
  # title contains 'Visited <page title>' in this case
  context = None
- title = removeprefix(title, 'Visited ')
+ title = title.removeprefix('Visited ')
  elif e.header in _CLEAR_CONTEXT_FOR_HEADERS:
  # todo perhaps could add to some sort of metadata?
  # only useful for debugging really
@@ -71,6 +100,8 @@ def index() -> Results:
  elif e.products == ['Ads']:
  # header contains some weird internal ad id in this case
  context = None
+ else:
+ context = None
  # NOTE: at this point seems that context always ends up as None (at least for @karlicoss as of 20230131)
  # so alternatively could just force it to be None instead of manual dispatching :shrug:
  yield Visit(
@@ -109,6 +140,42 @@ def index() -> Results:
  yield Visit(
  url=url, dt=e.dt, context=e.content, locator=Loc(title=e.content, href=url)
  )
+ elif imported_yt_csv_models and isinstance(e, CSVYoutubeComment):
+ contentJSON = e.contentJSON
+ content = reconstruct_comment_content(contentJSON, format='text')
+ if isinstance(content, Exception):
+ yield content
+ continue
+ links = extract_comment_links(contentJSON)
+ if isinstance(links, Exception):
+ yield links
+ continue
+ context = f"Commented on {e.video_url}"
+ for url in links:
+ yield Visit(
+ url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url)
+ )
+ yield Visit(
+ url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url)
+ )
+ elif imported_yt_csv_models and isinstance(e, CSVYoutubeLiveChat):
+ contentJSON = e.contentJSON
+ content = reconstruct_comment_content(contentJSON, format='text')
+ if isinstance(content, Exception):
+ yield content
+ continue
+ links = extract_comment_links(contentJSON)
+ if isinstance(links, Exception):
+ yield links
+ continue
+ context = f"Commented on livestream {e.video_url}"
+ for url in links:
+ yield Visit(
+ url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url)
+ )
+ yield Visit(
+ url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url)
+ )
  else:
  yield from warn_once_if_not_seen(e)
 
promnesia/sources/takeout_legacy.py CHANGED
@@ -1,9 +1,13 @@
- from ..common import Visit, logger, PathIsh, Url, Loc, Results
+ from __future__ import annotations
+
+ from promnesia.common import Loc, Results, Visit, logger
+
 
  # TODO make an iterator, insert in db as we go? handle errors gracefully?
  def index() -> Results:
- from . import hpi
+ from . import hpi  # noqa: F401,I001
  from my.google.takeout.paths import get_takeouts
+
  takeouts = list(get_takeouts())
  # TODO if no takeouts, raise?
  # although could raise a warning on top level, when source emitted no takeouts
@@ -22,19 +26,25 @@ def index() -> Results:
 
 
 
- import pytz
- from itertools import chain
+ import json
+ from collections.abc import Iterable
  from datetime import datetime
- from typing import List, Optional, Iterable, TYPE_CHECKING
+ from itertools import chain
  from pathlib import Path
- import json
-
 
- from .. import config
+ import pytz
+ from more_itertools import unique_everseen
 
+ from promnesia import config
 
- from more_itertools import unique_everseen
- from cachew import cachew
+ try:
+ from cachew import cachew
+ except ModuleNotFoundError as me:
+ if me.name != 'cachew':
+ raise me
+ # this module is legacy anyway, so just make it defensive
+ def cachew(*args, **kwargs):  # type: ignore[no-redef]
+ return lambda f: f
 
 
  # TODO use CPath? Could encapsulate a path within an archive *or* within a directory
@@ -42,7 +52,9 @@ TakeoutPath = Path
 
 
  def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
+ # FIXME switch to actual kompress? and use CPath?
  from my.core.kompress import kexists
+
  # TODO glob
  # TODO not sure about windows path separators??
  spath = 'Takeout/My Activity/' + kind
@@ -53,7 +65,7 @@ def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
 
  locator = Loc.file(spath)
  from my.google.takeout.html import read_html
- for dt, url, title in read_html(takeout, spath):
+ for dt, url, _title in read_html(takeout, spath):
  yield Visit(
  url=url,
  dt=dt,
@@ -105,7 +117,7 @@ def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
  hist = j['Browser History']
  for item in hist:
  url = item['url']
- time = datetime.utcfromtimestamp(item['time_usec'] / 10 ** 6).replace(tzinfo=pytz.utc)
+ time = datetime.fromtimestamp(item['time_usec'] / 10 ** 6, tz=pytz.utc)
  # TODO any more interesitng info?
  yield Visit(
  url=url,
promnesia/sources/telegram.py CHANGED
@@ -1,11 +1,12 @@
- from typing import Optional
- from urllib.parse import unquote  # TODO mm, make it easier to rememember to use...
+ from __future__ import annotations
+
  import warnings
+ from urllib.parse import unquote  # TODO mm, make it easier to rememember to use...
 
- from promnesia.common import Results, logger, extract_urls, Visit, Loc, PathIsh
+ from promnesia.common import Loc, PathIsh, Results, Visit, extract_urls, logger
 
 
- def index(database: Optional[PathIsh]=None, *, http_only: bool=False, with_extra_media_info: bool=False) -> Results:
+ def index(database: PathIsh | None=None, *, http_only: bool=False, with_extra_media_info: bool=False) -> Results:
  if database is None:
  # fully relying on HPI
  yield from _index_new(http_only=http_only, with_extra_media_info=with_extra_media_info)
@@ -17,10 +18,11 @@ def index(database: Optional[PathIsh]=None, *, http_only: bool=False, with_extra
  )
  try:
  yield from _index_new_with_adhoc_config(database=database, http_only=http_only, with_extra_media_info=with_extra_media_info)
- return
  except Exception as e:
  logger.exception(e)
  warnings.warn("Hacking my.config.telegram.telegram_backup didn't work. You probably need to update HPI.")
+ else:
+ return
 
  logger.warning("Falling back onto promnesia.sources.telegram_legacy module")
  yield from _index_legacy(database=database, http_only=http_only)
@@ -32,7 +34,7 @@ def _index_legacy(*, database: PathIsh, http_only: bool) -> Results:
 
 
  def _index_new_with_adhoc_config(*, database: PathIsh, http_only: bool, with_extra_media_info: bool) -> Results:
- from . import hpi
+ from . import hpi  # noqa: F401,I001
 
  class config:
  class telegram:
@@ -45,14 +47,14 @@ def _index_new_with_adhoc_config(*, database: PathIsh, http_only: bool, with_ext
 
 
  def _index_new(*, http_only: bool, with_extra_media_info: bool) -> Results:
- from . import hpi
+ from . import hpi  # noqa: F401,I001
  from my.telegram.telegram_backup import messages
 
  extra_where = "(has_media == 1 OR text LIKE '%http%')" if http_only else None
- for i, m in enumerate(messages(
- with_extra_media_info=with_extra_media_info,
- extra_where=extra_where,
- )):
+ for m in messages(
+ with_extra_media_info=with_extra_media_info,
+ extra_where=extra_where,
+ ):
  text = m.text
 
  urls = extract_urls(text)
promnesia/sources/telegram_legacy.py CHANGED
@@ -2,23 +2,34 @@
  Uses [[https://github.com/fabianonline/telegram_backup#readme][telegram_backup]] database for messages data
  '''
 
- from pathlib import Path
+ from __future__ import annotations
+
  import sqlite3
+ from pathlib import Path
  from textwrap import dedent
- from typing import Union, TypeVar
- from urllib.parse import unquote  # TODO mm, make it easier to rememember to use...
+ from typing import TypeVar
+ from urllib.parse import unquote  # TODO mm, make it easier to rememember to use...
+
+ from promnesia.common import (
+ Loc,
+ PathIsh,
+ Results,
+ Visit,
+ echain,
+ extract_urls,
+ from_epoch,
+ get_logger,
+ )
 
- from ..common import PathIsh, Visit, get_logger, Loc, extract_urls, from_epoch, Results, echain
  from ..sqlite import sqlite_connection
 
  T = TypeVar("T")
 
 
- def unwrap(res: Union[T, Exception]) -> T:
+ def unwrap(res: T | Exception) -> T:
  if isinstance(res, Exception):
  raise res
- else:
- return res
+ return res
 
 
  def index(database: PathIsh, *, http_only: bool=False) -> Results:
promnesia/sources/twitter.py CHANGED
@@ -1,18 +1,19 @@
  '''
  Uses [[https://github.com/karlicoss/HPI][HPI]] for Twitter data.
  '''
- from typing import Iterable
 
- from ..common import logger, Results, Visit, Loc, extract_urls, Res
+ from collections.abc import Iterable
+
+ from promnesia.common import Loc, Res, Results, Visit, extract_urls, logger
 
 
  def index() -> Results:
- from . import hpi
+ from . import hpi  # noqa: F401,I001
  import my.twitter.all as tw
+ from my.twitter.archive import Tweet  # todo extract to common or something?
+
  # TODO hmm. tweets themselves are sort of visits? not sure if they should contribute..
  processed = 0
-
- from my.twitter.archive import Tweet  # todo extract to common or something?
  tweets: Iterable[Res[Tweet]] = tw.tweets()
  for t in tweets:
  if isinstance(t, Exception):
promnesia/sources/vcs.py CHANGED
@@ -1,12 +1,14 @@
  '''
  Clones & indexes Git repositories (via sources.auto)
  '''
- # TODO not sure if worth exposing... could be just handled by auto or something?)
+ from __future__ import annotations
 
- from pathlib import Path
  import re
+ from collections.abc import Iterable
+
+ # TODO not sure if worth exposing... could be just handled by auto or something?)
+ from pathlib import Path
  from subprocess import check_call
- from typing import Iterable
 
  from ..common import Extraction, PathIsh, get_tmpdir, slugify