promnesia 1.2.20240810__py3-none-any.whl → 1.3.20241021__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. promnesia/__init__.py +14 -3
  2. promnesia/__main__.py +38 -25
  3. promnesia/cannon.py +23 -23
  4. promnesia/common.py +49 -42
  5. promnesia/compare.py +18 -20
  6. promnesia/compat.py +10 -10
  7. promnesia/config.py +20 -22
  8. promnesia/database/common.py +4 -3
  9. promnesia/database/dump.py +14 -13
  10. promnesia/database/load.py +7 -7
  11. promnesia/extract.py +13 -11
  12. promnesia/kjson.py +11 -10
  13. promnesia/logging.py +1 -1
  14. promnesia/misc/install_server.py +7 -8
  15. promnesia/server.py +42 -31
  16. promnesia/sources/auto.py +43 -30
  17. promnesia/sources/auto_logseq.py +6 -5
  18. promnesia/sources/auto_obsidian.py +2 -2
  19. promnesia/sources/browser.py +14 -9
  20. promnesia/sources/browser_legacy.py +17 -13
  21. promnesia/sources/demo.py +7 -7
  22. promnesia/sources/fbmessenger.py +3 -2
  23. promnesia/sources/filetypes.py +9 -7
  24. promnesia/sources/github.py +5 -7
  25. promnesia/sources/guess.py +2 -1
  26. promnesia/sources/hackernews.py +2 -2
  27. promnesia/sources/hpi.py +2 -2
  28. promnesia/sources/html.py +7 -5
  29. promnesia/sources/hypothesis.py +3 -2
  30. promnesia/sources/instapaper.py +2 -2
  31. promnesia/sources/markdown.py +17 -7
  32. promnesia/sources/org.py +20 -10
  33. promnesia/sources/plaintext.py +30 -31
  34. promnesia/sources/pocket.py +3 -2
  35. promnesia/sources/reddit.py +19 -18
  36. promnesia/sources/roamresearch.py +2 -1
  37. promnesia/sources/rss.py +3 -4
  38. promnesia/sources/shellcmd.py +19 -6
  39. promnesia/sources/signal.py +14 -13
  40. promnesia/sources/smscalls.py +2 -2
  41. promnesia/sources/stackexchange.py +3 -2
  42. promnesia/sources/takeout.py +23 -13
  43. promnesia/sources/takeout_legacy.py +15 -11
  44. promnesia/sources/telegram.py +13 -11
  45. promnesia/sources/telegram_legacy.py +18 -7
  46. promnesia/sources/twitter.py +6 -5
  47. promnesia/sources/vcs.py +5 -3
  48. promnesia/sources/viber.py +10 -9
  49. promnesia/sources/website.py +4 -4
  50. promnesia/sources/zulip.py +3 -2
  51. promnesia/sqlite.py +7 -4
  52. promnesia/tests/common.py +8 -5
  53. promnesia/tests/server_helper.py +11 -8
  54. promnesia/tests/sources/test_auto.py +2 -3
  55. promnesia/tests/sources/test_filetypes.py +2 -1
  56. promnesia/tests/sources/test_hypothesis.py +3 -3
  57. promnesia/tests/sources/test_org.py +2 -3
  58. promnesia/tests/sources/test_plaintext.py +0 -1
  59. promnesia/tests/sources/test_shellcmd.py +3 -4
  60. promnesia/tests/sources/test_takeout.py +3 -5
  61. promnesia/tests/test_cannon.py +5 -5
  62. promnesia/tests/test_cli.py +4 -6
  63. promnesia/tests/test_compare.py +1 -1
  64. promnesia/tests/test_config.py +7 -8
  65. promnesia/tests/test_db_dump.py +11 -12
  66. promnesia/tests/test_extract.py +10 -6
  67. promnesia/tests/test_indexer.py +14 -8
  68. promnesia/tests/test_server.py +2 -3
  69. promnesia/tests/test_traverse.py +0 -2
  70. promnesia/tests/utils.py +4 -4
  71. {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/METADATA +3 -2
  72. promnesia-1.3.20241021.dist-info/RECORD +83 -0
  73. {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/WHEEL +1 -1
  74. promnesia-1.2.20240810.dist-info/RECORD +0 -83
  75. {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/LICENSE +0 -0
  76. {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/entry_points.txt +0 -0
  77. {promnesia-1.2.20240810.dist-info → promnesia-1.3.20241021.dist-info}/top_level.txt +0 -0
promnesia/sources/org.py CHANGED
@@ -1,16 +1,26 @@
- from datetime import datetime
+ from __future__ import annotations
+
  import re
- from typing import Iterable, List, Set, Optional, Iterator, Tuple, NamedTuple, cast
+ from collections.abc import Iterable, Iterator
+ from datetime import datetime
  from pathlib import Path
-
-
- from ..common import Visit, get_logger, Results, Url, Loc, from_epoch, iter_urls, PathIsh, Res, file_mtime
-
+ from typing import NamedTuple, Optional, cast

  import orgparse
- from orgparse.date import gene_timestamp_regex, OrgDate
+ from orgparse.date import OrgDate, gene_timestamp_regex
  from orgparse.node import OrgNode

+ from promnesia.common import (
+     Loc,
+     PathIsh,
+     Res,
+     Results,
+     Url,
+     Visit,
+     file_mtime,
+     get_logger,
+     iter_urls,
+ )

  UPDATE_ORGPARSE_WARNING = 'WARNING: please update orgparse version to a more recent (pip3 install -U orgparse)'

@@ -36,7 +46,7 @@ CREATED_RGX = re.compile(gene_timestamp_regex(brtype='inactive'), re.VERBOSE)
  """

  class Parsed(NamedTuple):
-     dt: Optional[datetime]
+     dt: datetime | None
      heading: str


@@ -74,7 +84,7 @@ def _get_heading(n: OrgNode):
      return '' if n.is_root() else n.get_heading(format='raw')


- def walk_node(*, node: OrgNode, dt: datetime) -> Iterator[Res[Tuple[Parsed, OrgNode]]]:
+ def walk_node(*, node: OrgNode, dt: datetime) -> Iterator[Res[tuple[Parsed, OrgNode]]]:
      try:
          parsed = _parse_node(node)
      except Exception as e:

@@ -98,7 +108,7 @@ def get_body_compat(node: OrgNode) -> str:
          # get_body was only added to root in 0.2.0
          for x in warn_old_orgparse_once():
              # ugh. really crap, but it will at least only warn once... (becaue it caches)
-             raise x
+             raise x  # noqa: B904
          return UPDATE_ORGPARSE_WARNING
      else:
          raise e
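
Most of the churn across these modules follows the same pattern: `from __future__ import annotations` is added so that PEP 604 unions such as `dt: datetime | None` and builtin generics such as `tuple[Parsed, OrgNode]` can replace `typing.Optional`/`typing.Tuple` while the module still imports on older interpreters. A minimal sketch of why that works (illustrative function, not from the package):

    from __future__ import annotations  # PEP 563: annotations are stored lazily, not evaluated

    from datetime import datetime


    def parse_created(raw: str) -> datetime | None:
        # the "datetime | None" annotation is never evaluated at runtime,
        # so this loads fine even on Python versions before 3.10
        try:
            return datetime.strptime(raw, "%Y-%m-%d")
        except ValueError:
            return None


    print(parse_created("2024-10-21"))  # 2024-10-21 00:00:00
    print(parse_created("nope"))        # None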

promnesia/sources/plaintext.py CHANGED
@@ -1,10 +1,9 @@
- from ..common import get_logger, get_tmpdir, PathIsh, _is_windows
- from ..compat import removeprefix
+ from __future__ import annotations

  from functools import lru_cache
  from pathlib import Path
- import os
- from typing import List
+
+ from promnesia.common import PathIsh, _is_windows, get_logger, get_tmpdir

  # https://linux-and-mac-hacks.blogspot.co.uk/2013/04/use-grep-and-regular-expressions-to.html
  _URL_REGEX = r'\b(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@#/%=~_|]'

@@ -12,16 +11,16 @@ _URL_REGEX = r'\b(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@
  if _is_windows:
      # wtf? for some reason on windows (in cmd.exe specificaly) \b isn't working...
      # this will make the regex a bit less precise, but not end of the world
-     _URL_REGEX = removeprefix(_URL_REGEX, r'\b')
+     _URL_REGEX = _URL_REGEX.removeprefix(r'\b')


- @lru_cache()
+ @lru_cache
  def _has_grep() -> bool:
      import shutil
      return shutil.which('grep') is not None


- Command = List[str]
+ Command = list[str]


  _GREP_ARGS: Command = [

@@ -39,7 +38,7 @@ if not _is_windows:

      # NOTE: grep/findstr exit with code 1 on no matches...
      # we hack around it in shellcmd module (search 'grep')
-     def _grep(*, paths: List[str], recursive: bool) -> Command:
+     def _grep(*, paths: list[str], recursive: bool) -> Command:
          return [
              'grep',
              *(['-r'] if recursive else []),

@@ -91,26 +90,26 @@ def extract_from_path(path: PathIsh) -> Command:
      logger = get_logger()
      if pp.is_dir(): # TODO handle archives here???
          return _extract_from_dir(str(pp))
-     else:
-         if any(pp.suffix == ex for ex in (
-             '.xz',
-             '.bz2',
-             '.gz',
-             '.zip',
-         )):
-             # todo should be debug?
-             # or should delete it completely, feels like unpacking archives here is a bit too much
-             raise RuntimeError(f"Archives aren't supported yet: {path}")
-             logger.info(f"Extracting from compressed file {path}")
-             import lzma
-             from tempfile import NamedTemporaryFile
-             # TODO hopefully, no collisions
-             import os.path
-             fname = os.path.join(tdir.name, os.path.basename(path))
-             with open(fname, 'wb') as fo:
-                 with lzma.open(path, 'r') as cf:
-                     fo.write(cf.read())
-             return _extract_from_file(fname)
-         else:
-             r = _extract_from_file(str(pp))
-             return r
+
+     if any(pp.suffix == ex for ex in (
+         '.xz',
+         '.bz2',
+         '.gz',
+         '.zip',
+     )):
+         # todo should be debug?
+         # or should delete it completely, feels like unpacking archives here is a bit too much
+         raise RuntimeError(f"Archives aren't supported yet: {path}")
+     # logger.info(f"Extracting from compressed file {path}")
+     # import lzma
+     # from tempfile import NamedTemporaryFile
+     # # TODO hopefully, no collisions
+     # import os.path
+     # fname = os.path.join(tdir.name, os.path.basename(path))
+     # with open(fname, 'wb') as fo:
+     #     with lzma.open(path, 'r') as cf:
+     #         fo.write(cf.read())
+     # return _extract_from_file(fname)
+
+     r = _extract_from_file(str(pp))
+     return r
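
Dropping the `..compat.removeprefix` helper in favour of the built-in `str.removeprefix` assumes Python 3.9+ (PEP 616); likewise, `@lru_cache` without parentheses is valid since 3.8. The builtin behaves the same way the helper did, only stripping the prefix when it is actually present:

    # str.removeprefix (Python 3.9+) only strips the prefix when it is there
    regex = r'\b(https?|ftp|file)://'
    print(regex.removeprefix(r'\b'))        # (https?|ftp|file)://
    print('no-prefix'.removeprefix(r'\b'))  # unchanged: no-prefix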

promnesia/sources/pocket.py CHANGED
@@ -1,11 +1,12 @@
  '''
  Uses [[https://github.com/karlicoss/HPI][HPI]] for Pocket highlights & bookmarks
  '''
- from ..common import Visit, Loc, Results
+
+ from promnesia.common import Loc, Results, Visit


  def index() -> Results:
-     from . import hpi
+     from . import hpi  # noqa: F401,I001
      from my.pocket import articles

      # TODO use docstring from my. module? E.g. describing which pocket format is expected

promnesia/sources/reddit.py CHANGED
@@ -2,21 +2,27 @@
  Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myreddit][reddit]] module
  '''

+ from __future__ import annotations
+
+ import typing
  from itertools import chain
- from typing import Set, Optional, Type

- from ..common import Visit, Loc, extract_urls, Results, logger
+ from promnesia.common import Loc, Results, Visit, extract_urls, logger
+
+ if typing.TYPE_CHECKING:
+     from my.reddit.common import Comment, RedditBase, Save, Submission, Upvote
+

+ def index(*, render_markdown: bool = False, renderer: type[RedditRenderer] | None = None) -> Results:
+     from . import hpi  # noqa: F401

- def index(*, render_markdown: bool = False, renderer: Optional[Type['RedditRenderer']] = None) -> Results:
-     from . import hpi
      try:
-         from my.reddit.all import submissions, comments, saved, upvoted
+         from my.reddit.all import comments, saved, submissions, upvoted
      except ModuleNotFoundError as e:
          if "No module named 'my.reddit.all'" in str(e):
              import warnings
              warnings.warn("DEPRECATED/reddit: Using an old version of HPI, please update")
-             from my.reddit import submissions, comments, saved, upvoted
+             from my.reddit import comments, saved, submissions, upvoted
          else:
              raise e


@@ -58,7 +64,7 @@ def index(*, render_markdown: bool = False, renderer: Optional[Type['RedditRende
  # mostly here so we can keep track of how the user
  # wants to render markdown
  class RedditRenderer:
-     def __init__(self, render_markdown: bool = False) -> None:
+     def __init__(self, *, render_markdown: bool = False) -> None:
          self._link_extractor = None
          self._parser_cls = None
          try:

@@ -77,7 +83,7 @@ class RedditRenderer:
          self.render_markdown = render_markdown


-     def _from_comment(self, i: 'Comment') -> Results:
+     def _from_comment(self, i: Comment) -> Results:
          locator = Loc.make(
              title='Reddit comment',
              href=i.url,

@@ -85,7 +91,7 @@ class RedditRenderer:
          yield from self._from_common(i, locator=locator)


-     def _from_submission(self, i: 'Submission') -> Results:
+     def _from_submission(self, i: Submission) -> Results:
          locator = Loc.make(
              title=f'Reddit submission: {i.title}',
              href=i.url,

@@ -93,7 +99,7 @@ class RedditRenderer:
          yield from self._from_common(i, locator=locator)


-     def _from_upvote(self, i: 'Upvote') -> Results:
+     def _from_upvote(self, i: Upvote) -> Results:
          locator = Loc.make(
              title='Reddit upvote',
              href=i.url,

@@ -101,7 +107,7 @@ class RedditRenderer:
          yield from self._from_common(i, locator=locator)


-     def _from_save(self, i: 'Save') -> Results:
+     def _from_save(self, i: Save) -> Results:
          locator = Loc.make(
              title='Reddit save',
              href=i.url,

@@ -117,7 +123,7 @@ class RedditRenderer:
          return text


-     def _from_common(self, i: 'RedditBase', locator: Loc) -> Results:
+     def _from_common(self, i: RedditBase, locator: Loc) -> Results:
          urls = [i.url]
          # TODO this should belong to HPI.. fix permalink handling I guess
          # ok, it's not present for all of them..

@@ -130,7 +136,7 @@ class RedditRenderer:

          context = self._render_body(i.text)

-         emitted: Set[str] = set()
+         emitted: set[str] = set()

          for url in chain(urls, extract_urls(i.text)):
              if url in emitted:

@@ -165,8 +171,3 @@ class RedditRenderer:
              )
              emitted.add(res.url)

-
- import typing
- if typing.TYPE_CHECKING:
-     from my.reddit.common import Submission, Comment, Save, Upvote, RedditBase
-
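
The `my.reddit.common` types now live under `typing.TYPE_CHECKING` at the top of the module, so they are only imported by the type checker; combined with the `__future__` import, the annotations no longer need to be quoted and the module still imports when HPI's reddit module is absent. The general shape of the pattern (the `comment_text` helper is illustrative, not from the package):

    from __future__ import annotations

    import typing

    if typing.TYPE_CHECKING:
        # evaluated by mypy/pyright only; skipped at runtime, so a missing
        # optional dependency cannot break importing this module
        from my.reddit.common import Comment


    def comment_text(c: Comment) -> str:
        # the annotation stays unevaluated at runtime, so Comment
        # does not need to exist when this module is imported
        return getattr(c, "text", "")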

promnesia/sources/roamresearch.py CHANGED
@@ -2,7 +2,7 @@
  Uses [[https://github.com/karlicoss/HPI][HPI]] for Roam Research data
  '''

- from ..common import Results, Visit, Loc, extract_urls
+ from promnesia.common import Loc, Results, Visit, extract_urls


  def index() -> Results:

@@ -43,6 +43,7 @@ def _collect(node: 'RoamNode') -> Results:


  import typing
+
  if typing.TYPE_CHECKING:
      import my.roamresearch as RR
      RoamNode = RR.Node
promnesia/sources/rss.py CHANGED
@@ -2,14 +2,12 @@
  Uses [[https://github.com/karlicoss/HPI][HPI]] for RSS data.
  '''

- from itertools import chain
-
- from ..common import Visit, Loc, extract_urls, Results, get_logger
-
  from datetime import datetime

  import pytz

+ from promnesia.common import Loc, Results, Visit
+
  # arbitrary, 2011-11-04 00:05:23.283+00:00
  default_datetime = datetime.fromtimestamp(1320365123, tz=pytz.utc)
  # TODO FIXME allow for visit not to have datetime?

@@ -17,6 +15,7 @@ default_datetime = datetime.fromtimestamp(1320365123, tz=pytz.utc)

  def index() -> Results:
      from my.rss.all import subscriptions
+
      for feed in subscriptions():
          # TODO locator should be optional too? although could use direct link in the rss reader interface
          locator = Loc.make(title='my.rss')

promnesia/sources/shellcmd.py CHANGED
@@ -2,18 +2,31 @@
  Greps out URLs from an arbitrary shell command results.
  """

- from datetime import datetime
+ from __future__ import annotations
+
  import os
  import re
- from subprocess import run, PIPE
- from typing import Union, Sequence
  import warnings
+ from collections.abc import Sequence
+ from datetime import datetime
+ from subprocess import PIPE, run
+
+ from promnesia.common import (
+     Loc,
+     PathIsh,
+     Results,
+     Visit,
+     _is_windows,
+     extract_urls,
+     file_mtime,
+     get_system_tz,
+     now_tz,
+ )

- from ..common import Visit, Loc, Results, extract_urls, file_mtime, get_system_tz, now_tz, _is_windows, PathIsh
  from .plaintext import _has_grep


- def index(command: Union[str, Sequence[PathIsh]]) -> Results:
+ def index(command: str | Sequence[PathIsh]) -> Results:
      cmd: Sequence[PathIsh]
      cmds: str
      if isinstance(command, str):

@@ -71,7 +84,7 @@ def index(command: Union[str, Sequence[PathIsh]]) -> Results:
                  context=context,
              )

-     r = run(cmd, stdout=PIPE)
+     r = run(cmd, stdout=PIPE, check=False)
      if r.returncode > 0:
          if not (cmd[0] in {'grep', 'findstr'} and r.returncode == 1): # ugh. grep returns 1 on no matches...
              r.check_returncode()
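
`check=False` here only spells out the existing default of `subprocess.run`, so behaviour is unchanged; the caller still inspects `returncode` itself because grep signals "no matches" with exit code 1. Roughly (assuming a grep binary is on PATH, as the module itself requires):

    from subprocess import PIPE, run

    # grep exits 0 on matches, 1 on no matches, >1 on real errors;
    # check=False lets us special-case exit code 1 instead of raising immediately
    r = run(['grep', 'https://', '-'], input=b'no urls in this text\n', stdout=PIPE, check=False)
    if r.returncode > 0 and r.returncode != 1:
        r.check_returncode()  # raises CalledProcessError only for genuine failures
    print(r.stdout)  # b'' -- no matches, but no exception either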

promnesia/sources/signal.py CHANGED
@@ -1,23 +1,23 @@
  """
  Collects visits from Signal Desktop's encrypted SQLIite db(s).
  """
+ from __future__ import annotations

  # Functions get their defaults from module-data.
  #
  # * Open-ciphered-db adapted from:
  #   https://github.com/carderne/signal-export/commit/2284c8f4
  # * Copyright (c) 2019 Chris Arderne, 2020 Kostis Anagnostopoulos
-
-
  import json
  import logging
  import platform
  import sqlite3
  import subprocess as sbp
+ from collections.abc import Iterable, Iterator, Mapping
  from contextlib import contextmanager
  from pathlib import Path
  from textwrap import dedent, indent
- from typing import Any, Iterable, Iterator, Mapping, Union, Optional
+ from typing import Any, Union

  from ..common import Loc, PathIsh, Results, Visit, extract_urls, from_epoch


@@ -29,7 +29,7 @@ def index(
      http_only: bool = False,
      locator_schema: str="editor",
      append_platform_path: bool = False,
-     override_key: Optional[str] = None,
+     override_key: str | None = None,
  ) -> Results:
      """
      :param db_paths:

@@ -109,10 +109,10 @@ messages_query = dedent(
          id,
          type,
          coalesce(
-             profileFullName,
-             profileName,
+             profileFullName,
+             profileName,
              name,
-             profileFamilyName,
+             profileFamilyName,
              e164
          ) as aname,
          name,

@@ -237,11 +237,11 @@ def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path]
      platform_name = platform.system()
      try:
          plat_paths = platform_db_paths[platform_name]
-     except LookupError:
+     except LookupError as le:
          raise ValueError(
              f"Unknown platform({platform_name}!"
              f"\n Expected one of {list(platform_db_paths.keys())}."
-         )
+         ) from le

      if db_paths and append:
          db_paths = [  # type: ignore[assignment]

@@ -261,7 +261,7 @@ def _config_for_dbfile(db_path: Path, default_key=None) -> Path:


  def _key_from_config(signal_desktop_config_path: PathIsh) -> str:
-     with open(signal_desktop_config_path, "r") as conf:
+     with Path(signal_desktop_config_path).open() as conf:
          return json.load(conf)["key"]


@@ -269,6 +269,7 @@ def _key_from_config(signal_desktop_config_path: PathIsh) -> str:
  def connect_db(
      db_path: Path,
      key,
+     *,
      decrypt_db: bool = False,
      sqlcipher_exe: PathIsh = "sqlcipher",
      **decryption_pragmas: Mapping[str, Any],

@@ -333,7 +334,7 @@ def connect_db(
                  check=True,
                  input=sql,
                  capture_output=True,
-                 universal_newlines=True,
+                 text=True,
              )
          except sbp.CalledProcessError as ex:
              prefix = " " * 4

@@ -380,7 +381,7 @@ def _handle_row(row: tuple, db_path: PathIsh, locator_schema: str) -> Results:
      if not urls:
          return

-     assert (
+     assert (  # noqa: PT018
          text and mid and sender and chatname
      ), f"should have eliminated messages without 'http' or missing ids: {row}"

@@ -400,7 +401,7 @@ def _harvest_db(
      db_path: Path,
      messages_query: str,
      *,
-     override_key: Optional[str] = None,
+     override_key: str | None = None,
      locator_schema: str = "editor",
      decrypt_db: bool = False,
      **decryption_pragmas,
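
`raise ... from le` is explicit exception chaining: the original `LookupError` is attached as `__cause__` of the `ValueError` rather than appearing as an unrelated "during handling of the above exception" context. A small illustration with placeholder paths (not the module's actual table):

    platform_db_paths = {'Linux': '~/.config/Signal', 'Darwin': '~/Library/Application Support/Signal'}

    def db_path_for(platform_name: str) -> str:
        try:
            return platform_db_paths[platform_name]
        except LookupError as le:
            # "from le" records the KeyError as __cause__ of the new ValueError
            raise ValueError(f"Unknown platform({platform_name}!") from le

    try:
        db_path_for('Windows')
    except ValueError as e:
        print(repr(e.__cause__))  # KeyError('Windows')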

promnesia/sources/smscalls.py CHANGED
@@ -2,11 +2,11 @@
  Uses [[https://github.com/karlicoss/HPI][HPI]] smscalls module
  '''

- from promnesia.common import Visit, Loc, Results, extract_urls
+ from promnesia.common import Loc, Results, Visit, extract_urls


  def index() -> Results:
-     from . import hpi
+     from . import hpi  # noqa: F401,I001
      from my.smscalls import messages

      for m in messages():

promnesia/sources/stackexchange.py CHANGED
@@ -2,12 +2,13 @@
  Uses [[https://github.com/karlicoss/HPI][HPI]] for Stackexchange data.
  '''

- from ..common import Results, Visit, Loc
+ from promnesia.common import Loc, Results, Visit


  def index() -> Results:
-     from . import hpi
+     from . import hpi  # noqa: F401,I001
      import my.stackexchange.gdpr as G
+
      for v in G.votes():
          if isinstance(v, Exception):
              yield v

promnesia/sources/takeout.py CHANGED
@@ -1,11 +1,14 @@
  '''
  Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#mygoogletakeoutpaths][google.takeout]] module
  '''
- from typing import Iterable, Set, Any, NamedTuple
+
+ from __future__ import annotations
+
  import warnings
+ from collections.abc import Iterable
+ from typing import Any, NamedTuple

- from ..common import Visit, Loc, Results, logger
- from ..compat import removeprefix
+ from promnesia.common import Loc, Results, Visit, logger


  # incase user is using an old version of google_takeout_parser

@@ -14,13 +17,20 @@ class YoutubeCSVStub(NamedTuple):


  def index() -> Results:
-     from . import hpi
-     import json
+     from . import hpi  # noqa: F401

      try:
+         from google_takeout_parser.models import (
+             Activity,
+             ChromeHistory,
+             LikedYoutubeVideo,
+             YoutubeComment,
+         )
+         from google_takeout_parser.parse_csv import (
+             extract_comment_links,
+             reconstruct_comment_content,
+         )
          from my.google.takeout.parser import events
-         from google_takeout_parser.models import Activity, YoutubeComment, LikedYoutubeVideo, ChromeHistory
-         from google_takeout_parser.parse_csv import reconstruct_comment_content, extract_comment_links
      except ModuleNotFoundError as ex:
          logger.exception(ex)
          yield ex

@@ -32,7 +42,7 @@ def index() -> Results:
          return


-     _seen: Set[str] = {
+     _seen: set[str] = {
          # these are definitely not useful for promnesia
          'Location',
          'PlaceVisit',

@@ -54,7 +64,7 @@ def index() -> Results:
          if et_name in _seen:
              return
          _seen.add(et_name)
-         yield RuntimeError(f"Unhandled event {repr(type(e))}: {e}")
+         yield RuntimeError(f"Unhandled event {type(e)!r}: {e}")

      for e in events():
          if isinstance(e, Exception):

@@ -67,13 +77,13 @@ def index() -> Results:
              # when you follow something from search the actual url goes after this
              # e.g. https://www.google.com/url?q=https://en.wikipedia.org/wiki/Clapham
              # note: also title usually starts with 'Visited ', in such case but perhaps fine to keep it
-             url = removeprefix(url, "https://www.google.com/url?q=")
+             url = url.removeprefix("https://www.google.com/url?q=")
              title = e.title

              if e.header == 'Chrome':
                  # title contains 'Visited <page title>' in this case
                  context = None
-                 title = removeprefix(title, 'Visited ')
+                 title = title.removeprefix('Visited ')
              elif e.header in _CLEAR_CONTEXT_FOR_HEADERS:
                  # todo perhaps could add to some sort of metadata?
                  # only useful for debugging really

@@ -131,7 +141,7 @@ def index() -> Results:
                  url=url, dt=e.dt, context=e.content, locator=Loc(title=e.content, href=url)
              )
          elif imported_yt_csv_models and isinstance(e, CSVYoutubeComment):
-             contentJSON = json.loads(e.contentJSON)
+             contentJSON = e.contentJSON
              content = reconstruct_comment_content(contentJSON, format='text')
              if isinstance(content, Exception):
                  yield content

@@ -149,7 +159,7 @@ def index() -> Results:
                  url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url)
              )
          elif imported_yt_csv_models and isinstance(e, CSVYoutubeLiveChat):
-             contentJSON = json.loads(e.contentJSON)
+             contentJSON = e.contentJSON
              content = reconstruct_comment_content(contentJSON, format='text')
              if isinstance(content, Exception):
                  yield content
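
`{type(e)!r}` is a drop-in replacement for `repr(type(e))`: the `!r` conversion applies repr() inside the f-string, so the emitted message is identical.

    e = ValueError('boom')
    assert f"Unhandled event {type(e)!r}: {e}" == f"Unhandled event {repr(type(e))}: {e}"
    print(f"Unhandled event {type(e)!r}: {e}")  # Unhandled event <class 'ValueError'>: boom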

promnesia/sources/takeout_legacy.py CHANGED
@@ -1,9 +1,13 @@
- from ..common import Visit, logger, PathIsh, Url, Loc, Results
+ from __future__ import annotations
+
+ from promnesia.common import Loc, Results, Visit, logger
+

  # TODO make an iterator, insert in db as we go? handle errors gracefully?
  def index() -> Results:
-     from . import hpi
+     from . import hpi  # noqa: F401,I001
      from my.google.takeout.paths import get_takeouts
+
      takeouts = list(get_takeouts())
      # TODO if no takeouts, raise?
      # although could raise a warning on top level, when source emitted no takeouts

@@ -22,19 +26,17 @@ def index() -> Results:



- import pytz
- from itertools import chain
+ import json
+ from collections.abc import Iterable
  from datetime import datetime
- from typing import List, Optional, Iterable, TYPE_CHECKING
+ from itertools import chain
  from pathlib import Path
- import json
-
-
- from .. import config
-

+ import pytz
  from more_itertools import unique_everseen

+ from promnesia import config
+
  try:
      from cachew import cachew
  except ModuleNotFoundError as me:

@@ -50,7 +52,9 @@ TakeoutPath = Path


  def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
+     # FIXME switch to actual kompress? and use CPath?
      from my.core.kompress import kexists
+
      # TODO glob
      # TODO not sure about windows path separators??
      spath = 'Takeout/My Activity/' + kind

@@ -61,7 +65,7 @@ def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:

      locator = Loc.file(spath)
      from my.google.takeout.html import read_html
-     for dt, url, title in read_html(takeout, spath):
+     for dt, url, _title in read_html(takeout, spath):
          yield Visit(
              url=url,
              dt=dt,