promnesia 1.3.20241021__py3-none-any.whl → 1.4.20250909__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. promnesia/__init__.py +4 -1
  2. promnesia/__main__.py +72 -59
  3. promnesia/cannon.py +90 -89
  4. promnesia/common.py +74 -62
  5. promnesia/compare.py +15 -10
  6. promnesia/config.py +22 -17
  7. promnesia/database/dump.py +1 -2
  8. promnesia/extract.py +6 -6
  9. promnesia/logging.py +27 -15
  10. promnesia/misc/install_server.py +25 -19
  11. promnesia/server.py +69 -53
  12. promnesia/sources/auto.py +65 -51
  13. promnesia/sources/browser.py +7 -2
  14. promnesia/sources/browser_legacy.py +51 -40
  15. promnesia/sources/demo.py +0 -1
  16. promnesia/sources/fbmessenger.py +0 -1
  17. promnesia/sources/filetypes.py +15 -11
  18. promnesia/sources/github.py +4 -1
  19. promnesia/sources/guess.py +4 -1
  20. promnesia/sources/hackernews.py +5 -7
  21. promnesia/sources/hpi.py +3 -1
  22. promnesia/sources/html.py +4 -2
  23. promnesia/sources/instapaper.py +1 -0
  24. promnesia/sources/markdown.py +4 -4
  25. promnesia/sources/org.py +17 -8
  26. promnesia/sources/plaintext.py +14 -11
  27. promnesia/sources/pocket.py +2 -1
  28. promnesia/sources/reddit.py +5 -8
  29. promnesia/sources/roamresearch.py +3 -1
  30. promnesia/sources/rss.py +4 -5
  31. promnesia/sources/shellcmd.py +3 -6
  32. promnesia/sources/signal.py +14 -14
  33. promnesia/sources/smscalls.py +0 -1
  34. promnesia/sources/stackexchange.py +2 -2
  35. promnesia/sources/takeout.py +14 -21
  36. promnesia/sources/takeout_legacy.py +16 -10
  37. promnesia/sources/telegram.py +7 -3
  38. promnesia/sources/telegram_legacy.py +5 -5
  39. promnesia/sources/twitter.py +1 -1
  40. promnesia/sources/vcs.py +6 -3
  41. promnesia/sources/viber.py +2 -2
  42. promnesia/sources/website.py +4 -3
  43. promnesia/sqlite.py +10 -7
  44. promnesia/tests/common.py +2 -0
  45. promnesia/tests/server_helper.py +2 -2
  46. promnesia/tests/sources/test_filetypes.py +9 -7
  47. promnesia/tests/sources/test_hypothesis.py +7 -3
  48. promnesia/tests/sources/test_org.py +7 -2
  49. promnesia/tests/sources/test_plaintext.py +9 -7
  50. promnesia/tests/sources/test_shellcmd.py +10 -9
  51. promnesia/tests/test_cannon.py +254 -237
  52. promnesia/tests/test_cli.py +8 -2
  53. promnesia/tests/test_compare.py +16 -12
  54. promnesia/tests/test_db_dump.py +4 -3
  55. promnesia/tests/test_extract.py +7 -4
  56. promnesia/tests/test_indexer.py +10 -10
  57. promnesia/tests/test_server.py +10 -10
  58. promnesia/tests/utils.py +1 -5
  59. promnesia-1.4.20250909.dist-info/METADATA +66 -0
  60. promnesia-1.4.20250909.dist-info/RECORD +80 -0
  61. {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info}/WHEEL +1 -2
  62. promnesia/kjson.py +0 -122
  63. promnesia/sources/__init__.pyi +0 -0
  64. promnesia-1.3.20241021.dist-info/METADATA +0 -55
  65. promnesia-1.3.20241021.dist-info/RECORD +0 -83
  66. promnesia-1.3.20241021.dist-info/top_level.txt +0 -1
  67. {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info}/entry_points.txt +0 -0
  68. {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info/licenses}/LICENSE +0 -0
promnesia/sources/github.py CHANGED
@@ -1,6 +1,7 @@
 '''
 Uses [[https://github.com/karlicoss/HPI][HPI]] github module
 '''
+
 from __future__ import annotations
 
 # Note: requires the 'mistletoe' module if you enable render_markdown
@@ -16,7 +17,9 @@ def index(*, render_markdown: bool = False) -> Results:
             from .markdown import TextParser, extract_from_text
         except ImportError as import_err:
             logger.exception(import_err)
-            logger.critical("Could not import markdown module to render github body markdown. Try 'python3 -m pip install mistletoe'")
+            logger.critical(
+                "Could not import markdown module to render github body markdown. Try 'python3 -m pip install mistletoe'"
+            )
             render_markdown = False
 
     for e in events():
promnesia/sources/guess.py CHANGED
@@ -22,14 +22,17 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
     ps = str(path)
     # TODO better url detection
 
-    index_: Any # meh
+    index_: Any  # meh
     if is_git_repo(ps):
         from . import vcs
+
         index_ = vcs.index
     elif is_website(ps):
         from . import website
+
         index_ = website.index
     else:
         from . import auto
+
         index_ = auto.index
     yield from index_(path, *args, **kwargs)
promnesia/sources/hackernews.py CHANGED
@@ -21,9 +21,7 @@ def index() -> Results:
             title = item.title
         elif item.text_html:
             title = item.text_html
-        title = textwrap.shorten(
-            title, width=79, placeholder="…",
-            break_long_words=True)
+        title = textwrap.shorten(title, width=79, placeholder="…", break_long_words=True)
         # The locator is always the HN story. If the story is a link (as
         # opposed to a text post), we insert a visit such that the link
         # will point back to the corresponding HN story.
@@ -33,8 +31,8 @@ def index() -> Results:
             urls.append(item.url)
         for url in urls:
             yield Visit(
-                    url=url,
-                    dt=item.created,
-                    locator=loc,
-                    context=title,
+                url=url,
+                dt=item.created,
+                locator=loc,
+                context=title,
             )
promnesia/sources/hpi.py CHANGED
@@ -8,4 +8,6 @@ try:
     import my # noqa: F401
 except ImportError as e:
     logger.exception(e)
-    logger.critical("Failed during 'import my'. You probably need to install & configure HPI package first (see 'https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org')")
+    logger.critical(
+        "Failed during 'import my'. You probably need to install & configure HPI package first (see 'https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org')"
+    )
promnesia/sources/html.py CHANGED
@@ -7,7 +7,7 @@ from __future__ import annotations
 from collections.abc import Iterator
 from pathlib import Path
 
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, Tag
 
 from promnesia.common import Loc, PathIsh, Results, Visit, file_mtime
 
@@ -25,11 +25,13 @@ def extract_urls_from_html(s: str) -> Iterator[Url]:
     """
     soup = BeautifulSoup(s, 'lxml')
     for a in soup.find_all('a'):
+        assert isinstance(a, Tag), a  # make mypy happy
         href = a.attrs.get('href')
         if href is None or ('://' not in href):
             # second condition means relative link
             continue
-        text = a.text
+        assert isinstance(href, str), href  # make mypy happy
+        text: str = a.text
         yield (href, text)
 
 
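For context (not part of the package diff): the `Tag` narrowing added above is a common way to satisfy a type checker, since `soup.find_all()` is not annotated as returning `Tag` objects specifically. A minimal standalone sketch of the same pattern, using a made-up HTML snippet and `html.parser` instead of `lxml` so it runs without extra dependencies:

```python
from bs4 import BeautifulSoup, Tag

# hypothetical input; the real source parses whole HTML files with 'lxml'
html = '<p><a href="https://example.com">example</a><a>no href</a></p>'

soup = BeautifulSoup(html, 'html.parser')
for a in soup.find_all('a'):
    # narrowing to Tag tells the type checker that .attrs and .text are available
    assert isinstance(a, Tag), a
    href = a.attrs.get('href')
    if href is None or '://' not in href:
        continue  # skip anchors without an absolute link
    print(href, a.text)
```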
promnesia/sources/instapaper.py CHANGED
@@ -1,6 +1,7 @@
 '''
 Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myinstapaper][instapaper]] module
 '''
+
 from promnesia.common import Loc, Results, Visit
 
 
promnesia/sources/markdown.py CHANGED
@@ -4,10 +4,10 @@ from collections.abc import Iterator
 from pathlib import Path
 from typing import NamedTuple
 
-import mistletoe # type: ignore
-import mistletoe.block_token as BT # type: ignore
-from mistletoe.html_renderer import HTMLRenderer # type: ignore
-from mistletoe.span_token import AutoLink, Link # type: ignore
+import mistletoe # type: ignore[import-untyped]
+import mistletoe.block_token as BT # type: ignore[import-untyped]
+from mistletoe.html_renderer import HTMLRenderer # type: ignore[import-untyped]
+from mistletoe.span_token import AutoLink, Link # type: ignore[import-untyped]
 
 from promnesia.common import (
     Extraction,
promnesia/sources/org.py CHANGED
@@ -4,7 +4,7 @@ import re
 from collections.abc import Iterable, Iterator
 from datetime import datetime
 from pathlib import Path
-from typing import NamedTuple, Optional, cast
+from typing import NamedTuple, cast
 
 import orgparse
 from orgparse.date import OrgDate, gene_timestamp_regex
@@ -25,6 +25,8 @@ from promnesia.common import (
 UPDATE_ORGPARSE_WARNING = 'WARNING: please update orgparse version to a more recent (pip3 install -U orgparse)'
 
 _warned = False
+
+
 def warn_old_orgparse_once() -> Iterable[Exception]:
     global _warned
     if _warned:
@@ -45,6 +47,7 @@ CREATED_RGX = re.compile(gene_timestamp_regex(brtype='inactive'), re.VERBOSE)
 ** subnote
 """
 
+
 class Parsed(NamedTuple):
     dt: datetime | None
     heading: str
@@ -56,14 +59,14 @@ def _parse_node(n: OrgNode) -> Parsed:
 
     heading = n.get_heading('raw')
     pp = n.properties
-    createds = cast(Optional[str], pp.get('CREATED', None))
+    createds = cast(str | None, pp.get('CREATED', None))
    if createds is None:
         # TODO replace with 'match', but need to strip off priority etc first?
         # see _parse_heading in orgparse
         # todo maybe use n.get_timestamps(inactive=True, point=True)? only concern is that it's searching in the body as well?
         m = CREATED_RGX.search(heading)
         if m is not None:
-            createds = m.group(0) # could be None
+            createds = m.group(0)  # could be None
             # todo a bit hacky..
             heading = heading.replace(createds + ' ', '')
     if createds is not None:
@@ -72,7 +75,11 @@ def _parse_node(n: OrgNode) -> Parsed:
             dt = None
         else:
             [odt] = OrgDate.list_from_str(createds)
-            dt = odt.start
+            start = odt.start
+            if not isinstance(start, datetime):  # could be date
+                dt = datetime.combine(start, datetime.min.time())  # meh, but the best we can do?
+            else:
+                dt = start
     else:
         dt = None
     return Parsed(dt=dt, heading=heading)
@@ -160,7 +167,7 @@ def extract_from_file(fname: PathIsh) -> Results:
 
         (parsed, node) = wr
         dt = parsed.dt
-        assert dt is not None # shouldn't be because of fallback
+        assert dt is not None  # shouldn't be because of fallback
         for r in iter_org_urls(node):
             # TODO get body recursively? not sure
             try:
@@ -170,7 +177,7 @@ def extract_from_file(fname: PathIsh) -> Results:
                 ctx = parsed.heading + tagss + '\n' + get_body_compat(node)
             except Exception as e:
                 yield e
-                ctx = 'ERROR' # TODO more context?
+                ctx = 'ERROR'  # TODO more context?
 
             if isinstance(r, Url):
                 yield Visit(
@@ -178,9 +185,11 @@ def extract_from_file(fname: PathIsh) -> Results:
                     dt=dt,
                     locator=Loc.file(
                         fname,
-                        line=getattr(node, 'linenumber', None), # make it defensive so it works against older orgparse (pre 0.2)
+                        line=getattr(
+                            node, 'linenumber', None
+                        ),  # make it defensive so it works against older orgparse (pre 0.2)
                     ),
                     context=ctx,
                 )
-            else: # error
+            else:  # error
                 yield r
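For context (not part of the package diff): the `_parse_node` change above guards against `OrgDate.start` being a plain `date` rather than a `datetime`. A minimal standalone sketch of that normalization, using a hypothetical date-only value:

```python
from datetime import date, datetime

# hypothetical stand-in for odt.start of a date-only org timestamp like [2011-11-04]
start = date(2011, 11, 4)

# date-only values are promoted to midnight so downstream code always gets a datetime;
# datetime is a subclass of date, so real datetimes pass through unchanged
dt = start if isinstance(start, datetime) else datetime.combine(start, datetime.min.time())
print(dt)  # 2011-11-04 00:00:00
```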
promnesia/sources/plaintext.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
 from functools import lru_cache
 from pathlib import Path
 
-from promnesia.common import PathIsh, _is_windows, get_logger, get_tmpdir
+from promnesia.common import PathIsh, _is_windows
 
 # https://linux-and-mac-hacks.blogspot.co.uk/2013/04/use-grep-and-regular-expressions-to.html
 _URL_REGEX = r'\b(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@#/%=~_|]'
@@ -17,6 +17,7 @@ if _is_windows:
 @lru_cache
 def _has_grep() -> bool:
     import shutil
+
     return shutil.which('grep') is not None
 
 
@@ -25,9 +26,9 @@ Command = list[str]
 
 _GREP_ARGS: Command = [
     '--color=never',
-    '-H', # always show filename TODO not sure if works on osx
-    '-n', # print line numbers (to restore context)
-    '-I', # ignore binaries
+    '-H',  # always show filename TODO not sure if works on osx
+    '-n',  # print line numbers (to restore context)
+    '-I',  # ignore binaries
 ]
 
 if not _is_windows:
@@ -36,6 +37,7 @@ if not _is_windows:
         '--exclude-dir=".git"',
     ]
 
+
 # NOTE: grep/findstr exit with code 1 on no matches...
 # we hack around it in shellcmd module (search 'grep')
 def _grep(*, paths: list[str], recursive: bool) -> Command:
@@ -43,11 +45,12 @@ def _grep(*, paths: list[str], recursive: bool) -> Command:
         'grep',
         *(['-r'] if recursive else []),
         *_GREP_ARGS,
-        '-E', # 'extended' syntax
+        '-E',  # 'extended' syntax
         _URL_REGEX,
         *paths,
     ]
 
+
 def _findstr(*, path: str, recursive: bool) -> Command:
     return [
         'findstr',
@@ -85,18 +88,18 @@ def _extract_from_file(path: str) -> Command:
 def extract_from_path(path: PathIsh) -> Command:
     pp = Path(path)
 
-    tdir = get_tmpdir()
-
-    logger = get_logger()
-    if pp.is_dir(): # TODO handle archives here???
+    if pp.is_dir():  # TODO handle archives here???
         return _extract_from_dir(str(pp))
 
-    if any(pp.suffix == ex for ex in (
+    if any(
+        pp.suffix == ex
+        for ex in (
             '.xz',
             '.bz2',
             '.gz',
             '.zip',
-    )):
+        )
+    ):
         # todo should be debug?
         # or should delete it completely, feels like unpacking archives here is a bit too much
         raise RuntimeError(f"Archives aren't supported yet: {path}")
promnesia/sources/pocket.py CHANGED
@@ -16,7 +16,8 @@ def index() -> Results:
         loc = Loc.make(title=title, href=a.pocket_link)
         # Add a reverse locator so that the Promnesia browser extension shows a
         # link on the Pocket page back to the original URL.
-        loc_rev = Loc.make(title=title, href=a.url)
+        # FIXME need to actually use it
+        _loc_rev = Loc.make(title=title, href=a.url)
         hls = a.highlights
         excerpt = a.json.get('excerpt', None)
         if len(hls) == 0:
promnesia/sources/reddit.py CHANGED
@@ -21,6 +21,7 @@ def index(*, render_markdown: bool = False, renderer: type[RedditRenderer] | Non
     except ModuleNotFoundError as e:
         if "No module named 'my.reddit.all'" in str(e):
             import warnings
+
             warnings.warn("DEPRECATED/reddit: Using an old version of HPI, please update")
             from my.reddit import comments, saved, submissions, upvoted
         else:
@@ -69,6 +70,7 @@ class RedditRenderer:
         self._parser_cls = None
         try:
             from .markdown import TextParser, extract_from_text
+
             self._link_extractor = extract_from_text
             self._parser_cls = TextParser
         except ImportError as import_err:
@@ -78,11 +80,12 @@ class RedditRenderer:
             # only send error if the user is trying to enable this feature
             if render_markdown:
                 logger.exception(import_err)
-                logger.critical("Could not import markdown module to render reddit markdown. Try 'python3 -m pip install mistletoe'")
+                logger.critical(
+                    "Could not import markdown module to render reddit markdown. Try 'python3 -m pip install mistletoe'"
+                )
             render_markdown = False # force to be false, couldn't import
         self.render_markdown = render_markdown
 
-
     def _from_comment(self, i: Comment) -> Results:
         locator = Loc.make(
             title='Reddit comment',
@@ -90,7 +93,6 @@ class RedditRenderer:
         )
         yield from self._from_common(i, locator=locator)
 
-
     def _from_submission(self, i: Submission) -> Results:
         locator = Loc.make(
             title=f'Reddit submission: {i.title}',
@@ -98,7 +100,6 @@ class RedditRenderer:
         )
         yield from self._from_common(i, locator=locator)
 
-
     def _from_upvote(self, i: Upvote) -> Results:
         locator = Loc.make(
             title='Reddit upvote',
@@ -106,7 +107,6 @@ class RedditRenderer:
         )
         yield from self._from_common(i, locator=locator)
 
-
     def _from_save(self, i: Save) -> Results:
         locator = Loc.make(
             title='Reddit save',
@@ -114,7 +114,6 @@ class RedditRenderer:
         )
         yield from self._from_common(i, locator=locator)
 
-
     # to allow for possible subclassing by the user?
     def _render_body(self, text: str) -> str:
         if self.render_markdown and self._parser_cls is not None:
@@ -122,7 +121,6 @@ class RedditRenderer:
         else:
             return text
 
-
     def _from_common(self, i: RedditBase, locator: Loc) -> Results:
         urls = [i.url]
         # TODO this should belong to HPI.. fix permalink handling I guess
@@ -170,4 +168,3 @@ class RedditRenderer:
                 locator=locator,
             )
             emitted.add(res.url)
-
promnesia/sources/roamresearch.py CHANGED
@@ -7,6 +7,7 @@ from promnesia.common import Loc, Results, Visit, extract_urls
 
 def index() -> Results:
     import my.roamresearch as RR
+
     roam = RR.roam()
     for node in roam.traverse():
         yield from _collect(node)
@@ -14,7 +15,7 @@ def index() -> Results:
 
 def _collect(node: 'RoamNode') -> Results:
     title = node.title
-    body  = node.body or ''
+    body = node.body or ''
     if title is None:
         # most notes don't have title, so we just take the first line instead..
         lines = body.splitlines(keepends=True)
@@ -46,4 +47,5 @@ import typing
 
 if typing.TYPE_CHECKING:
     import my.roamresearch as RR
+
     RoamNode = RR.Node
promnesia/sources/rss.py CHANGED
@@ -2,17 +2,16 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for RSS data.
 '''
 
-from datetime import datetime
-
-import pytz
+from datetime import datetime, timezone
 
 from promnesia.common import Loc, Results, Visit
 
 # arbitrary, 2011-11-04 00:05:23.283+00:00
-default_datetime = datetime.fromtimestamp(1320365123, tz=pytz.utc)
+default_datetime = datetime.fromtimestamp(1320365123, tz=timezone.utc)
 # TODO FIXME allow for visit not to have datetime?
 # I.e. even having context is pretty good!
 
+
 def index() -> Results:
     from my.rss.all import subscriptions
 
@@ -22,6 +21,6 @@ def index() -> Results:
         yield Visit(
             url=feed.url,
             dt=feed.created_at or default_datetime,
-            context='RSS subscription', # TODO use 'provider', etc?
+            context='RSS subscription',  # TODO use 'provider', etc?
             locator=locator,
         )
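For context (not part of the package diff): this swaps the third-party `pytz.utc` constant for the stdlib `timezone.utc`, which yields an equivalent aware datetime, so the `pytz` dependency can be dropped. A quick standalone check of the replacement constant:

```python
from datetime import datetime, timezone

# same arbitrary timestamp as the module constant; stdlib only, no pytz needed
default_datetime = datetime.fromtimestamp(1320365123, tz=timezone.utc)
print(default_datetime.isoformat())  # 2011-11-04T00:05:23+00:00
```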
promnesia/sources/shellcmd.py CHANGED
@@ -19,7 +19,6 @@ from promnesia.common import (
     _is_windows,
     extract_urls,
     file_mtime,
-    get_system_tz,
     now_tz,
 )
 
@@ -37,8 +36,6 @@ def index(command: str | Sequence[PathIsh]) -> Results:
         cmds = ' '.join(map(str, command))
         cmd = command
 
-    tz = get_system_tz()
-
     # ugh... on windows grep does something nasty? e.g:
     # grep --color=never -r -H -n -I -E http 'D:\\a\\promnesia\\promnesia\\tests\\testdata\\custom'
     # D:\a\promnesia\promnesia\tests\testdata\custom/file1.txt:1:Right, so this points at http://google.com
@@ -55,9 +52,9 @@ def index(command: str | Sequence[PathIsh]) -> Results:
             fname = None
             lineno = None
         else:
-            fname  = m.group(1)
+            fname = m.group(1)
             lineno = int(m.group(2))
-            line   = m.group(3)
+            line = m.group(3)
 
         if fname is not None and needs_windows_grep_patching:
             fname = fname.replace('/', os.sep)
@@ -86,7 +83,7 @@ def index(command: str | Sequence[PathIsh]) -> Results:
 
     r = run(cmd, stdout=PIPE, check=False)
     if r.returncode > 0:
-        if not (cmd[0] in {'grep', 'findstr'} and r.returncode == 1): # ugh. grep returns 1 on no matches...
+        if not (cmd[0] in {'grep', 'findstr'} and r.returncode == 1):  # ugh. grep returns 1 on no matches...
            r.check_returncode()
     output = r.stdout
     assert output is not None
promnesia/sources/signal.py CHANGED
@@ -1,6 +1,7 @@
 """
 Collects visits from Signal Desktop's encrypted SQLIite db(s).
 """
+
 from __future__ import annotations
 
 # Functions get their defaults from module-data.
@@ -17,17 +18,15 @@ from collections.abc import Iterable, Iterator, Mapping
 from contextlib import contextmanager
 from pathlib import Path
 from textwrap import dedent, indent
-from typing import Any, Union
+from typing import Any
 
 from ..common import Loc, PathIsh, Results, Visit, extract_urls, from_epoch
 
-PathIshes = Union[PathIsh, Iterable[PathIsh]]
-
 
 def index(
     *db_paths: PathIsh,
     http_only: bool = False,
-    locator_schema: str="editor",
+    locator_schema: str = "editor",
     append_platform_path: bool = False,
     override_key: str | None = None,
 ) -> Results:
@@ -51,8 +50,7 @@ def index(
         otherwise, this same key is used for harvesting all db-files.
     """
     logger.debug(
-        "http_only?(%s), locator_schema?(%s), append_platform_path?(%s), "
-        "overide_key given?(%s), db_paths: %s",
+        "http_only?(%s), locator_schema?(%s), append_platform_path?(%s), overide_key given?(%s), db_paths: %s",
        http_only,
        locator_schema,
        append_platform_path,
@@ -171,7 +169,10 @@ def _expand_path(path_pattern: PathIsh) -> Iterable[Path]:
 
     Expansion code adapted from https://stackoverflow.com/a/51108375/548792
     to handle also degenerate cases (``'', '.', '/'``):
+    """
 
+    # NOTE: suppressing doctest from github actions
+    """
     >>> str(next(iter(_get_files('/'))))
     '/'
 
@@ -194,7 +195,7 @@ def _expand_path(path_pattern: PathIsh) -> Iterable[Path]:
     return path.glob(str(Path(*parts))) if parts else [path]
 
 
-def _expand_paths(paths: PathIshes) -> Iterable[Path]:
+def _expand_paths(paths: PathIsh | Iterable[PathIsh]) -> Iterable[Path]:
     if _is_pathish(paths):
         paths = [paths] # type: ignore[list-item]
     return [pp.resolve() for p in paths for pp in _expand_path(p)] # type: ignore[union-attr]
@@ -214,7 +215,10 @@ def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path]
         one or more pathish
 
     Note: needed `append` here, to resolve paths.
+    """
 
+    # NOTE: suppressing doctest from running on Github actions
+    """
     >>> bool(collect_db_paths()) # my home-path
     True
     >>> collect_db_paths(None)
@@ -239,8 +243,7 @@ def collect_db_paths(*db_paths: PathIsh, append: bool = False) -> Iterable[Path]
         plat_paths = platform_db_paths[platform_name]
     except LookupError as le:
         raise ValueError(
-            f"Unknown platform({platform_name}!"
-            f"\n Expected one of {list(platform_db_paths.keys())}."
+            f"Unknown platform({platform_name}!\n Expected one of {list(platform_db_paths.keys())}."
         ) from le
 
     if db_paths and append:
@@ -325,9 +328,7 @@ def connect_db(
         )
         sql = "\n".join(sql_cmds)
         cmd = [sqlcipher_exe, str(db_path)]
-        logger.debug(
-            "Decrypting db '%s' with cmd: %s <<<EOF\n%s\nEOF", db_path, cmd, sql
-        )
+        logger.debug("Decrypting db '%s' with cmd: %s <<<EOF\n%s\nEOF", db_path, cmd, sql)
         try:
             sbp.run(
                 cmd,
@@ -358,12 +359,11 @@ def connect_db(
             yield db
         finally:
             try:
-                if db:
+                if db is not None:
                     db.close()
             finally:
                 if decrypted_file and decrypted_file.exists():
                     try:
-
                         logger.debug("Deleting temporary decrypted db: %s", decrypted_file)
                         decrypted_file.unlink()
                     except Exception as ex:
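For context (not part of the package diff): the docstring edits above close the real docstring early and leave the `>>>` examples in a bare string literal that follows it; since doctest only inspects `__doc__`, those examples are no longer collected (e.g. on CI where the referenced paths don't exist). A tiny illustration of the pattern with a made-up function:

```python
import doctest


def double(x: int) -> int:
    """Return x * 2."""

    # examples in a bare string literal are not part of __doc__, so doctest skips them
    """
    >>> double(2)
    4
    """
    return x * 2


print(double.__doc__)     # Return x * 2.
print(doctest.testmod())  # TestResults(failed=0, attempted=0)
```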
promnesia/sources/smscalls.py CHANGED
@@ -10,7 +10,6 @@ def index() -> Results:
     from my.smscalls import messages
 
     for m in messages():
-
         if isinstance(m, Exception):
             yield m
             continue
promnesia/sources/stackexchange.py CHANGED
@@ -16,7 +16,7 @@ def index() -> Results:
         yield Visit(
             url=v.link,
             dt=v.when,
-            context='voted', # todo use the votetype? although maybe worth ignoring downvotes
+            context='voted',  # todo use the votetype? although maybe worth ignoring downvotes
             # or, downvotes could have 'negative' ranking or something
-            locator=Loc.make(title='voted', href=v.link)
+            locator=Loc.make(title='voted', href=v.link),
         )
promnesia/sources/takeout.py CHANGED
@@ -35,13 +35,15 @@ def index() -> Results:
         logger.exception(ex)
         yield ex
 
-        warnings.warn("Please set up my.google.takeout.parser module for better takeout support. Falling back to legacy implementation.")
+        warnings.warn(
+            "Please set up my.google.takeout.parser module for better takeout support. Falling back to legacy implementation."
+        )
 
         from . import takeout_legacy
+
         yield from takeout_legacy.index()
         return
 
-
     _seen: set[str] = {
         # these are definitely not useful for promnesia
         'Location',
@@ -52,10 +54,13 @@ def index() -> Results:
     imported_yt_csv_models = False
     try:
         from google_takeout_parser.models import CSVYoutubeComment, CSVYoutubeLiveChat
+
         imported_yt_csv_models = True
     except ImportError:
         # warn user to upgrade google_takeout_parser
-        warnings.warn("Please upgrade google_takeout_parser (`pip install -U google_takeout_parser`) to support the new format for youtube comments")
+        warnings.warn(
+            "Please upgrade google_takeout_parser (`pip install -U google_takeout_parser`) to support the new format for youtube comments"
+        )
         CSVYoutubeComment = YoutubeCSVStub # type: ignore[misc,assignment]
         CSVYoutubeLiveChat = YoutubeCSVStub # type: ignore[misc,assignment]
 
@@ -130,16 +135,12 @@ def index() -> Results:
         elif isinstance(e, LikedYoutubeVideo):
             # TODO not sure if desc makes sense here since it's not user produced data
             # it's just a part of video meta?
-            yield Visit(
-                url=e.link, dt=e.dt, context=e.desc, locator=Loc(title=e.title, href=e.link)
-            )
+            yield Visit(url=e.link, dt=e.dt, context=e.desc, locator=Loc(title=e.title, href=e.link))
         elif isinstance(e, YoutubeComment):
             for url in e.urls:
                 # todo: use url_metadata to improve locator?
                 # or maybe just extract first sentence?
-                yield Visit(
-                    url=url, dt=e.dt, context=e.content, locator=Loc(title=e.content, href=url)
-                )
+                yield Visit(url=url, dt=e.dt, context=e.content, locator=Loc(title=e.content, href=url))
         elif imported_yt_csv_models and isinstance(e, CSVYoutubeComment):
             contentJSON = e.contentJSON
             content = reconstruct_comment_content(contentJSON, format='text')
@@ -152,12 +153,8 @@ def index() -> Results:
                 continue
             context = f"Commented on {e.video_url}"
             for url in links:
-                yield Visit(
-                    url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url)
-                )
-            yield Visit(
-                url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url)
-            )
+                yield Visit(url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url))
+            yield Visit(url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url))
         elif imported_yt_csv_models and isinstance(e, CSVYoutubeLiveChat):
             contentJSON = e.contentJSON
             content = reconstruct_comment_content(contentJSON, format='text')
@@ -170,12 +167,8 @@ def index() -> Results:
                 continue
             context = f"Commented on livestream {e.video_url}"
             for url in links:
-                yield Visit(
-                    url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url)
-                )
-            yield Visit(
-                url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url)
-            )
+                yield Visit(url=url, dt=e.dt, context=content, locator=Loc(title=context, href=url))
+            yield Visit(url=e.video_url, dt=e.dt, context=content, locator=Loc(title=context, href=e.video_url))
         else:
             yield from warn_once_if_not_seen(e)