promnesia 1.2.20230515__py3-none-any.whl → 1.3.20241021__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. promnesia/__init__.py +14 -3
  2. promnesia/__main__.py +60 -35
  3. promnesia/cannon.py +27 -27
  4. promnesia/common.py +85 -67
  5. promnesia/compare.py +21 -22
  6. promnesia/compat.py +10 -10
  7. promnesia/config.py +23 -23
  8. promnesia/database/common.py +67 -0
  9. promnesia/database/dump.py +188 -0
  10. promnesia/{read_db.py → database/load.py} +16 -17
  11. promnesia/extract.py +14 -11
  12. promnesia/kjson.py +12 -11
  13. promnesia/logging.py +4 -4
  14. promnesia/misc/__init__.pyi +0 -0
  15. promnesia/misc/config_example.py +1 -2
  16. promnesia/misc/install_server.py +7 -9
  17. promnesia/server.py +57 -47
  18. promnesia/sources/__init__.pyi +0 -0
  19. promnesia/sources/auto.py +50 -35
  20. promnesia/sources/auto_logseq.py +6 -5
  21. promnesia/sources/auto_obsidian.py +2 -2
  22. promnesia/sources/browser.py +14 -9
  23. promnesia/sources/browser_legacy.py +26 -16
  24. promnesia/sources/demo.py +19 -3
  25. promnesia/sources/fbmessenger.py +3 -2
  26. promnesia/sources/filetypes.py +16 -7
  27. promnesia/sources/github.py +7 -9
  28. promnesia/sources/guess.py +2 -1
  29. promnesia/sources/hackernews.py +2 -2
  30. promnesia/sources/hpi.py +2 -2
  31. promnesia/sources/html.py +7 -5
  32. promnesia/sources/hypothesis.py +4 -3
  33. promnesia/sources/instapaper.py +2 -2
  34. promnesia/sources/markdown.py +31 -21
  35. promnesia/sources/org.py +27 -13
  36. promnesia/sources/plaintext.py +30 -29
  37. promnesia/sources/pocket.py +3 -2
  38. promnesia/sources/reddit.py +20 -19
  39. promnesia/sources/roamresearch.py +2 -1
  40. promnesia/sources/rss.py +4 -5
  41. promnesia/sources/shellcmd.py +19 -6
  42. promnesia/sources/signal.py +33 -24
  43. promnesia/sources/smscalls.py +2 -2
  44. promnesia/sources/stackexchange.py +4 -3
  45. promnesia/sources/takeout.py +76 -9
  46. promnesia/sources/takeout_legacy.py +24 -12
  47. promnesia/sources/telegram.py +13 -11
  48. promnesia/sources/telegram_legacy.py +18 -7
  49. promnesia/sources/twitter.py +6 -5
  50. promnesia/sources/vcs.py +5 -3
  51. promnesia/sources/viber.py +10 -9
  52. promnesia/sources/website.py +4 -4
  53. promnesia/sources/zulip.py +3 -2
  54. promnesia/sqlite.py +7 -4
  55. promnesia/tests/__init__.py +0 -0
  56. promnesia/tests/common.py +140 -0
  57. promnesia/tests/server_helper.py +67 -0
  58. promnesia/tests/sources/__init__.py +0 -0
  59. promnesia/tests/sources/test_auto.py +65 -0
  60. promnesia/tests/sources/test_filetypes.py +43 -0
  61. promnesia/tests/sources/test_hypothesis.py +39 -0
  62. promnesia/tests/sources/test_org.py +64 -0
  63. promnesia/tests/sources/test_plaintext.py +25 -0
  64. promnesia/tests/sources/test_shellcmd.py +21 -0
  65. promnesia/tests/sources/test_takeout.py +56 -0
  66. promnesia/tests/test_cannon.py +325 -0
  67. promnesia/tests/test_cli.py +40 -0
  68. promnesia/tests/test_compare.py +30 -0
  69. promnesia/tests/test_config.py +289 -0
  70. promnesia/tests/test_db_dump.py +222 -0
  71. promnesia/tests/test_extract.py +65 -0
  72. promnesia/tests/test_extract_urls.py +43 -0
  73. promnesia/tests/test_indexer.py +251 -0
  74. promnesia/tests/test_server.py +291 -0
  75. promnesia/tests/test_traverse.py +39 -0
  76. promnesia/tests/utils.py +35 -0
  77. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/METADATA +15 -18
  78. promnesia-1.3.20241021.dist-info/RECORD +83 -0
  79. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/WHEEL +1 -1
  80. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/entry_points.txt +0 -1
  81. promnesia/dump.py +0 -105
  82. promnesia-1.2.20230515.dist-info/RECORD +0 -58
  83. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/LICENSE +0 -0
  84. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/top_level.txt +0 -0
promnesia/sources/demo.py CHANGED
@@ -3,18 +3,34 @@ A dummy source, used for testing
  Generates a sequence of fake evenly separated visits
  '''

+ from __future__ import annotations
+
  from datetime import datetime, timedelta

- from ..common import Results, Visit, Loc
+ from promnesia.common import Loc, Results, Visit
+
+ IsoFormatDt = str
+ Seconds = int
+
+
+ # TODO allow passing isoformat string as base_dt?
+ # and maybe something similar as delta? start with seconds maybe
+ def index(
+     count: int = 100,
+     *,
+     base_dt: datetime | IsoFormatDt = datetime.min + timedelta(days=5000),
+     delta: timedelta | Seconds = timedelta(hours=1),
+ ) -> Results:

+     base_dt_ = base_dt if isinstance(base_dt, datetime) else datetime.fromisoformat(base_dt)
+     delta_ = delta if isinstance(delta, timedelta) else timedelta(seconds=delta)

- def index(count: int=100, *, base_dt: datetime=datetime.min + timedelta(days=5000), delta: timedelta=timedelta(hours=1)) -> Results:
      # todo with some errors too?
      # todo use data generation library suggested for HPI?
      for i in range(count):
          yield Visit(
              url=f'https://demo.com/page{i}.html',
-             dt=base_dt + delta * i,
+             dt=base_dt_ + delta_ * i,
              locator=Loc.make('demo'),
          )
      # todo add context?
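The widened index() signature above accepts a plain ISO-format string for base_dt and a number of seconds for delta, normalising both internally. A minimal usage sketch (the values here are made up, not from the diff):

    from promnesia.sources import demo

    # base_dt as an isoformat string, delta as seconds
    for visit in demo.index(count=3, base_dt='2023-06-01T12:00:00', delta=1800):
        print(visit.url, visit.dt)  # three visits, 30 minutes apart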
promnesia/sources/fbmessenger.py CHANGED
@@ -2,12 +2,13 @@
  Uses [[https://github.com/karlicoss/HPI][HPI]] for the messages data.
  '''

- from ..common import Results, Visit, Loc, extract_urls
+ from promnesia.common import Loc, Results, Visit, extract_urls


  def index() -> Results:
-     from . import hpi
+     from . import hpi # noqa: F401,I001
      from my.fbmessenger import messages
+
      for m in messages():
          if isinstance(m, Exception):
              yield m
promnesia/sources/filetypes.py CHANGED
@@ -1,11 +1,12 @@
- #!/usr/bin/env python3
+ from __future__ import annotations
+
+ from collections.abc import Iterable, Sequence
  from functools import lru_cache
  from pathlib import Path
- from typing import Dict, Callable, Optional, Sequence, NamedTuple, Union, Iterable
+ from typing import Callable, NamedTuple, Union

  from ..common import Results, Url

-
  # TODO doesn't really belong here...
  Ctx = Sequence[str]

@@ -18,13 +19,13 @@ class EUrl(NamedTuple):
  # keys are mime types + extensions
  Ex = Callable[[Path], Union[Results, Iterable[EUrl]]]
  # None means unhandled
- TYPE2IDX: Dict[str, Optional[Ex]] = {}
+ TYPE2IDX: dict[str, Ex | None] = {}
  # NOTE: there are some types in auto.py at the moment... it's a bit messy


  # TYPE2IDX only contains the 'prefixes', to speed up the lookup we are using cache..
  @lru_cache(None)
- def type2idx(t: str) -> Optional[Ex]:
+ def type2idx(t: str) -> Ex | None:
      if len(t) == 0:
          return None # just in case?
      # first try exact match
@@ -67,6 +68,7 @@ CODE = {
      'text/vnd.graphviz',
      'text/x-diff', # patch files
      'text/x-php',
+     'text/x-lilypond',

      # these didn't have a mime type, or were mistyped?
      'css',
@@ -96,9 +98,9 @@ audio/
  video/
  '''

- handle_later = lambda *args, **kwargs: ()
+ handle_later = lambda *_args, **_kwargs: ()

- def ignore(*args, **kwargs):
+ def ignore(*_args, **_kwargs):
      # TODO log (once?)
      yield from ()

@@ -115,6 +117,13 @@ TYPE2IDX.update({
      '.vcf' : ignore,
      'message/rfc822': ignore, # ??

+     # todo ignore all fonts?
+     'font/woff2': ignore,
+     'font/woff': ignore,
+     'text/x-Algol68': ignore, # ugh some license file had this?? maybe always index text/ as text?
+     'text/x-bytecode.python': ignore, # todo ignore all x-bytecode?
+     'text/calendar': ignore,
+
      # TODO not sure what to do about these..
      'application/octet-stream': handle_later,
      'application/zip' : handle_later,
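TYPE2IDX maps mime types and extensions to handler callables matching the Ex signature (or to ignore/handle_later), and type2idx() resolves a concrete type through an lru_cache. A rough sketch of registering a custom handler; the '.log' key and the handler itself are hypothetical:

    from pathlib import Path

    from promnesia.sources.filetypes import TYPE2IDX, type2idx

    def _index_log(path: Path):  # hypothetical handler matching the Ex signature
        yield from ()            # would normally yield visits/urls extracted from the file

    TYPE2IDX['.log'] = _index_log
    type2idx.cache_clear()       # type2idx is lru_cache'd, so reset it after mutating the map
    handler = type2idx('.log')   # should now resolve via the exact-match path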
promnesia/sources/github.py CHANGED
@@ -1,16 +1,14 @@
  '''
  Uses [[https://github.com/karlicoss/HPI][HPI]] github module
  '''
+ from __future__ import annotations

  # Note: requires the 'mistletoe' module if you enable render_markdown
-
- from typing import Optional, Set
-
- from ..common import Results, Visit, Loc, iter_urls, logger
+ from promnesia.common import Loc, Results, Visit, iter_urls, logger


  def index(*, render_markdown: bool = False) -> Results:
-     from . import hpi
+     from . import hpi # noqa: F401,I001
      from my.github.all import events

      if render_markdown:
@@ -29,9 +27,9 @@ def index(*, render_markdown: bool = False) -> Results:
          continue

      # if enabled, convert the (markdown) body to HTML
-     context: Optional[str] = e.body
+     context: str | None = e.body
      if e.body is not None and render_markdown:
-         context = TextParser(e.body)._doc_ashtml()
+         context = TextParser(e.body)._doc_ashtml() # type: ignore[possibly-undefined]

      # locator should link back to this event
      loc = Loc.make(title=e.summary, href=e.link)
@@ -59,7 +57,7 @@ def index(*, render_markdown: bool = False) -> Results:
      #
      # Note: this set gets reset every event, is here to
      # prevent duplicates between URLExtract and the markdown parser
-     emitted: Set[str] = set()
+     emitted: set[str] = set()
      for url in iter_urls(e.body):
          if url in emitted:
              continue
@@ -74,7 +72,7 @@ def index(*, render_markdown: bool = False) -> Results:
      # extract from markdown links like [link text](https://...)
      # incase URLExtract missed any somehow
      if render_markdown:
-         for res in extract_from_text(e.body):
+         for res in extract_from_text(e.body): # type: ignore[possibly-undefined]
              if isinstance(res, Exception):
                  yield res
                  continue
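The render_markdown branch above feeds the event body through the markdown source's TextParser to produce HTML context. A sketch of that helper on its own, assuming the optional mistletoe dependency is installed (the sample text is made up):

    from promnesia.sources.markdown import TextParser

    body = 'see [the docs](https://example.com) for details'
    html = TextParser(body)._doc_ashtml()  # body rendered to HTML, prefixed with the '!html ' marker
    print(html)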
promnesia/sources/guess.py CHANGED
@@ -1,6 +1,7 @@
  # TODO eh. confusing how guess and auto are different...
  # maybe merge them later?
- from typing import Iterable, Any
+ from collections.abc import Iterable
+ from typing import Any

  from ..common import Extraction, PathIsh

promnesia/sources/hackernews.py CHANGED
@@ -4,11 +4,11 @@ Uses [[https://github.com/karlicoss/HPI][HPI]] dogsheep module to import HackerN

  import textwrap

- from promnesia.common import Visit, Loc, Results
+ from promnesia.common import Loc, Results, Visit


  def index() -> Results:
-     from . import hpi
+     from . import hpi # noqa: F401,I001
      from my.hackernews import dogsheep

      for item in dogsheep.items():
promnesia/sources/hpi.py CHANGED
@@ -2,10 +2,10 @@
  Just a helper for a more humane error message when importing my.* dependencies
  '''

- from ..common import logger
+ from promnesia.common import logger

  try:
-     import my
+     import my # noqa: F401
  except ImportError as e:
      logger.exception(e)
      logger.critical("Failed during 'import my'. You probably need to install & configure HPI package first (see 'https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org')")
promnesia/sources/html.py CHANGED
@@ -2,19 +2,21 @@
  Extracts links from HTML files
  '''

- from pathlib import Path
- from typing import Iterator, Tuple
+ from __future__ import annotations

- from ..common import PathIsh, Visit, Loc, Results, file_mtime
+ from collections.abc import Iterator
+ from pathlib import Path

  from bs4 import BeautifulSoup

+ from promnesia.common import Loc, PathIsh, Results, Visit, file_mtime

- # TODO present error summary in the very end; import errors -- makes sense to show
+ # TODO present error summary in the very end; import errors -- makes sense to show
  # TODO on some exceptions, request a fallback to text?


- Url = Tuple[str, str]
+ Url = tuple[str, str]
+

  def extract_urls_from_html(s: str) -> Iterator[Url]:
      """
promnesia/sources/hypothesis.py CHANGED
@@ -1,14 +1,15 @@
  """
  Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myhypothesis][hypothesis]] module
  """
- from ..common import Loc, Results, Visit, extract_urls, join_tags
+
+ from promnesia.common import Loc, Results, Visit, extract_urls, join_tags


  def index() -> Results:
-     from . import hpi
+     from . import hpi # noqa: F401,I001
      import my.hypothesis as hyp

-     for h in hyp.get_highlights():
+     for h in hyp.highlights():
          if isinstance(h, Exception):
              yield h
              continue
promnesia/sources/instapaper.py CHANGED
@@ -1,11 +1,11 @@
  '''
  Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myinstapaper][instapaper]] module
  '''
- from ..common import Results, logger, Visit, Loc
+ from promnesia.common import Loc, Results, Visit


  def index() -> Results:
-     from . import hpi
+     from . import hpi # noqa: F401,I001
      import my.instapaper as ip

      for p in ip.pages():
promnesia/sources/markdown.py CHANGED
@@ -1,14 +1,24 @@
- from pathlib import Path
- from typing import Iterator, NamedTuple, Optional
-
- from ..common import get_logger, Extraction, Url, PathIsh, Res, Visit, Loc, file_mtime, logger
-
-
- import mistletoe # type: ignore
- from mistletoe.span_token import AutoLink, Link # type: ignore
- import mistletoe.block_token as BT # type: ignore
- from mistletoe.html_renderer import HTMLRenderer # type: ignore
+ from __future__ import annotations

+ from collections.abc import Iterator
+ from pathlib import Path
+ from typing import NamedTuple
+
+ import mistletoe # type: ignore
+ import mistletoe.block_token as BT # type: ignore
+ from mistletoe.html_renderer import HTMLRenderer # type: ignore
+ from mistletoe.span_token import AutoLink, Link # type: ignore
+
+ from promnesia.common import (
+     Extraction,
+     Loc,
+     PathIsh,
+     Res,
+     Url,
+     Visit,
+     file_mtime,
+     logger,
+ )

  renderer = HTMLRenderer()

@@ -18,7 +28,7 @@ block_tokens = tuple(getattr(BT, name) for name in BT.__all__)

  class Parsed(NamedTuple):
      url: Url
-     context: Optional[str]
+     context: str | None


  Result = Res[Parsed]
@@ -42,7 +52,7 @@ HTML_MARKER = '!html '
  def _ashtml(block) -> str:
      res = renderer.render(block)
      if res.startswith('<p>') and res.endswith('</p>'):
-         res = res[3: -4] # meh, but for now fine
+         res = res[3:-4] # meh, but for now fine
      return res


@@ -62,7 +72,6 @@ class Parser:
          context = None if last_block is None else HTML_MARKER + _ashtml(last_block)
          yield Parsed(url=url, context=context)

-
      def _walk(self, cur, last_block) -> Iterator[Result]:
          if isinstance(cur, block_tokens):
              last_block = cur
@@ -73,12 +82,14 @@ class Parser:
              logger.exception(e)
              yield e

-         children = getattr(cur, 'children', [])
+         # keeping getattr for compatibility in older versions of mistletoe, it was optional
+         children = getattr(cur, 'children', None)
+         if children is None:
+             return
          for c in children:
              yield from self._walk(c, last_block=last_block)

-
-     def walk(self):
+     def walk(self) -> Iterator[Result]:
          yield from self._walk(self.doc, last_block=None)


@@ -94,7 +105,7 @@ def extract_from_file(fname: PathIsh) -> Iterator[Extraction]:
          yield Visit(
              url=r.url,
              dt=fallback_dt,
-             locator=Loc.file(fname), # TODO line number
+             locator=Loc.file(fname),  # TODO line number
              context=r.context,
          )

@@ -105,9 +116,9 @@ class TextParser(Parser):
      Instead of chunking blocks like for files, this returns the entire
      message rendered as the context
      '''
-     def __init__(self, text: str):
-         self.doc = mistletoe.Document(text)

+     def __init__(self, text: str) -> None:
+         self.doc = mistletoe.Document(text)

      def _doc_ashtml(self):
          '''
@@ -117,8 +128,7 @@ class TextParser(Parser):
          self._html = HTML_MARKER + _ashtml(self.doc)
          return self._html

-
-     def _extract(self, cur, last_block = None) -> Iterator[Parsed]:
+     def _extract(self, cur, last_block=None) -> Iterator[Parsed]: # noqa: ARG002
          if not isinstance(cur, (AutoLink, Link)):
              return

promnesia/sources/org.py CHANGED
@@ -1,16 +1,26 @@
- from datetime import datetime
+ from __future__ import annotations
+
  import re
- from typing import Iterable, List, Set, Optional, Iterator, Tuple, NamedTuple, cast
+ from collections.abc import Iterable, Iterator
+ from datetime import datetime
  from pathlib import Path
-
-
- from ..common import Visit, get_logger, Results, Url, Loc, from_epoch, iter_urls, PathIsh, Res, file_mtime
-
+ from typing import NamedTuple, Optional, cast

  import orgparse
- from orgparse.date import gene_timestamp_regex, OrgDate
+ from orgparse.date import OrgDate, gene_timestamp_regex
  from orgparse.node import OrgNode

+ from promnesia.common import (
+     Loc,
+     PathIsh,
+     Res,
+     Results,
+     Url,
+     Visit,
+     file_mtime,
+     get_logger,
+     iter_urls,
+ )

  UPDATE_ORGPARSE_WARNING = 'WARNING: please update orgparse version to a more recent (pip3 install -U orgparse)'

@@ -36,7 +46,7 @@ CREATED_RGX = re.compile(gene_timestamp_regex(brtype='inactive'), re.VERBOSE)
  """

  class Parsed(NamedTuple):
-     dt: Optional[datetime]
+     dt: datetime | None
      heading: str


@@ -57,8 +67,12 @@ def _parse_node(n: OrgNode) -> Parsed:
      # todo a bit hacky..
      heading = heading.replace(createds + ' ', '')
      if createds is not None:
-         [odt] = OrgDate.list_from_str(createds)
-         dt = odt.start
+         if '<%%' in createds:
+             # sexp date, not supported
+             dt = None
+         else:
+             [odt] = OrgDate.list_from_str(createds)
+             dt = odt.start
      else:
          dt = None
      return Parsed(dt=dt, heading=heading)
@@ -70,7 +84,7 @@ def _get_heading(n: OrgNode):
      return '' if n.is_root() else n.get_heading(format='raw')


- def walk_node(*, node: OrgNode, dt: datetime) -> Iterator[Res[Tuple[Parsed, OrgNode]]]:
+ def walk_node(*, node: OrgNode, dt: datetime) -> Iterator[Res[tuple[Parsed, OrgNode]]]:
      try:
          parsed = _parse_node(node)
      except Exception as e:
@@ -80,7 +94,7 @@ def walk_node(*, node: OrgNode, dt: datetime) -> Iterator[Res[Tuple[Parsed, OrgN
          parsed = parsed._replace(dt=dt)
      else:
          dt = parsed.dt
-     yield parsed, node
+     yield parsed, node

      for c in node.children:
          yield from walk_node(node=c, dt=dt)
@@ -94,7 +108,7 @@ def get_body_compat(node: OrgNode) -> str:
      # get_body was only added to root in 0.2.0
      for x in warn_old_orgparse_once():
          # ugh. really crap, but it will at least only warn once... (becaue it caches)
-         raise x
+         raise x # noqa: B904
      return UPDATE_ORGPARSE_WARNING
  else:
      raise e
promnesia/sources/plaintext.py CHANGED
@@ -1,10 +1,9 @@
- from ..common import get_logger, get_tmpdir, PathIsh, _is_windows
- from ..compat import removeprefix
+ from __future__ import annotations

  from functools import lru_cache
  from pathlib import Path
- import os
- from typing import List
+
+ from promnesia.common import PathIsh, _is_windows, get_logger, get_tmpdir

  # https://linux-and-mac-hacks.blogspot.co.uk/2013/04/use-grep-and-regular-expressions-to.html
  _URL_REGEX = r'\b(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@#/%=~_|]'
@@ -12,16 +11,16 @@ _URL_REGEX = r'\b(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]*[-A-Za-z0-9+&@
  if _is_windows:
      # wtf? for some reason on windows (in cmd.exe specificaly) \b isn't working...
      # this will make the regex a bit less precise, but not end of the world
-     _URL_REGEX = removeprefix(_URL_REGEX, r'\b')
+     _URL_REGEX = _URL_REGEX.removeprefix(r'\b')


- @lru_cache()
+ @lru_cache
  def _has_grep() -> bool:
      import shutil
      return shutil.which('grep') is not None


- Command = List[str]
+ Command = list[str]


  _GREP_ARGS: Command = [
@@ -39,7 +38,7 @@ if not _is_windows:

  # NOTE: grep/findstr exit with code 1 on no matches...
  # we hack around it in shellcmd module (search 'grep')
- def _grep(*, paths: List[str], recursive: bool) -> Command:
+ def _grep(*, paths: list[str], recursive: bool) -> Command:
      return [
          'grep',
          *(['-r'] if recursive else []),
@@ -91,24 +90,26 @@ def extract_from_path(path: PathIsh) -> Command:
      logger = get_logger()
      if pp.is_dir(): # TODO handle archives here???
          return _extract_from_dir(str(pp))
-     else:
-         if any(pp.suffix == ex for ex in (
-             '.xz',
-             '.bz2',
-             '.gz',
-             '.zip',
-         )):
-             logger.info(f"Extracting from compressed file {path}")
-             raise RuntimeError(f"Archives aren't supported yet: {path}")
-             import lzma
-             from tempfile import NamedTemporaryFile
-             # TODO hopefully, no collisions
-             import os.path
-             fname = os.path.join(tdir.name, os.path.basename(path))
-             with open(fname, 'wb') as fo:
-                 with lzma.open(path, 'r') as cf:
-                     fo.write(cf.read())
-             return _extract_from_file(fname)
-         else:
-             r = _extract_from_file(str(pp))
-             return r
+
+     if any(pp.suffix == ex for ex in (
+         '.xz',
+         '.bz2',
+         '.gz',
+         '.zip',
+     )):
+         # todo should be debug?
+         # or should delete it completely, feels like unpacking archives here is a bit too much
+         raise RuntimeError(f"Archives aren't supported yet: {path}")
+         # logger.info(f"Extracting from compressed file {path}")
+         # import lzma
+         # from tempfile import NamedTemporaryFile
+         # # TODO hopefully, no collisions
+         # import os.path
+         # fname = os.path.join(tdir.name, os.path.basename(path))
+         # with open(fname, 'wb') as fo:
+         #     with lzma.open(path, 'r') as cf:
+         #         fo.write(cf.read())
+         # return _extract_from_file(fname)
+
+     r = _extract_from_file(str(pp))
+     return r
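After this change extract_from_path() returns the grep/findstr Command for both files and directories, and refuses archive suffixes outright instead of unpacking them. A rough sketch of calling it (placeholder paths, and assuming the module's tmpdir/logging globals are set up as they would be during normal indexing):

    from promnesia.sources.plaintext import extract_from_path

    cmd = extract_from_path('notes/')  # placeholder directory -> a grep invocation as Command = list[str]
    print(cmd)

    # extract_from_path('dump.xz')     # would raise RuntimeError: archives aren't supported yet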
promnesia/sources/pocket.py CHANGED
@@ -1,11 +1,12 @@
  '''
  Uses [[https://github.com/karlicoss/HPI][HPI]] for Pocket highlights & bookmarks
  '''
- from ..common import Visit, Loc, Results
+
+ from promnesia.common import Loc, Results, Visit


  def index() -> Results:
-     from . import hpi
+     from . import hpi # noqa: F401,I001
      from my.pocket import articles

      # TODO use docstring from my. module? E.g. describing which pocket format is expected
promnesia/sources/reddit.py CHANGED
@@ -2,21 +2,27 @@
  Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myreddit][reddit]] module
  '''

+ from __future__ import annotations
+
+ import typing
  from itertools import chain
- from typing import Set, Optional, Type

- from ..common import Visit, Loc, extract_urls, Results, logger
+ from promnesia.common import Loc, Results, Visit, extract_urls, logger
+
+ if typing.TYPE_CHECKING:
+     from my.reddit.common import Comment, RedditBase, Save, Submission, Upvote
+

+ def index(*, render_markdown: bool = False, renderer: type[RedditRenderer] | None = None) -> Results:
+     from . import hpi # noqa: F401

- def index(*, render_markdown: bool = False, renderer: Optional[Type['RedditRenderer']] = None) -> Results:
-     from . import hpi
      try:
-         from my.reddit.all import submissions, comments, saved, upvoted
+         from my.reddit.all import comments, saved, submissions, upvoted
      except ModuleNotFoundError as e:
          if "No module named 'my.reddit.all'" in str(e):
              import warnings
              warnings.warn("DEPRECATED/reddit: Using an old version of HPI, please update")
-             from my.reddit import submissions, comments, saved, upvoted # type: ignore[no-redef]
+             from my.reddit import comments, saved, submissions, upvoted
          else:
              raise e

@@ -58,7 +64,7 @@ def index(*, render_markdown: bool = False, renderer: Optional[Type['RedditRende
  # mostly here so we can keep track of how the user
  # wants to render markdown
  class RedditRenderer:
-     def __init__(self, render_markdown: bool = False) -> None:
+     def __init__(self, *, render_markdown: bool = False) -> None:
          self._link_extractor = None
          self._parser_cls = None
          try:
@@ -77,7 +83,7 @@ class RedditRenderer:
          self.render_markdown = render_markdown


-     def _from_comment(self, i: 'Comment') -> Results:
+     def _from_comment(self, i: Comment) -> Results:
          locator = Loc.make(
              title='Reddit comment',
              href=i.url,
@@ -85,7 +91,7 @@ class RedditRenderer:
          yield from self._from_common(i, locator=locator)


-     def _from_submission(self, i: 'Submission') -> Results:
+     def _from_submission(self, i: Submission) -> Results:
          locator = Loc.make(
              title=f'Reddit submission: {i.title}',
              href=i.url,
@@ -93,15 +99,15 @@ class RedditRenderer:
          yield from self._from_common(i, locator=locator)


-     def _from_upvote(self, i: 'Upvote') -> Results:
+     def _from_upvote(self, i: Upvote) -> Results:
          locator = Loc.make(
-             title=f'Reddit upvote',
+             title='Reddit upvote',
              href=i.url,
          )
          yield from self._from_common(i, locator=locator)


-     def _from_save(self, i: 'Save') -> Results:
+     def _from_save(self, i: Save) -> Results:
          locator = Loc.make(
              title='Reddit save',
              href=i.url,
@@ -117,7 +123,7 @@ class RedditRenderer:
          return text


-     def _from_common(self, i: 'RedditBase', locator: Loc) -> Results:
+     def _from_common(self, i: RedditBase, locator: Loc) -> Results:
          urls = [i.url]
          # TODO this should belong to HPI.. fix permalink handling I guess
          # ok, it's not present for all of them..
@@ -130,7 +136,7 @@ class RedditRenderer:

          context = self._render_body(i.text)

-         emitted: Set[str] = set()
+         emitted: set[str] = set()

          for url in chain(urls, extract_urls(i.text)):
              if url in emitted:
@@ -165,8 +171,3 @@ class RedditRenderer:
          )
          emitted.add(res.url)

-
- import typing
- if typing.TYPE_CHECKING:
-     from my.reddit.common import Submission, Comment, Save, Upvote, RedditBase
-
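With the reworked signature both index() arguments are keyword-only and a custom renderer class can be injected. A hedged sketch of that hook, assuming _render_body is the intended override point (the subclass is hypothetical, and a configured HPI reddit module is needed for index() to yield anything):

    from promnesia.sources import reddit

    class PlainRenderer(reddit.RedditRenderer):  # hypothetical subclass
        def _render_body(self, text):
            return text  # skip markdown rendering entirely

    visits = list(reddit.index(render_markdown=False, renderer=PlainRenderer))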