promnesia 1.2.20240810__py3-none-any.whl → 1.4.20250909__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. promnesia/__init__.py +18 -4
  2. promnesia/__main__.py +104 -78
  3. promnesia/cannon.py +108 -107
  4. promnesia/common.py +107 -88
  5. promnesia/compare.py +33 -30
  6. promnesia/compat.py +10 -10
  7. promnesia/config.py +37 -34
  8. promnesia/database/common.py +4 -3
  9. promnesia/database/dump.py +13 -13
  10. promnesia/database/load.py +7 -7
  11. promnesia/extract.py +19 -17
  12. promnesia/logging.py +27 -15
  13. promnesia/misc/install_server.py +32 -27
  14. promnesia/server.py +106 -79
  15. promnesia/sources/auto.py +104 -77
  16. promnesia/sources/auto_logseq.py +6 -5
  17. promnesia/sources/auto_obsidian.py +2 -2
  18. promnesia/sources/browser.py +20 -10
  19. promnesia/sources/browser_legacy.py +65 -50
  20. promnesia/sources/demo.py +7 -8
  21. promnesia/sources/fbmessenger.py +3 -3
  22. promnesia/sources/filetypes.py +22 -16
  23. promnesia/sources/github.py +9 -8
  24. promnesia/sources/guess.py +6 -2
  25. promnesia/sources/hackernews.py +7 -9
  26. promnesia/sources/hpi.py +5 -3
  27. promnesia/sources/html.py +11 -7
  28. promnesia/sources/hypothesis.py +3 -2
  29. promnesia/sources/instapaper.py +3 -2
  30. promnesia/sources/markdown.py +22 -12
  31. promnesia/sources/org.py +36 -17
  32. promnesia/sources/plaintext.py +41 -39
  33. promnesia/sources/pocket.py +5 -3
  34. promnesia/sources/reddit.py +24 -26
  35. promnesia/sources/roamresearch.py +5 -2
  36. promnesia/sources/rss.py +6 -8
  37. promnesia/sources/shellcmd.py +21 -11
  38. promnesia/sources/signal.py +27 -26
  39. promnesia/sources/smscalls.py +2 -3
  40. promnesia/sources/stackexchange.py +5 -4
  41. promnesia/sources/takeout.py +37 -34
  42. promnesia/sources/takeout_legacy.py +29 -19
  43. promnesia/sources/telegram.py +18 -12
  44. promnesia/sources/telegram_legacy.py +22 -11
  45. promnesia/sources/twitter.py +7 -6
  46. promnesia/sources/vcs.py +11 -6
  47. promnesia/sources/viber.py +11 -10
  48. promnesia/sources/website.py +8 -7
  49. promnesia/sources/zulip.py +3 -2
  50. promnesia/sqlite.py +13 -7
  51. promnesia/tests/common.py +10 -5
  52. promnesia/tests/server_helper.py +13 -10
  53. promnesia/tests/sources/test_auto.py +2 -3
  54. promnesia/tests/sources/test_filetypes.py +11 -8
  55. promnesia/tests/sources/test_hypothesis.py +10 -6
  56. promnesia/tests/sources/test_org.py +9 -5
  57. promnesia/tests/sources/test_plaintext.py +9 -8
  58. promnesia/tests/sources/test_shellcmd.py +13 -13
  59. promnesia/tests/sources/test_takeout.py +3 -5
  60. promnesia/tests/test_cannon.py +256 -239
  61. promnesia/tests/test_cli.py +12 -8
  62. promnesia/tests/test_compare.py +17 -13
  63. promnesia/tests/test_config.py +7 -8
  64. promnesia/tests/test_db_dump.py +15 -15
  65. promnesia/tests/test_extract.py +17 -10
  66. promnesia/tests/test_indexer.py +24 -18
  67. promnesia/tests/test_server.py +12 -13
  68. promnesia/tests/test_traverse.py +0 -2
  69. promnesia/tests/utils.py +3 -7
  70. promnesia-1.4.20250909.dist-info/METADATA +66 -0
  71. promnesia-1.4.20250909.dist-info/RECORD +80 -0
  72. {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/WHEEL +1 -2
  73. promnesia/kjson.py +0 -121
  74. promnesia/sources/__init__.pyi +0 -0
  75. promnesia-1.2.20240810.dist-info/METADATA +0 -54
  76. promnesia-1.2.20240810.dist-info/RECORD +0 -83
  77. promnesia-1.2.20240810.dist-info/top_level.txt +0 -1
  78. {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/entry_points.txt +0 -0
  79. {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info/licenses}/LICENSE +0 -0
promnesia/sources/auto.py CHANGED
@@ -2,36 +2,48 @@
  - discovers files recursively
  - guesses the format (orgmode/markdown/json/etc) by the extension/MIME type
  - can index most of plaintext files, including source code!
- - autodetects Obsidian vault and adds `obsidian://` app protocol support [[file:../src/promnesia/sources/obsidian.py][promnesia.sources.obsidian]]
- - autodetects Logseq graph and adds `logseq://` app protocol support [[file:../src/promnesia/sources/logseq.py][promnesia.sources.logseq]]
+ - autodetects Obsidian vault and adds `obsidian://` app protocol support [[file:../src/promnesia/sources/auto_obsidian.py][promnesia.sources.obsidian]]
+ - autodetects Logseq graph and adds `logseq://` app protocol support [[file:../src/promnesia/sources/auto_logseq.py][promnesia.sources.logseq]]
 """
 
+from __future__ import annotations
+
 import csv
-from concurrent.futures import ProcessPoolExecutor as Pool
-from contextlib import nullcontext
-from datetime import datetime
 import itertools
 import json
 import os
-from typing import Optional, Iterable, Union, List, Tuple, NamedTuple, Sequence, Iterator, Iterable, Callable, Any, Dict, Set
+from collections.abc import Callable, Iterable, Iterator, Sequence
+from concurrent.futures import ProcessPoolExecutor as Pool
+from contextlib import nullcontext
 from fnmatch import fnmatch
+from functools import wraps
 from pathlib import Path
-from functools import lru_cache, wraps
-import warnings
-
-import pytz
-
-from ..common import Visit, Url, PathIsh, get_logger, Loc, get_tmpdir, extract_urls, Extraction, Result, Results, mime, traverse, file_mtime, echain, logger
-from ..common import warn_once
-from ..config import use_cores
-
+from typing import Any, NamedTuple
+
+from promnesia.common import (
+    Loc,
+    PathIsh,
+    Result,
+    Results,
+    Visit,
+    echain,
+    extract_urls,
+    file_mtime,
+    get_logger,
+    get_tmpdir,
+    logger,
+    mime,
+    traverse,
+    warn_once,
+)
+from promnesia.config import use_cores
 
-from .filetypes import EUrl, Ctx
-from .auto_obsidian import obsidian_replacer
 from .auto_logseq import logseq_replacer
+from .auto_obsidian import obsidian_replacer
+from .filetypes import Ctx, EUrl
 
 
-def _collect(thing, path: List[str], result: List[EUrl]) -> None:
+def _collect(thing, path: list[str], result: list[EUrl]) -> None:
     if isinstance(thing, str):
         ctx: Ctx = tuple(path)
         result.extend([EUrl(url=u, ctx=ctx) for u in extract_urls(thing)])
@@ -51,15 +63,16 @@ def _collect(thing, path: List[str], result: List[EUrl]) -> None:
 
 
 # TODO mm. okay, I suppose could use kython consuming thingy?..
-def collect_from(thing) -> List[EUrl]:
-    uuu: List[EUrl] = []
-    path: List[str] = []
+def collect_from(thing) -> list[EUrl]:
+    uuu: list[EUrl] = []
+    path: list[str] = []
     _collect(thing, path, uuu)
     return uuu
 
 
 Urls = Iterator[EUrl]
 
+
 def _csv(path: Path) -> Urls:
     # TODO these could also have Loc to be fair..
     with path.open() as fo:
@@ -77,6 +90,7 @@ def _json(path: Path) -> Urls:
 def _plaintext(path: Path) -> Results:
     from . import shellcmd
     from .plaintext import extract_from_path
+
     yield from shellcmd.index(extract_from_path(path))
 
 
@@ -85,7 +99,8 @@ def _plaintext(path: Path) -> Results:
 def fallback(ex):
     """Falls back to plaintext in case of issues"""
 
-    fallback_active: Dict[Any, bool] = {}
+    fallback_active: dict[Any, bool] = {}
+
     @wraps(ex)
     def wrapped(path: Path):
         nonlocal fallback_active
@@ -99,79 +114,83 @@ def fallback(ex):
             except ModuleNotFoundError as me:
                 logger = get_logger()
                 logger.exception(me)
-                logger.warn('%s: %s not found, falling back to grep! "pip3 install --user %s" for better support!', path, me.name, me.name)
+                logger.warning(
+                    '%s: %s not found, falling back to grep! "pip3 install --user %s" for better support!',
+                    path,
+                    me.name,
+                    me.name,
+                )
                 yield me
                 fallback_active[ex] = True
                 do_fallback = True
         if do_fallback:
             yield from _plaintext(path)
+
     return wrapped
 
 
 @fallback
 def _markdown(path: Path) -> Results:
     from . import markdown
+
     yield from markdown.extract_from_file(path)
 
 
 @fallback
 def _html(path: Path) -> Results:
     from . import html
+
     yield from html.extract_from_file(path)
 
 
 @fallback
 def _org(path: Path) -> Results:
     from . import org
-    return org.extract_from_file(path)
-
-
-from .filetypes import TYPE2IDX, type2idx, IGNORE, CODE
-
-TYPE2IDX.update({
-    'application/json': _json,
-    '.json' : _json,
-    '.ipynb' : _json,
-
-    '.csv' : _csv,
-    'application/csv': _csv,
-
-    '.org' : _org,
-    '.org_archive': _org,
-
-    '.md' : _markdown,
-    '.markdown' : _markdown,
-
-    'text/plain' : _plaintext,
-    '.txt' : _plaintext,
-    '.page' : _plaintext,
-    '.rst' : _plaintext,
 
+    return org.extract_from_file(path)
 
-    # TODO doesn't work that great; weird stuff like
-    # builtins.ImportError.name|2019-07-10T12:12:35.584510+00:00|names::ImportError::node::names::name::node::fullname
-    # TODO could have stricter url extraction for that; always using http/https?
-    # '.ipynb' : _json,
-
-    '.html' : _html,
-    'text/html': _html,
-    'text/xml' : _plaintext,
 
-    'text/x-po': _plaintext, # some translation files
-})
+from .filetypes import CODE, IGNORE, TYPE2IDX, type2idx
+
+TYPE2IDX.update(
+    {
+        'application/json': _json,
+        '.json': _json,
+        '.ipynb': _json,
+        '.csv': _csv,
+        'application/csv': _csv,
+        '.org': _org,
+        '.org_archive': _org,
+        '.md': _markdown,
+        '.markdown': _markdown,
+        'text/plain': _plaintext,
+        '.txt': _plaintext,
+        '.page': _plaintext,
+        '.rst': _plaintext,
+        # TODO doesn't work that great; weird stuff like
+        # builtins.ImportError.name|2019-07-10T12:12:35.584510+00:00|names::ImportError::node::names::name::node::fullname
+        # TODO could have stricter url extraction for that; always using http/https?
+        # '.ipynb' : _json,
+        '.html': _html,
+        'text/html': _html,
+        'text/xml': _plaintext,
+        'text/x-po': _plaintext,  # some translation files
+    }
+)
 
 for t in CODE:
     TYPE2IDX[t] = _plaintext
 # TODO ok, mime doesn't really tell between org/markdown/etc anyway
 
 
-Replacer = Optional[Callable[[str, str], str]]
+Replacer = Callable[[str, str], str] | None
+
 
 def index(
-        *paths: PathIsh,
-        ignored: Union[Sequence[str], str]=(),
-        follow: bool=True,
-        replacer: Replacer=None,
+    *paths: PathIsh,
+    ignored: Sequence[str] | str = (),
+    follow: bool = True,
+    replacer: Replacer = None,
 ) -> Results:
     '''
     path : a path or list of paths to recursively index
@@ -204,16 +223,17 @@ def index(
     )
     yield from _index(apath, opts=opts)
 
+
 class Options(NamedTuple):
     ignored: Sequence[str]
     follow: bool
     # TODO option to add ignores? not sure..
     # TODO I don't like this replacer thing... think about removing it
     replacer: Replacer
-    root: Optional[Path]=None
+    root: Path | None = None
 
 
-def _index_file_aux(path: Path, opts: Options) -> Union[Exception, List[Result]]:
+def _index_file_aux(path: Path, opts: Options) -> Exception | list[Result]:
     # just a helper for the concurrent version (the generator isn't picklable)
     try:
         return list(_index_file(path, opts=opts))
@@ -226,14 +246,14 @@ def _index(path: Path, opts: Options) -> Results:
     logger = get_logger()
 
     cores = use_cores()
-    if cores is None: # do not use cores
+    if cores is None:  # do not use cores
         # todo use ExitStack instead?
         pool = nullcontext()
-        mapper = map # dummy pool
+        mapper = map  # dummy pool
     else:
         workers = None if cores == 0 else cores
-        pool = Pool(workers) # type: ignore
-        mapper = pool.map # type: ignore
+        pool = Pool(workers)  # type: ignore[assignment]
+        mapper = pool.map  # type: ignore[attr-defined]
 
     # iterate over resolved paths, to avoid duplicates
     def rit() -> Iterable[Path]:
@@ -243,18 +263,19 @@ def _index(path: Path, opts: Options) -> Results:
                 # TODO not sure if should log here... might end up with quite a bit of logs
                 logger.debug('ignoring %s: user ignore rules', p)
                 continue
-            if any(i in p.parts for i in IGNORE): # meh, not very efficient.. pass to traverse??
+            if any(i in p.parts for i in IGNORE):  # meh, not very efficient.. pass to traverse??
                 logger.debug('ignoring %s: default ignore rules', p)
                 continue
 
             p = p.resolve()
-            if not os.path.exists(p):
+            if not os.path.exists(p):  # noqa: PTH110
                 logger.debug('ignoring %s: broken symlink?', p)
                 continue
 
             yield p
 
     from more_itertools import unique_everseen
+
     it = unique_everseen(rit())
 
     with pool:
@@ -266,8 +287,10 @@ def _index(path: Path, opts: Options) -> Results:
 
 
 Mime = str
-from .filetypes import Ex # meh
-def by_path(pp: Path) -> Tuple[Optional[Ex], Optional[Mime]]:
+from .filetypes import Ex  # meh
+
+
+def by_path(pp: Path) -> tuple[Ex | None, Mime | None]:
     suf = pp.suffix.lower()
     # firt check suffixes, it's faster
     s = type2idx(suf)
@@ -289,9 +312,10 @@ def _index_file(pp: Path, opts: Options) -> Results:
     # TODO not even sure if it's used...
     suf = pp.suffix.lower()
 
-    if suf == '.xz': # TODO zstd?
+    if suf == '.xz':  # TODO zstd?
         import lzma
-        uname = pp.name[:-len('.xz')] # chop off suffix, so the downstream indexer can handle it
+
+        uname = pp.name[: -len('.xz')]  # chop off suffix, so the downstream indexer can handle it
 
     assert pp.is_absolute(), pp
     # make sure to keep hierarchy, otherwise might end up with some name conflicts if filenames clash
@@ -318,7 +342,7 @@ def _index_file(pp: Path, opts: Options) -> Results:
 
     logger.debug('indexing via %s: %s', ip.__name__, pp)
 
-    def indexer() -> Union[Urls, Results]:
+    def indexer() -> Urls | Results:
         # eh, annoying.. need to make more generic..
         idx = ip(pp)
         try:
@@ -346,17 +370,20 @@ def _index_file(pp: Path, opts: Options) -> Results:
             v = r
 
         loc = v.locator
-        if loc is not None and root is not None:
+        # FIXME double checke that v.locator indeed can't be none and remove the check?
+        if loc is not None and root is not None:  # type: ignore[redundant-expr]
             # meh. but it works
             # todo potentially, just use dataclasses instead...
             loc = loc._replace(title=loc.title.replace(str(root) + os.sep, ''))
             v = v._replace(locator=loc)
 
         if replacer is not None and root is not None:
-            upd: Dict[str, Any] = {}
+            upd: dict[str, Any] = {}
             href = v.locator.href
             if href is not None:
-                upd['locator'] = v.locator._replace(href=replacer(href, str(root)), title=replacer(v.locator.title, str(root)))
+                upd['locator'] = v.locator._replace(
+                    href=replacer(href, str(root)), title=replacer(v.locator.title, str(root))
+                )
             ctx = v.context
             if ctx is not None:
                 # TODO in context, http is unnecessary
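
Most of the churn in auto.py is a mechanical typing migration: `typing.Optional`/`Union`/`List`/`Dict` give way to PEP 585 builtin generics and PEP 604 `X | None` unions, with `from __future__ import annotations` added at the top of the module. A minimal, self-contained sketch of the style the diff moves to (toy names and body, not promnesia's API):

from __future__ import annotations  # annotations become lazy strings (PEP 563)

from collections.abc import Callable, Sequence

# Note: module-level aliases like this are still evaluated eagerly, so using
# `|` on a Callable generic requires Python 3.10+.
# Before: Replacer = Optional[Callable[[str, str], str]]
Replacer = Callable[[str, str], str] | None

def example_index(
    *paths: str,
    ignored: Sequence[str] | str = (),  # was: Union[Sequence[str], str]
    follow: bool = True,
    replacer: Replacer = None,
) -> list[str]:  # was: List[str]
    # toy body, just to keep the sketch runnable
    return [str(p) for p in paths]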
promnesia/sources/auto_logseq.py CHANGED
@@ -1,14 +1,15 @@
 import os.path
 import urllib.parse
 
+
 def logseq_replacer(path: str, root: str) -> str:
-    if not path.startswith("editor://") or not (path.endswith('.md') or path.endswith('.org')):
+    if not path.startswith("editor://") or not (path.endswith((".md", ".org"))):
         return path
-
-    graph = os.path.basename(root)
-    page_name = os.path.basename(path).rsplit('.', 1)[0]
+
+    graph = os.path.basename(root)  # noqa: PTH119
+    page_name = os.path.basename(path).rsplit('.', 1)[0]  # noqa: PTH119
     encoded_page_name = urllib.parse.quote(page_name)
-
+
     uri = f"logseq://graph/{graph}?page={encoded_page_name}"
 
     return uri
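
For reference, a hypothetical invocation of the replacer above (paths invented for illustration): it rewrites an `editor://` link into Logseq's app protocol, using the graph root's basename as the graph id and URL-encoding the page name.

from promnesia.sources.auto_logseq import logseq_replacer

uri = logseq_replacer(
    "editor:///home/user/notes/My Page.md",  # hypothetical editor:// link
    "/home/user/notes",                      # graph root; basename becomes the graph id
)
assert uri == "logseq://graph/notes?page=My%20Page"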
promnesia/sources/auto_obsidian.py CHANGED
@@ -1,8 +1,8 @@
 def obsidian_replacer(p: str, r: str) -> str:
     if not p.startswith("editor://") or not p.endswith('.md'):
         return p
-
+
     path = p.split('/', 2)[-1]
-
+
     uri = f"obsidian://{path}"
     return uri
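
Likewise for the Obsidian variant: everything after the `editor://` prefix is re-homed under the `obsidian://` protocol. A hypothetical invocation (paths invented for illustration):

from promnesia.sources.auto_obsidian import obsidian_replacer

# p.split('/', 2)[-1] drops the "editor://" prefix; the root argument is unused here
uri = obsidian_replacer("editor://vault/daily/2024-01-01.md", "/vault")
assert uri == "obsidian://vault/daily/2024-01-01.md"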
promnesia/sources/browser.py CHANGED
@@ -2,32 +2,37 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for visits from web browsers.
 '''
 
+from __future__ import annotations
+
 import re
-from typing import Optional, Iterator, Any, TYPE_CHECKING
 import warnings
+from collections.abc import Iterator
+from typing import TYPE_CHECKING, Any
 
-from promnesia.common import Results, Visit, Loc, Second, PathIsh, logger, is_sqlite_db
+from promnesia.common import Loc, PathIsh, Results, Second, Visit, is_sqlite_db, logger
 
 
-def index(p: Optional[PathIsh]=None) -> Results:
-    from . import hpi
+def index(p: PathIsh | None = None) -> Results:
+    from . import hpi  # noqa: F401
 
     if p is None:
         from my.browser.all import history
+
         yield from _index_new(history())
         return
 
     warnings.warn(
         f'Passing paths to promnesia.sources.browser is deprecated, you should setup my.browser.export instead. '
-        f'See https://github.com/seanbreckenridge/browserexport#hpi .'
+        f'See https://github.com/purarue/browserexport#hpi .'
         f'Will try to hack path to browser databases {p} into HPI config.'
     )
     try:
         yield from _index_new_with_adhoc_config(path=p)
-        return
     except Exception as e:
         logger.exception(e)
         warnings.warn("Hacking my.config.browser.export didn't work. You probably need to update HPI.")
+    else:
+        return
 
     logger.warning("Falling back onto legacy promnesia.sources.browser_legacy module")
     yield from _index_old(path=p)
@@ -35,22 +40,25 @@ def index(p: Optional[PathIsh]=None) -> Results:
 
 def _index_old(*, path: PathIsh) -> Results:
     from . import browser_legacy
+
     yield from browser_legacy.index(path)
 
 
 def _index_new_with_adhoc_config(*, path: PathIsh) -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001
 
     ## previously, it was possible to index be called with multiple different db search paths
     ## this would result in each subsequent call to my.browser.export.history to invalidate cache every time
     ## so we hack cachew path so it's different for each call
     from my.core.core_config import config as hpi_core_config
+
     hpi_cache_dir = hpi_core_config.get_cache_dir()
     sanitized_path = re.sub(r'\W', '_', str(path))
    cache_override = None if hpi_cache_dir is None else hpi_cache_dir / sanitized_path
     ##
 
-    from my.core.common import classproperty, Paths, get_files
+    from my.core.common import Paths, classproperty, get_files
+
     class config:
         class core:
             cache_dir = cache_override
@@ -62,8 +70,10 @@ def _index_new_with_adhoc_config(*, path: PathIsh) -> Results:
             return tuple([f for f in get_files(path, glob='**/*') if is_sqlite_db(f)])
 
     from my.core.cfg import tmp_config
+
     with tmp_config(modules='my.browser.export|my.core.core_config', config=config):
         from my.browser.export import history
+
         yield from _index_new(history())
 
 
@@ -75,8 +85,8 @@ else:
 
 def _index_new(history: Iterator[BrowserMergeVisit]) -> Results:
     for v in history:
-        desc: Optional[str] = None
-        duration: Optional[Second] = None
+        desc: str | None = None
+        duration: Second | None = None
         metadata = v.metadata
         if metadata is not None:
             desc = metadata.title
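
The control-flow change in `index` above is the classic try/except/else cleanup: the `return` moves out of the `try` body into an `else` clause, so the `try` guards only the call that can fail and the fallback runs on the fall-through path. A minimal sketch of the pattern with stand-in names (not promnesia's API):

def fetch(path):
    try:
        result = modern(path)  # only this call is guarded
    except Exception as e:
        print(f"modern backend failed: {e}")  # fall through to the legacy path
    else:
        return result  # success: skip the fallback entirely
    return legacy(path)

# stand-in backends so the sketch runs
def modern(path):
    raise RuntimeError("not supported")

def legacy(path):
    return f"legacy({path})"

print(fetch("/tmp/db"))  # -> legacy(/tmp/db)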