promnesia 1.2.20240810__py3-none-any.whl → 1.4.20250909__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. promnesia/__init__.py +18 -4
  2. promnesia/__main__.py +104 -78
  3. promnesia/cannon.py +108 -107
  4. promnesia/common.py +107 -88
  5. promnesia/compare.py +33 -30
  6. promnesia/compat.py +10 -10
  7. promnesia/config.py +37 -34
  8. promnesia/database/common.py +4 -3
  9. promnesia/database/dump.py +13 -13
  10. promnesia/database/load.py +7 -7
  11. promnesia/extract.py +19 -17
  12. promnesia/logging.py +27 -15
  13. promnesia/misc/install_server.py +32 -27
  14. promnesia/server.py +106 -79
  15. promnesia/sources/auto.py +104 -77
  16. promnesia/sources/auto_logseq.py +6 -5
  17. promnesia/sources/auto_obsidian.py +2 -2
  18. promnesia/sources/browser.py +20 -10
  19. promnesia/sources/browser_legacy.py +65 -50
  20. promnesia/sources/demo.py +7 -8
  21. promnesia/sources/fbmessenger.py +3 -3
  22. promnesia/sources/filetypes.py +22 -16
  23. promnesia/sources/github.py +9 -8
  24. promnesia/sources/guess.py +6 -2
  25. promnesia/sources/hackernews.py +7 -9
  26. promnesia/sources/hpi.py +5 -3
  27. promnesia/sources/html.py +11 -7
  28. promnesia/sources/hypothesis.py +3 -2
  29. promnesia/sources/instapaper.py +3 -2
  30. promnesia/sources/markdown.py +22 -12
  31. promnesia/sources/org.py +36 -17
  32. promnesia/sources/plaintext.py +41 -39
  33. promnesia/sources/pocket.py +5 -3
  34. promnesia/sources/reddit.py +24 -26
  35. promnesia/sources/roamresearch.py +5 -2
  36. promnesia/sources/rss.py +6 -8
  37. promnesia/sources/shellcmd.py +21 -11
  38. promnesia/sources/signal.py +27 -26
  39. promnesia/sources/smscalls.py +2 -3
  40. promnesia/sources/stackexchange.py +5 -4
  41. promnesia/sources/takeout.py +37 -34
  42. promnesia/sources/takeout_legacy.py +29 -19
  43. promnesia/sources/telegram.py +18 -12
  44. promnesia/sources/telegram_legacy.py +22 -11
  45. promnesia/sources/twitter.py +7 -6
  46. promnesia/sources/vcs.py +11 -6
  47. promnesia/sources/viber.py +11 -10
  48. promnesia/sources/website.py +8 -7
  49. promnesia/sources/zulip.py +3 -2
  50. promnesia/sqlite.py +13 -7
  51. promnesia/tests/common.py +10 -5
  52. promnesia/tests/server_helper.py +13 -10
  53. promnesia/tests/sources/test_auto.py +2 -3
  54. promnesia/tests/sources/test_filetypes.py +11 -8
  55. promnesia/tests/sources/test_hypothesis.py +10 -6
  56. promnesia/tests/sources/test_org.py +9 -5
  57. promnesia/tests/sources/test_plaintext.py +9 -8
  58. promnesia/tests/sources/test_shellcmd.py +13 -13
  59. promnesia/tests/sources/test_takeout.py +3 -5
  60. promnesia/tests/test_cannon.py +256 -239
  61. promnesia/tests/test_cli.py +12 -8
  62. promnesia/tests/test_compare.py +17 -13
  63. promnesia/tests/test_config.py +7 -8
  64. promnesia/tests/test_db_dump.py +15 -15
  65. promnesia/tests/test_extract.py +17 -10
  66. promnesia/tests/test_indexer.py +24 -18
  67. promnesia/tests/test_server.py +12 -13
  68. promnesia/tests/test_traverse.py +0 -2
  69. promnesia/tests/utils.py +3 -7
  70. promnesia-1.4.20250909.dist-info/METADATA +66 -0
  71. promnesia-1.4.20250909.dist-info/RECORD +80 -0
  72. {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/WHEEL +1 -2
  73. promnesia/kjson.py +0 -121
  74. promnesia/sources/__init__.pyi +0 -0
  75. promnesia-1.2.20240810.dist-info/METADATA +0 -54
  76. promnesia-1.2.20240810.dist-info/RECORD +0 -83
  77. promnesia-1.2.20240810.dist-info/top_level.txt +0 -1
  78. {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/entry_points.txt +0 -0
  79. {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info/licenses}/LICENSE +0 -0
promnesia/__init__.py CHANGED
@@ -1,6 +1,20 @@
1
- from pathlib import Path
2
- from .common import PathIsh, Visit, Source, last, Loc, Results, DbVisit, Context, Res
3
-
4
1
  # add deprecation warning so eventually this may converted to a namespace package?
5
2
  import warnings
6
- warnings.warn("DEPRECATED! Please import directly from 'promnesia.common', e.g. 'from promnesia.common import Visit, Source, Results'", DeprecationWarning)
3
+
4
+ from .common import ( # noqa: F401
5
+ Context,
6
+ DbVisit,
7
+ Loc,
8
+ PathIsh,
9
+ Res,
10
+ Results,
11
+ Source,
12
+ Visit,
13
+ last,
14
+ )
15
+
16
+ # TODO think again about it -- what are the pros and cons?
17
+ warnings.warn(
18
+ "DEPRECATED! Please import directly from 'promnesia.common', e.g. 'from promnesia.common import Visit, Source, Results'",
19
+ DeprecationWarning,
20
+ )
promnesia/__main__.py CHANGED
@@ -5,24 +5,33 @@ import ast
5
5
  import importlib
6
6
  import inspect
7
7
  import os
8
- from pathlib import Path
8
+ import shlex
9
9
  import shutil
10
- from subprocess import run, check_call, Popen
11
10
  import sys
11
+ from collections.abc import Callable, Iterable, Iterator, Sequence
12
+ from pathlib import Path
13
+ from subprocess import Popen, check_call, run
12
14
  from tempfile import TemporaryDirectory, gettempdir
13
- from typing import Callable, Sequence, Iterable, Iterator, Union
14
15
 
15
-
16
- from . import config
17
- from . import server
18
- from .misc import install_server
19
- from .common import Extractor, PathIsh, logger, get_tmpdir, DbVisit, Res
20
- from .common import Source, get_system_tz, user_config_file, default_config_path
16
+ from . import config, server
17
+ from .common import (
18
+ DbVisit,
19
+ Extractor,
20
+ PathIsh,
21
+ Res,
22
+ Source,
23
+ default_config_path,
24
+ get_system_tz,
25
+ get_tmpdir,
26
+ logger,
27
+ user_config_file,
28
+ )
21
29
  from .database.dump import visits_to_sqlite
22
30
  from .extract import extract_visits
31
+ from .misc import install_server
23
32
 
24
33
 
25
- def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Res[DbVisit]]:
34
+ def iter_all_visits(sources_subset: Iterable[str | int] = ()) -> Iterator[Res[DbVisit]]:
26
35
  cfg = config.get()
27
36
  output_dir = cfg.output_dir
28
37
  # not sure if belongs here??
@@ -44,7 +53,7 @@ def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Re
44
53
  if name and is_subset_sources:
45
54
  matched = name in sources_subset or i in sources_subset
46
55
  if matched:
47
- sources_subset -= {i, name} # type: ignore
56
+ sources_subset -= {i, name} # type: ignore[operator]
48
57
  else:
49
58
  logger.debug("skipping '%s' not in --sources.", name)
50
59
  continue
@@ -59,8 +68,7 @@ def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Re
59
68
  yield RuntimeError(f"Shouldn't have gotten this as a source: {source}")
60
69
  continue
61
70
 
62
- # todo hmm it's not even used??
63
- einfo = source.description
71
+ _einfo = source.description # FIXME hmm it's not even used?? add as exception notes?
64
72
  for v in extract_visits(source, src=source.name):
65
73
  if hook is None:
66
74
  yield v
@@ -70,13 +78,16 @@ def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Re
70
78
  except Exception as e:
71
79
  yield e
72
80
 
73
- if sources_subset:
81
+ if sources_subset: # type: ignore[truthy-iterable]
74
82
  logger.warning("unknown --sources: %s", ", ".join(repr(i) for i in sources_subset))
75
83
 
76
84
 
77
- def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=(), overwrite_db: bool=False) -> Iterable[Exception]:
85
+ def _do_index(
86
+ *, dry: bool = False, sources_subset: Iterable[str | int] = (), overwrite_db: bool = False
87
+ ) -> Iterable[Exception]:
78
88
  # also keep & return errors for further display
79
89
  errors: list[Exception] = []
90
+
80
91
  def it() -> Iterable[Res[DbVisit]]:
81
92
  for v in iter_all_visits(sources_subset):
82
93
  if isinstance(v, Exception):
@@ -98,11 +109,12 @@ def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=(), ove
98
109
 
99
110
  def do_index(
100
111
  config_file: Path,
101
- dry: bool=False,
102
- sources_subset: Iterable[Union[str, int]]=(),
103
- overwrite_db: bool=False,
112
+ *,
113
+ dry: bool = False,
114
+ sources_subset: Iterable[str | int] = (),
115
+ overwrite_db: bool = False,
104
116
  ) -> Sequence[Exception]:
105
- config.load_from(config_file) # meh.. should be cleaner
117
+ config.load_from(config_file) # meh.. should be cleaner
106
118
  try:
107
119
  errors = list(_do_index(dry=dry, sources_subset=sources_subset, overwrite_db=overwrite_db))
108
120
  finally:
@@ -120,13 +132,17 @@ def demo_sources() -> dict[str, Callable[[], Extractor]]:
120
132
  def lazy(name: str) -> Callable[[], Extractor]:
121
133
  # helper to avoid failed imports etc, since people might be lacking necessary dependencies
122
134
  def inner() -> Extractor:
123
- from . import sources
135
+ # TODO why this import??
136
+ from . import sources # noqa: F401
137
+
124
138
  module = importlib.import_module(f'promnesia.sources.{name}')
125
139
  return getattr(module, 'index')
140
+
126
141
  return inner
127
142
 
128
143
  res = {}
129
144
  import promnesia.sources
145
+
130
146
  path: list[str] = getattr(promnesia.sources, '__path__') # should be present
131
147
  for p in path:
132
148
  for x in sorted(Path(p).glob('*.py')):
@@ -138,16 +154,16 @@ def demo_sources() -> dict[str, Callable[[], Extractor]]:
138
154
 
139
155
 
140
156
  def do_demo(
141
- *,
142
- index_as: str,
143
- params: Sequence[str],
144
- port: str | None,
145
- config_file: Path | None,
146
- dry: bool=False,
147
- name: str='demo',
148
- sources_subset: Iterable[Union[str, int]]=(),
149
- overwrite_db: bool=False,
150
- ) -> None:
157
+ *,
158
+ index_as: str,
159
+ params: Sequence[str],
160
+ port: str | None,
161
+ config_file: Path | None,
162
+ dry: bool = False,
163
+ name: str = 'demo',
164
+ sources_subset: Iterable[str | int] = (),
165
+ overwrite_db: bool = False,
166
+ ) -> None:
151
167
  with TemporaryDirectory() as tdir:
152
168
  outdir = Path(tdir)
153
169
 
@@ -170,17 +186,17 @@ def do_demo(
170
186
 
171
187
  dbp = config.get().db
172
188
  if port is None:
173
- logger.warning(f"Port isn't specified, not serving!\nYou can inspect the database in the meantime, e.g. 'sqlitebrowser {dbp}'")
189
+ logger.warning(
190
+ f"Port isn't specified, not serving!\nYou can inspect the database in the meantime, e.g. 'sqlitebrowser {dbp}'"
191
+ )
174
192
  else:
175
193
  from .server import ServerConfig
194
+
176
195
  server._run(
177
196
  host='127.0.0.1',
178
197
  port=port,
179
198
  quiet=False,
180
- config=ServerConfig(
181
- db=dbp,
182
- timezone=get_system_tz()
183
- ),
199
+ config=ServerConfig(db=dbp, timezone=get_system_tz()),
184
200
  )
185
201
 
186
202
  if sys.stdin.isatty():
@@ -189,6 +205,7 @@ def do_demo(
189
205
 
190
206
  def read_example_config() -> str:
191
207
  from .misc import config_example
208
+
192
209
  return inspect.getsource(config_example)
193
210
 
194
211
 
@@ -202,7 +219,10 @@ def config_create(args: argparse.Namespace) -> None:
202
219
  stub = read_example_config()
203
220
  cfgdir.mkdir(parents=True)
204
221
  cfg.write_text(stub)
205
- logger.info("Created a stub config in '%s'. Edit it to tune to your liking. (see https://github.com/karlicoss/promnesia#setup for more info)", cfg)
222
+ logger.info(
223
+ "Created a stub config in '%s'. Edit it to tune to your liking. (see https://github.com/karlicoss/promnesia#setup for more info)",
224
+ cfg,
225
+ )
206
226
 
207
227
 
208
228
  def config_check(args: argparse.Namespace) -> None:
@@ -219,9 +239,10 @@ def _config_check(cfg: Path) -> Iterable[Exception]:
219
239
  logger.info('config: %s', cfg)
220
240
 
221
241
  def check(cmd: list[str | Path], **kwargs) -> Iterable[Exception]:
222
- logger.debug(' '.join(map(str, cmd)))
223
- res = run(cmd, **kwargs)
242
+ logger.debug(shlex.join(map(str, cmd)))
243
+ res = run(cmd, **kwargs) # noqa: PLW1510
224
244
  if res.returncode > 0:
245
+ # TODO what's up with empty exception??
225
246
  yield Exception()
226
247
 
227
248
  logger.info('Checking syntax...')
@@ -232,27 +253,31 @@ def _config_check(cfg: Path) -> Iterable[Exception]:
232
253
  **os.environ,
233
254
  # if config is on read only partition, the command would fail due to generated bytecode
234
255
  # so put it in the temporary directory instead
235
- 'PYTHONPYCACHEPREFIX': gettempdir()
256
+ 'PYTHONPYCACHEPREFIX': gettempdir(),
236
257
  },
237
258
  )
238
259
 
239
260
  # todo not sure if should be more defensive than check_call here
240
261
  logger.info('Checking type safety...')
241
262
  try:
242
- import mypy
263
+ import mypy # noqa: F401
243
264
  except ImportError:
244
265
  logger.warning("mypy not found, can't use it to check config!")
245
266
  else:
246
- yield from check([
247
- sys.executable, '-m', 'mypy',
248
- '--namespace-packages',
249
- '--color-output', # not sure if works??
250
- '--pretty',
251
- '--show-error-codes',
252
- '--show-error-context',
253
- '--check-untyped-defs',
254
- cfg,
255
- ])
267
+ yield from check(
268
+ [
269
+ sys.executable,
270
+ '-m',
271
+ 'mypy',
272
+ '--namespace-packages',
273
+ '--color-output', # not sure if works??
274
+ '--pretty',
275
+ '--show-error-codes',
276
+ '--show-error-context',
277
+ '--check-untyped-defs',
278
+ cfg,
279
+ ]
280
+ )
256
281
 
257
282
  logger.info('Checking runtime errors...')
258
283
  yield from check([sys.executable, cfg])
@@ -260,7 +285,7 @@ def _config_check(cfg: Path) -> Iterable[Exception]:
260
285
 
261
286
  def cli_doctor_db(args: argparse.Namespace) -> None:
262
287
  # todo could fallback to 'sqlite3 <db> .dump'?
263
- config.load_from(args.config) # TODO meh
288
+ config.load_from(args.config) # TODO meh
264
289
  db = config.get().db
265
290
  if not db.exists():
266
291
  logger.error("Database {db} doesn't exist!")
@@ -287,16 +312,15 @@ def cli_doctor_server(args: argparse.Namespace) -> None:
287
312
  cmd = ['curl', endpoint]
288
313
  logger.info(f'Running {cmd}')
289
314
  check_call(cmd)
290
- print() # curl doesn't add newline
315
+ print() # curl doesn't add newline
291
316
  logger.info('You should see the database path and version above!')
292
317
 
293
318
 
294
- def _ordinal_or_name(s: str) -> Union[str, int]:
319
+ def _ordinal_or_name(s: str) -> str | int:
295
320
  try:
296
- s = int(s) # type: ignore
321
+ return int(s)
297
322
  except ValueError:
298
- pass
299
- return s
323
+ return s
300
324
 
301
325
 
302
326
  def main() -> None:
@@ -308,7 +332,9 @@ def main() -> None:
308
332
  if not given, all :func:`demo_sources()` are run
309
333
  """
310
334
  parser.add_argument('--config', type=Path, default=default_config_path, help='Config path')
311
- parser.add_argument('--dry', action='store_true', help="Dry run, won't touch the database, only print the results out")
335
+ parser.add_argument(
336
+ '--dry', action='store_true', help="Dry run, won't touch the database, only print the results out"
337
+ )
312
338
  parser.add_argument(
313
339
  '--sources',
314
340
  required=False,
@@ -322,13 +348,12 @@ def main() -> None:
322
348
  '--overwrite',
323
349
  required=False,
324
350
  action="store_true",
325
- help="Empty db before populating it with newly indexed visits."
326
- " If interrupted, db is left untouched."
351
+ help="Empty db before populating it with newly indexed visits. If interrupted, db is left untouched.",
327
352
  )
328
353
 
329
354
  F = lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, width=120)
330
355
  p = argparse.ArgumentParser(formatter_class=F)
331
- subp = p.add_subparsers(dest='mode', )
356
+ subp = p.add_subparsers(dest='mode')
332
357
  ep = subp.add_parser('index', help='Create/update the link database', formatter_class=F)
333
358
  add_index_args(ep, default_config_path())
334
359
  # TODO use some way to override or provide config only via cmdline?
@@ -341,43 +366,45 @@ def main() -> None:
341
366
  # TODO use docstring or something?
342
367
  #
343
368
 
344
- add_port_arg = lambda p: p.add_argument('--port', type=str, default='13131' , help='Port to serve on')
369
+ add_port_arg = lambda p: p.add_argument('--port', type=str, default='13131', help='Port to serve on')
345
370
 
346
- ap.add_argument('--name', type=str, default='demo' , help='Set custom source name')
371
+ ap.add_argument('--name', type=str, default='demo', help='Set custom source name')
347
372
  add_port_arg(ap)
348
- ap.add_argument('--no-serve', action='store_const', const=None, dest='port', help='Pass to only index without running server')
373
+ ap.add_argument(
374
+ '--no-serve', action='store_const', const=None, dest='port', help='Pass to only index without running server'
375
+ )
349
376
  ap.add_argument(
350
377
  '--as',
351
- choices=list(sorted(demo_sources().keys())),
378
+ choices=sorted(demo_sources().keys()),
352
379
  default='guess',
353
380
  help='Promnesia source to index as (see https://github.com/karlicoss/promnesia/tree/master/src/promnesia/sources for the full list)',
354
381
  )
355
382
  add_index_args(ap)
356
383
  ap.add_argument('params', nargs='*', help='Optional extra params for the indexer')
357
384
 
358
- isp = subp.add_parser('install-server', help='Install server as a systemd service (for autostart)', formatter_class=F)
385
+ isp = subp.add_parser(
386
+ 'install-server', help='Install server as a systemd service (for autostart)', formatter_class=F
387
+ )
359
388
  install_server.setup_parser(isp)
360
389
 
361
390
  cp = subp.add_parser('config', help='Config management')
362
- cp.set_defaults(func=lambda *args: cp.print_help())
391
+ cp.set_defaults(func=lambda *_args: cp.print_help())
363
392
  scp = cp.add_subparsers()
364
393
  ccp = scp.add_parser('check', help='Check config')
365
394
  ccp.set_defaults(func=config_check)
366
395
  ccp.add_argument('--config', type=Path, default=default_config_path(), help='Config path')
367
396
 
368
397
  icp = scp.add_parser('create', help='Create user config')
369
- icp.add_argument(
370
- "--config", type=Path, default=default_config_path(), help="Config path"
371
- )
398
+ icp.add_argument("--config", type=Path, default=default_config_path(), help="Config path")
372
399
  icp.set_defaults(func=config_create)
373
400
 
374
401
  dp = subp.add_parser('doctor', help='Troubleshooting assistant')
375
402
  dp.add_argument('--config', type=Path, default=default_config_path(), help='Config path')
376
- dp.set_defaults(func=lambda *args: dp.print_help())
403
+ dp.set_defaults(func=lambda *_args: dp.print_help())
377
404
  sdp = dp.add_subparsers()
378
- sdp.add_parser('config' , help='Check config' ).set_defaults(func=config_check )
405
+ sdp.add_parser('config', help='Check config').set_defaults(func=config_check)
379
406
  sdp.add_parser('database', help='Inspect database').set_defaults(func=cli_doctor_db)
380
- sdps = sdp.add_parser('server' , help='Check server' )
407
+ sdps = sdp.add_parser('server', help='Check server')
381
408
  sdps.set_defaults(func=cli_doctor_server)
382
409
  add_port_arg(sdps)
383
410
 
@@ -396,7 +423,7 @@ def main() -> None:
396
423
  # the only downside is storage. dunno.
397
424
  # worst case -- could use database?
398
425
 
399
- with get_tmpdir() as tdir: # TODO??
426
+ with get_tmpdir() as _tdir: # TODO what's the tmp dir for??
400
427
  if mode == 'index':
401
428
  errors = do_index(
402
429
  config_file=args.config,
@@ -420,15 +447,14 @@ def main() -> None:
420
447
  name=args.name,
421
448
  sources_subset=args.sources,
422
449
  overwrite_db=args.overwrite,
423
- )
424
- elif mode == 'install-server': # todo rename to 'autostart' or something?
450
+ )
451
+ elif mode == 'install-server': # todo rename to 'autostart' or something?
425
452
  install_server.install(args)
426
- elif mode == 'config':
427
- args.func(args)
428
- elif mode == 'doctor':
453
+ elif mode == 'config' or mode == 'doctor':
429
454
  args.func(args)
430
455
  else:
431
456
  raise AssertionError(f'unexpected mode {mode}')
432
457
 
458
+
433
459
  if __name__ == '__main__':
434
460
  main()