promnesia 1.2.20230515__py3-none-any.whl → 1.3.20241021__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. promnesia/__init__.py +14 -3
  2. promnesia/__main__.py +60 -35
  3. promnesia/cannon.py +27 -27
  4. promnesia/common.py +85 -67
  5. promnesia/compare.py +21 -22
  6. promnesia/compat.py +10 -10
  7. promnesia/config.py +23 -23
  8. promnesia/database/common.py +67 -0
  9. promnesia/database/dump.py +188 -0
  10. promnesia/{read_db.py → database/load.py} +16 -17
  11. promnesia/extract.py +14 -11
  12. promnesia/kjson.py +12 -11
  13. promnesia/logging.py +4 -4
  14. promnesia/misc/__init__.pyi +0 -0
  15. promnesia/misc/config_example.py +1 -2
  16. promnesia/misc/install_server.py +7 -9
  17. promnesia/server.py +57 -47
  18. promnesia/sources/__init__.pyi +0 -0
  19. promnesia/sources/auto.py +50 -35
  20. promnesia/sources/auto_logseq.py +6 -5
  21. promnesia/sources/auto_obsidian.py +2 -2
  22. promnesia/sources/browser.py +14 -9
  23. promnesia/sources/browser_legacy.py +26 -16
  24. promnesia/sources/demo.py +19 -3
  25. promnesia/sources/fbmessenger.py +3 -2
  26. promnesia/sources/filetypes.py +16 -7
  27. promnesia/sources/github.py +7 -9
  28. promnesia/sources/guess.py +2 -1
  29. promnesia/sources/hackernews.py +2 -2
  30. promnesia/sources/hpi.py +2 -2
  31. promnesia/sources/html.py +7 -5
  32. promnesia/sources/hypothesis.py +4 -3
  33. promnesia/sources/instapaper.py +2 -2
  34. promnesia/sources/markdown.py +31 -21
  35. promnesia/sources/org.py +27 -13
  36. promnesia/sources/plaintext.py +30 -29
  37. promnesia/sources/pocket.py +3 -2
  38. promnesia/sources/reddit.py +20 -19
  39. promnesia/sources/roamresearch.py +2 -1
  40. promnesia/sources/rss.py +4 -5
  41. promnesia/sources/shellcmd.py +19 -6
  42. promnesia/sources/signal.py +33 -24
  43. promnesia/sources/smscalls.py +2 -2
  44. promnesia/sources/stackexchange.py +4 -3
  45. promnesia/sources/takeout.py +76 -9
  46. promnesia/sources/takeout_legacy.py +24 -12
  47. promnesia/sources/telegram.py +13 -11
  48. promnesia/sources/telegram_legacy.py +18 -7
  49. promnesia/sources/twitter.py +6 -5
  50. promnesia/sources/vcs.py +5 -3
  51. promnesia/sources/viber.py +10 -9
  52. promnesia/sources/website.py +4 -4
  53. promnesia/sources/zulip.py +3 -2
  54. promnesia/sqlite.py +7 -4
  55. promnesia/tests/__init__.py +0 -0
  56. promnesia/tests/common.py +140 -0
  57. promnesia/tests/server_helper.py +67 -0
  58. promnesia/tests/sources/__init__.py +0 -0
  59. promnesia/tests/sources/test_auto.py +65 -0
  60. promnesia/tests/sources/test_filetypes.py +43 -0
  61. promnesia/tests/sources/test_hypothesis.py +39 -0
  62. promnesia/tests/sources/test_org.py +64 -0
  63. promnesia/tests/sources/test_plaintext.py +25 -0
  64. promnesia/tests/sources/test_shellcmd.py +21 -0
  65. promnesia/tests/sources/test_takeout.py +56 -0
  66. promnesia/tests/test_cannon.py +325 -0
  67. promnesia/tests/test_cli.py +40 -0
  68. promnesia/tests/test_compare.py +30 -0
  69. promnesia/tests/test_config.py +289 -0
  70. promnesia/tests/test_db_dump.py +222 -0
  71. promnesia/tests/test_extract.py +65 -0
  72. promnesia/tests/test_extract_urls.py +43 -0
  73. promnesia/tests/test_indexer.py +251 -0
  74. promnesia/tests/test_server.py +291 -0
  75. promnesia/tests/test_traverse.py +39 -0
  76. promnesia/tests/utils.py +35 -0
  77. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/METADATA +15 -18
  78. promnesia-1.3.20241021.dist-info/RECORD +83 -0
  79. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/WHEEL +1 -1
  80. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/entry_points.txt +0 -1
  81. promnesia/dump.py +0 -105
  82. promnesia-1.2.20230515.dist-info/RECORD +0 -58
  83. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/LICENSE +0 -0
  84. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/top_level.txt +0 -0
promnesia/server.py CHANGED
@@ -1,35 +1,45 @@
-#!/usr/bin/python3
 from __future__ import annotations

-__package__ = 'promnesia' # ugh. hacky way to make wsgi runner work properly...
-
 import argparse
-from dataclasses import dataclass
-from datetime import timedelta
-from functools import lru_cache
+import importlib.metadata
 import json
 import logging
 import os
+from dataclasses import dataclass
+from datetime import timedelta
+from functools import lru_cache
 from pathlib import Path
-from typing import List, NamedTuple, Dict, Optional, Any, Tuple, Protocol
-
+from typing import Any, NamedTuple, Optional, Protocol

+import fastapi
 import pytz
 from pytz import BaseTzInfo
-
-import fastapi
-
-from sqlalchemy import MetaData, exists, literal, between, or_, and_, exc, select
-from sqlalchemy import Column, Table, func, types
-from sqlalchemy.sql.elements import ColumnElement
+from sqlalchemy import (
+    Column,
+    Table,
+    and_,
+    between,
+    exc,
+    func,
+    literal,
+    or_,
+    select,
+    types,
+)
 from sqlalchemy.sql import text
+from sqlalchemy.sql.elements import ColumnElement

-
-from .common import PathWithMtime, DbVisit, Url, setup_logger, default_output_dir, get_system_tz
 from .cannon import canonify
+from .common import (
+    DbVisit,
+    PathWithMtime,
+    default_output_dir,
+    get_system_tz,
+    setup_logger,
+)
+from .database.load import DbStuff, get_db_stuff, row_to_db_visit

-
-Json = Dict[str, Any]
+Json = dict[str, Any]

 app = fastapi.FastAPI()

@@ -51,8 +61,7 @@ def get_logger() -> logging.Logger:


 def get_version() -> str:
-    from pkg_resources import get_distribution
-    return get_distribution(__package__).version
+    return importlib.metadata.version(__package__)


 class ServerConfig(NamedTuple):
@@ -66,7 +75,7 @@ class ServerConfig(NamedTuple):
     })

     @classmethod
-    def from_str(cls, cfgs: str) -> 'ServerConfig':
+    def from_str(cls, cfgs: str) -> ServerConfig:
         d = json.loads(cfgs)
         return cls(
             db      =Path(d['db']),
@@ -112,15 +121,13 @@ def as_json(v: DbVisit) -> Json:
     }


-def get_db_path(check: bool=True) -> Path:
+def get_db_path(*, check: bool=True) -> Path:
     db = EnvConfig.get().db
     if check:
         assert db.exists(), db
     return db


-from .read_db import DbStuff, get_db_stuff
-
 @lru_cache(1)
 # PathWithMtime aids lru_cache in reloading the sqlalchemy binder
 def _get_stuff(db_path: PathWithMtime) -> DbStuff:
@@ -128,7 +135,7 @@ def _get_stuff(db_path: PathWithMtime) -> DbStuff:
     return get_db_stuff(db_path=db_path.path)


-def get_stuff(db_path: Optional[Path]=None) -> DbStuff: # TODO better name
+def get_stuff(db_path: Path | None=None) -> DbStuff: # TODO better name
     # ok, it will always load from the same db file; but intermediate would be kinda an optional dump.
     if db_path is None:
         db_path = get_db_path()
@@ -136,10 +143,10 @@ def get_stuff(db_path: Optional[Path]=None) -> DbStuff: # TODO better name


 def db_stats(db_path: Path) -> Json:
-    engine, binder, table = get_stuff(db_path)
+    engine, table = get_stuff(db_path)
     query = select(func.count()).select_from(table)
     with engine.connect() as conn:
-        total = list(conn.execute(query))[0][0]
+        [(total,)] = conn.execute(query)
     return {
         'total_visits': total,
     }
@@ -151,8 +158,8 @@ class Where(Protocol):

 @dataclass
 class VisitsResponse:
-    original_url: Url
-    normalised_url: Url
+    original_url: str
+    normalised_url: str
     visits: Any


@@ -167,7 +174,7 @@ def search_common(url: str, where: Where) -> VisitsResponse:
     url = original_url
     logger.info('normalised url: %s', url)

-    engine, binder, table = get_stuff()
+    engine, table = get_stuff()

     query = table.select().where(where(table=table, url=url))
     logger.debug('query: %s', query)
@@ -175,17 +182,17 @@ def search_common(url: str, where: Where) -> VisitsResponse:
     with engine.connect() as conn:
         try:
             # TODO make more defensive here
-            visits: List[DbVisit] = [binder.from_row(row) for row in conn.execute(query)]
+            visits: list[DbVisit] = [row_to_db_visit(row) for row in conn.execute(query)]
         except exc.OperationalError as e:
             if getattr(e, 'msg', None) == 'no such table: visits':
-                logger.warn('you may have to run indexer first!')
+                logger.warning('you may have to run indexer first!')
                 #result['visits'] = [{an error with a msg}] # TODO
                 #return result
             raise

     logger.debug('got %d visits from db', len(visits))

-    vlist: List[DbVisit] = []
+    vlist: list[DbVisit] = []
     for vis in visits:
         dt = vis.dt
         if dt.tzinfo is None: # FIXME need this for /visits endpoint as well?
@@ -228,10 +235,11 @@ def status() -> Json:
         logger.exception(e)
         stats = {'ERROR': str(e)}

-    version: Optional[str]
+    version: str | None
     try:
         version = get_version()
     except Exception as e:
+        logger.exception(e)
         version = None

     return {
@@ -241,10 +249,9 @@ def status() -> Json:
     }


-from dataclasses import dataclass
 @dataclass
 class VisitsRequest:
-    url: Url
+    url: str

 @app.get ('/visits', response_model=VisitsResponse)
 @app.post('/visits', response_model=VisitsResponse)
@@ -255,15 +262,17 @@ def visits(request: VisitsRequest) -> VisitsResponse:
         url=url,
         # odd, doesn't work just with: x or (y and z)
         where=lambda table, url: or_(
-            table.c.norm_url == url, # exact match
-            and_(table.c.context != None, table.c.norm_url.startswith(url, autoescape=True)) # + child visits, but only 'interesting' ones
+            # exact match
+            table.c.norm_url == url,
+            # + child visits, but only 'interesting' ones
+            and_(table.c.context != None, table.c.norm_url.startswith(url, autoescape=True)) # noqa: E711
         ),
     )


 @dataclass
 class SearchRequest:
-    url: Url
+    url: str

 @app.get ('/search', response_model=VisitsResponse)
 @app.post('/search', response_model=VisitsResponse)
@@ -300,7 +309,7 @@ def search_around(request: SearchAroundRequest) -> VisitsResponse:

     return search_common(
         url='http://dummy.org', # NOTE: not used in the where query (below).. perhaps need to get rid of this
-        where=lambda table, url: between(
+        where=lambda table, url: between( # noqa: ARG005
             func.strftime(
                 '%s', # NOTE: it's tz aware, e.g. would distinguish +05:00 vs -03:00
                 # this is a bit fragile, relies on cachew internal timestamp format, e.g.
@@ -323,25 +332,26 @@ def search_around(request: SearchAroundRequest) -> VisitsResponse:
 _NO_VERSION = (0, 11, 14)
 _LATEST = (9999, 9999, 9999)

-def as_version(version: str) -> Tuple[int, int, int]:
+def as_version(version: str) -> tuple[int, int, int]:
     if version == '':
         return _NO_VERSION
     try:
         [v1, v2, v3] = map(int, version.split('.'))
-        return (v1, v2, v3)
     except Exception as e:
         logger = get_logger()
         logger.error('error while parsing version %s', version)
         logger.exception(e)
         return _LATEST
+    else:
+        return (v1, v2, v3)


 @dataclass
 class VisitedRequest:
-    urls: List[str]
+    urls: list[str]
     client_version: str = ''

-VisitedResponse = List[Optional[Json]]
+VisitedResponse = list[Optional[Json]]

 @app.get ('/visited', response_model=VisitedResponse)
 @app.post('/visited', response_model=VisitedResponse)
@@ -356,12 +366,12 @@ def visited(request: VisitedRequest) -> VisitedResponse:
     version = as_version(client_version)

     nurls = [canonify(u) for u in urls]
-    snurls = list(sorted(set(nurls)))
+    snurls = sorted(set(nurls))

     if len(snurls) == 0:
         return []

-    engine, binder, table = get_stuff()
+    engine, table = get_stuff()

     # sqlalchemy doesn't seem to support SELECT FROM (VALUES (...)) in its api
     # also doesn't support array binding...
@@ -389,7 +399,7 @@ SELECT queried, visits.*
     # brings down large queries to 50ms...
     with engine.connect() as conn:
         res = list(conn.execute(query))
-    present: Dict[str, Any] = {row[0]: binder.from_row(row[1:]) for row in res}
+    present: dict[str, Any] = {row[0]: row_to_db_visit(row[1:]) for row in res}
     results = []
     for nu in nurls:
         r = present.get(nu, None)
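Worth calling out in server.py: get_version() drops the deprecated pkg_resources API in favour of the stdlib. A minimal before/after sketch (importlib.metadata has been in the stdlib since Python 3.8; the distribution name here is promnesia's own):

    # before: pkg_resources, part of setuptools and deprecated
    from pkg_resources import get_distribution
    version = get_distribution('promnesia').version

    # after: stdlib importlib.metadata, no setuptools import at runtime
    import importlib.metadata
    version = importlib.metadata.version('promnesia')

Both return the installed distribution's version string; the stdlib call also avoids the notoriously slow pkg_resources import at server startup.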
promnesia/sources/auto.py CHANGED
@@ -5,34 +5,46 @@
 - autodetects Obsidian vault and adds `obsidian://` app protocol support [[file:../src/promnesia/sources/obsidian.py][promnesia.sources.obsidian]]
 - autodetects Logseq graph and adds `logseq://` app protocol support [[file:../src/promnesia/sources/logseq.py][promnesia.sources.logseq]]
 """
+from __future__ import annotations

 import csv
-from concurrent.futures import ProcessPoolExecutor as Pool
-from contextlib import nullcontext
-from datetime import datetime
 import itertools
 import json
 import os
-from typing import Optional, Iterable, Union, List, Tuple, NamedTuple, Sequence, Iterator, Iterable, Callable, Any, Dict, Set
+from collections.abc import Iterable, Iterator, Sequence
+from concurrent.futures import ProcessPoolExecutor as Pool
+from contextlib import nullcontext
 from fnmatch import fnmatch
+from functools import wraps
 from pathlib import Path
-from functools import lru_cache, wraps
-import warnings
-
-import pytz
-
-from ..common import Visit, Url, PathIsh, get_logger, Loc, get_tmpdir, extract_urls, Extraction, Result, Results, mime, traverse, file_mtime, echain, logger
-from ..config import use_cores
+from typing import Any, Callable, NamedTuple, Optional
+
+from promnesia.common import (
+    Loc,
+    PathIsh,
+    Result,
+    Results,
+    Visit,
+    echain,
+    extract_urls,
+    file_mtime,
+    get_logger,
+    get_tmpdir,
+    logger,
+    mime,
+    traverse,
+    warn_once,
+)
+from promnesia.config import use_cores

-
-from .filetypes import EUrl
-from .auto_obsidian import obsidian_replacer
 from .auto_logseq import logseq_replacer
+from .auto_obsidian import obsidian_replacer
+from .filetypes import Ctx, EUrl


-def _collect(thing, path: List[str], result: List[EUrl]) -> None:
+def _collect(thing, path: list[str], result: list[EUrl]) -> None:
     if isinstance(thing, str):
-        ctx: Ctx = tuple(path) # type: ignore
+        ctx: Ctx = tuple(path)
         result.extend([EUrl(url=u, ctx=ctx) for u in extract_urls(thing)])
     elif isinstance(thing, list):
         path.append('[]')
@@ -50,9 +62,9 @@ def _collect(thing, path: List[str], result: List[EUrl]) -> None:


 # TODO mm. okay, I suppose could use kython consuming thingy?..
-def collect_from(thing) -> List[EUrl]:
-    uuu: List[EUrl] = []
-    path: List[str] = []
+def collect_from(thing) -> list[EUrl]:
+    uuu: list[EUrl] = []
+    path: list[str] = []
     _collect(thing, path, uuu)
     return uuu

@@ -84,7 +96,7 @@ def _plaintext(path: Path) -> Results:
 def fallback(ex):
     """Falls back to plaintext in case of issues"""

-    fallback_active: Dict[Any, bool] = {}
+    fallback_active: dict[Any, bool] = {}
     @wraps(ex)
     def wrapped(path: Path):
         nonlocal fallback_active
@@ -98,7 +110,7 @@ def fallback(ex):
         except ModuleNotFoundError as me:
             logger = get_logger()
             logger.exception(me)
-            logger.warn('%s: %s not found, falling back to grep! "pip3 install --user %s" for better support!', path, me.name, me.name)
+            logger.warning('%s: %s not found, falling back to grep! "pip3 install --user %s" for better support!', path, me.name, me.name)
             yield me
             fallback_active[ex] = True
             do_fallback = True
@@ -125,7 +137,7 @@ def _org(path: Path) -> Results:
     return org.extract_from_file(path)


-from .filetypes import TYPE2IDX, type2idx, IGNORE, CODE
+from .filetypes import CODE, IGNORE, TYPE2IDX, type2idx

 TYPE2IDX.update({
     'application/json': _json,
@@ -167,8 +179,8 @@ for t in CODE:
 Replacer = Optional[Callable[[str, str], str]]

 def index(
-    *paths: Union[PathIsh],
-    ignored: Union[Sequence[str], str]=(),
+    *paths: PathIsh,
+    ignored: Sequence[str] | str=(),
     follow: bool=True,
     replacer: Replacer=None,
 ) -> Results:
@@ -209,10 +221,10 @@ class Options(NamedTuple):
     # TODO option to add ignores? not sure..
     # TODO I don't like this replacer thing... think about removing it
     replacer: Replacer
-    root: Optional[Path]=None
+    root: Path | None=None


-def _index_file_aux(path: Path, opts: Options) -> Union[Exception, List[Result]]:
+def _index_file_aux(path: Path, opts: Options) -> Exception | list[Result]:
     # just a helper for the concurrent version (the generator isn't picklable)
     try:
         return list(_index_file(path, opts=opts))
@@ -247,7 +259,7 @@ def _index(path: Path, opts: Options) -> Results:
             continue

         p = p.resolve()
-        if not os.path.exists(p):
+        if not os.path.exists(p): # noqa: PTH110
             logger.debug('ignoring %s: broken symlink?', p)
             continue
@@ -265,8 +277,10 @@ def _index(path: Path, opts: Options) -> Results:


 Mime = str
-from .filetypes import Ex # meh
-def by_path(pp: Path) -> Tuple[Optional[Ex], Optional[Mime]]:
+from .filetypes import Ex  # meh
+
+
+def by_path(pp: Path) -> tuple[Ex | None, Mime | None]:
     suf = pp.suffix.lower()
     # firt check suffixes, it's faster
     s = type2idx(suf)
@@ -282,6 +296,8 @@ def by_path(pp: Path) -> Tuple[Optional[Ex], Optional[Mime]]:

 def _index_file(pp: Path, opts: Options) -> Results:
     logger = get_logger()
+    # TODO need to keep debug logs here...
+    # logger.info(f"indexing {pp}")
     # TODO use kompress?
     # TODO not even sure if it's used...
     suf = pp.suffix.lower()
@@ -307,18 +323,17 @@ def _index_file(pp: Path, opts: Options) -> Results:

     ip, pm = by_path(pp)
     if ip is None:
-        # TODO use warning (with mime/ext as key?)
-        # TODO only log once? # hmm..
+        # todo not really sure about using warnings vs yielding error here?
         msg = f'No extractor for suffix {suf}, mime {pm}'
-        warnings.warn(msg)
+        warn_once(msg)
         yield echain(ex, RuntimeError(msg))
         return

     logger.debug('indexing via %s: %s', ip.__name__, pp)

-    def indexer() -> Union[Urls, Results]:
+    def indexer() -> Urls | Results:
         # eh, annoying.. need to make more generic..
-        idx = ip(pp) # type: ignore
+        idx = ip(pp)
         try:
             yield from idx
         except Exception as e:
@@ -351,7 +366,7 @@ def _index_file(pp: Path, opts: Options) -> Results:
             v = v._replace(locator=loc)

         if replacer is not None and root is not None:
-            upd: Dict[str, Any] = {}
+            upd: dict[str, Any] = {}
             href = v.locator.href
             if href is not None:
                 upd['locator'] = v.locator._replace(href=replacer(href, str(root)), title=replacer(v.locator.title, str(root)))
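auto.py now emits its "No extractor" message through warn_once from promnesia.common rather than warnings.warn, so a directory full of files with the same unhandled suffix produces one warning instead of hundreds. The helper's implementation isn't shown in this diff; a deduplicating helper along these lines would give that behaviour (a sketch, not promnesia's actual code):

    import warnings
    from functools import lru_cache

    @lru_cache(maxsize=None)
    def warn_once(msg: str) -> None:
        # lru_cache keys on the message text, so each distinct message warns only once
        warnings.warn(msg, stacklevel=2)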
promnesia/sources/auto_logseq.py CHANGED
@@ -1,14 +1,15 @@
 import os.path
 import urllib.parse

+
 def logseq_replacer(path: str, root: str) -> str:
-    if not path.startswith("editor://") or not (path.endswith('.md') or path.endswith('.org')):
+    if not path.startswith("editor://") or not (path.endswith((".md", ".org"))):
         return path
-
-    graph = os.path.basename(root)
-    page_name = os.path.basename(path).rsplit('.', 1)[0]
+
+    graph = os.path.basename(root)  # noqa: PTH119
+    page_name = os.path.basename(path).rsplit('.', 1)[0]  # noqa: PTH119
     encoded_page_name = urllib.parse.quote(page_name)
-
+
     uri = f"logseq://graph/{graph}?page={encoded_page_name}"

     return uri
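For context, logseq_replacer rewrites an editor:// file path into a logseq:// page URI from the graph directory name and the URL-quoted page name. Expected behaviour, with made-up paths for illustration:

    logseq_replacer("editor:///home/user/notes/pages/My Page.md", "/home/user/notes")
    # -> "logseq://graph/notes?page=My%20Page"

    logseq_replacer("/tmp/readme.txt", "/home/user/notes")
    # -> "/tmp/readme.txt" (returned unchanged: not an editor:// markdown/org link)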
promnesia/sources/auto_obsidian.py CHANGED
@@ -1,8 +1,8 @@
 def obsidian_replacer(p: str, r: str) -> str:
     if not p.startswith("editor://") or not p.endswith('.md'):
         return p
-
+
     path = p.split('/', 2)[-1]
-
+
     uri = f"obsidian://{path}"
     return uri
promnesia/sources/browser.py CHANGED
@@ -2,15 +2,18 @@
 Uses [[https://github.com/karlicoss/HPI][HPI]] for visits from web browsers.
 '''

+from __future__ import annotations
+
 import re
-from typing import Optional, Iterator, Any, TYPE_CHECKING
 import warnings
+from collections.abc import Iterator
+from typing import TYPE_CHECKING, Any

-from promnesia.common import Results, Visit, Loc, Second, PathIsh, logger, is_sqlite_db
+from promnesia.common import Loc, PathIsh, Results, Second, Visit, is_sqlite_db, logger


-def index(p: Optional[PathIsh]=None) -> Results:
-    from . import hpi
+def index(p: PathIsh | None = None) -> Results:
+    from . import hpi  # noqa: F401,I001

     if p is None:
         from my.browser.all import history
@@ -24,10 +27,11 @@ def index(p: Optional[PathIsh]=None) -> Results:
     )
     try:
         yield from _index_new_with_adhoc_config(path=p)
-        return
     except Exception as e:
         logger.exception(e)
         warnings.warn("Hacking my.config.browser.export didn't work. You probably need to update HPI.")
+    else:
+        return

     logger.warning("Falling back onto legacy promnesia.sources.browser_legacy module")
     yield from _index_old(path=p)
@@ -35,11 +39,12 @@ def index(p: Optional[PathIsh]=None) -> Results:

 def _index_old(*, path: PathIsh) -> Results:
     from . import browser_legacy
+
     yield from browser_legacy.index(path)


 def _index_new_with_adhoc_config(*, path: PathIsh) -> Results:
-    from . import hpi
+    from . import hpi  # noqa: F401,I001

     ## previously, it was possible to index be called with multiple different db search paths
     ## this would result in each subsequent call to my.browser.export.history to invalidate cache every time
@@ -50,7 +55,7 @@ def _index_new_with_adhoc_config(*, path: PathIsh) -> Results:
     cache_override = None if hpi_cache_dir is None else hpi_cache_dir / sanitized_path
     ##

-    from my.core.common import classproperty, Paths, get_files
+    from my.core.common import Paths, classproperty, get_files
     class config:
         class core:
             cache_dir = cache_override
@@ -75,8 +80,8 @@ else:

 def _index_new(history: Iterator[BrowserMergeVisit]) -> Results:
     for v in history:
-        desc: Optional[str] = None
-        duration: Optional[Second] = None
+        desc: str | None = None
+        duration: Second | None = None
         metadata = v.metadata
         if metadata is not None:
             desc = metadata.title
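The return moving out of the try body into an else clause (here, and in server.py's as_version) is the idiom suggested by lint rules such as ruff's TRY300: the except block guards only the code that can actually raise, and the success path becomes explicit. A self-contained sketch of the pattern (names are illustrative):

    def parse_port(raw: str) -> int:
        try:
            port = int(raw)          # only this conversion is guarded
        except ValueError:
            print(f'bad port {raw!r}, using default')
        else:
            return port              # runs only when int() succeeded
        return 8080                  # fallback path

    assert parse_port('13131') == 13131
    assert parse_port('oops') == 8080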
promnesia/sources/browser_legacy.py CHANGED
@@ -1,16 +1,23 @@
+from __future__ import annotations
+
+import sqlite3
 from datetime import datetime
 from pathlib import Path
 from urllib.parse import unquote
-import sqlite3
-from typing import List, Set

 import pytz

-from ..common import PathIsh, Results, Visit, Loc, logger, Second, is_sqlite_db
-from .. import config
+from promnesia import config
+from promnesia.common import Loc, PathIsh, Results, Second, Visit, is_sqlite_db, logger

-# todo mcachew?
-from cachew import cachew
+try:
+    from cachew import cachew
+except ModuleNotFoundError as me:
+    if me.name != 'cachew':
+        raise me
+    # this module is legacy anyway, so just make it defensive
+    def cachew(*args, **kwargs):  # type: ignore[no-redef]
+        return lambda f: f


 def index(p: PathIsh) -> Results:
@@ -29,21 +36,21 @@ def index(p: PathIsh) -> Results:



-def _index_dbs(dbs: List[Path], cachew_name: str):
+def _index_dbs(dbs: list[Path], cachew_name: str):
     # TODO right... not ideal, need to think how to handle it properly...
     import sys
     sys.setrecursionlimit(5000)

     cache_dir = config.get().cache_dir
     cpath = None if cache_dir is None else cache_dir / cachew_name
-    emitted: Set = set()
+    emitted: set = set()
     yield from _index_dbs_aux(cpath, dbs, emitted=emitted)


 # todo wow, stack traces are ridiculous here...
 # todo hmm, feels like it should be a class or something?
-@cachew(lambda cp, dbs, emitted: cp, depends_on=lambda cp, dbs, emitted: dbs) # , logger=logger)
-def _index_dbs_aux(cache_path: Path, dbs: List[Path], emitted: Set) -> Results:
+@cachew(lambda cp, dbs, emitted: cp, depends_on=lambda cp, dbs, emitted: dbs) # , logger=logger) # noqa: ARG005
+def _index_dbs_aux(cache_path: Path | None, dbs: list[Path], emitted: set) -> Results:
     if len(dbs) == 0:
         return

@@ -58,7 +65,7 @@ def _index_dbs_aux(cache_path: Path, dbs: List[Path], emitted: Set) -> Results:
             xs_was_cached = True
             logger.debug('seems that %d first items were previously cached', len(xs))
         if xs_was_cached:
-            key = (r.url, r.dt)
+            key = str(r) if isinstance(r, Exception) else (r.url, r.dt)
             assert key not in emitted, key # todo not sure if this assert is necessary?
             # hmm ok it might happen if we messed up with indexing individual db?
             # alternatively, could abuse it to avoid messing with 'emitted' in _index_db?
@@ -69,7 +76,7 @@ def _index_dbs_aux(cache_path: Path, dbs: List[Path], emitted: Set) -> Results:
         yield from _index_db(db, emitted=emitted)


-def _index_db(db: Path, emitted: Set):
+def _index_db(db: Path, emitted: set):
     logger.info('processing %s', db) # debug level?

     # todo schema check (not so critical for cachew though)
@@ -115,17 +122,20 @@ Col = str
 ColType = str


-from typing import Any, NamedTuple, Tuple, Union, Sequence, Optional
+from collections.abc import Sequence
+from typing import NamedTuple, Union
+

 class Schema(NamedTuple):
-    cols: Sequence[Tuple[Col, ColType]]
+    cols: Sequence[tuple[Col, ColType]]
     key: Sequence[str]


-SchemaCheck = Tuple[str, Union[str, Sequence[str]]] # todo Union: meh
+SchemaCheck = tuple[str, Union[str, Sequence[str]]] # todo Union: meh

 from dataclasses import dataclass

+
 # todo protocol?
 @dataclass
 class Extr:
@@ -173,7 +183,7 @@ class Chrome(Extr):
         dt = chrome_time_to_utc(int(ts))
         url = unquote(url) # chrome urls are all quoted
         dd = int(durs)
-        dur: Optional[Second] = None if dd == 0 else dd // 1_000_000
+        dur: Second | None = None if dd == 0 else dd // 1_000_000
         return Visit(
             url=url,
             dt=dt,
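The guarded cachew import at the top of browser_legacy.py is a tidy pattern for optional dependencies: checking me.name ensures only a genuinely missing cachew is swallowed (a ModuleNotFoundError raised by one of cachew's own imports still propagates), and the no-op stand-in keeps the @cachew(...) call sites valid, so the module merely loses caching. The same shape with a hypothetical optional package:

    try:
        from fancycache import cached  # hypothetical optional dependency
    except ModuleNotFoundError as e:
        if e.name != 'fancycache':     # don't mask failures inside fancycache itself
            raise
        def cached(*args, **kwargs):   # decorator-factory stand-in: caching becomes a no-op
            return lambda f: f

    @cached(key=str)  # works whether or not fancycache is installed
    def expensive(x: int) -> int:
        return x * x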