promnesia 1.3.20241021__py3-none-any.whl → 1.4.20250909__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. promnesia/__init__.py +4 -1
  2. promnesia/__main__.py +72 -59
  3. promnesia/cannon.py +90 -89
  4. promnesia/common.py +74 -62
  5. promnesia/compare.py +15 -10
  6. promnesia/config.py +22 -17
  7. promnesia/database/dump.py +1 -2
  8. promnesia/extract.py +6 -6
  9. promnesia/logging.py +27 -15
  10. promnesia/misc/install_server.py +25 -19
  11. promnesia/server.py +69 -53
  12. promnesia/sources/auto.py +65 -51
  13. promnesia/sources/browser.py +7 -2
  14. promnesia/sources/browser_legacy.py +51 -40
  15. promnesia/sources/demo.py +0 -1
  16. promnesia/sources/fbmessenger.py +0 -1
  17. promnesia/sources/filetypes.py +15 -11
  18. promnesia/sources/github.py +4 -1
  19. promnesia/sources/guess.py +4 -1
  20. promnesia/sources/hackernews.py +5 -7
  21. promnesia/sources/hpi.py +3 -1
  22. promnesia/sources/html.py +4 -2
  23. promnesia/sources/instapaper.py +1 -0
  24. promnesia/sources/markdown.py +4 -4
  25. promnesia/sources/org.py +17 -8
  26. promnesia/sources/plaintext.py +14 -11
  27. promnesia/sources/pocket.py +2 -1
  28. promnesia/sources/reddit.py +5 -8
  29. promnesia/sources/roamresearch.py +3 -1
  30. promnesia/sources/rss.py +4 -5
  31. promnesia/sources/shellcmd.py +3 -6
  32. promnesia/sources/signal.py +14 -14
  33. promnesia/sources/smscalls.py +0 -1
  34. promnesia/sources/stackexchange.py +2 -2
  35. promnesia/sources/takeout.py +14 -21
  36. promnesia/sources/takeout_legacy.py +16 -10
  37. promnesia/sources/telegram.py +7 -3
  38. promnesia/sources/telegram_legacy.py +5 -5
  39. promnesia/sources/twitter.py +1 -1
  40. promnesia/sources/vcs.py +6 -3
  41. promnesia/sources/viber.py +2 -2
  42. promnesia/sources/website.py +4 -3
  43. promnesia/sqlite.py +10 -7
  44. promnesia/tests/common.py +2 -0
  45. promnesia/tests/server_helper.py +2 -2
  46. promnesia/tests/sources/test_filetypes.py +9 -7
  47. promnesia/tests/sources/test_hypothesis.py +7 -3
  48. promnesia/tests/sources/test_org.py +7 -2
  49. promnesia/tests/sources/test_plaintext.py +9 -7
  50. promnesia/tests/sources/test_shellcmd.py +10 -9
  51. promnesia/tests/test_cannon.py +254 -237
  52. promnesia/tests/test_cli.py +8 -2
  53. promnesia/tests/test_compare.py +16 -12
  54. promnesia/tests/test_db_dump.py +4 -3
  55. promnesia/tests/test_extract.py +7 -4
  56. promnesia/tests/test_indexer.py +10 -10
  57. promnesia/tests/test_server.py +10 -10
  58. promnesia/tests/utils.py +1 -5
  59. promnesia-1.4.20250909.dist-info/METADATA +66 -0
  60. promnesia-1.4.20250909.dist-info/RECORD +80 -0
  61. {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info}/WHEEL +1 -2
  62. promnesia/kjson.py +0 -122
  63. promnesia/sources/__init__.pyi +0 -0
  64. promnesia-1.3.20241021.dist-info/METADATA +0 -55
  65. promnesia-1.3.20241021.dist-info/RECORD +0 -83
  66. promnesia-1.3.20241021.dist-info/top_level.txt +0 -1
  67. {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info}/entry_points.txt +0 -0
  68. {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info/licenses}/LICENSE +0 -0
@@ -13,9 +13,11 @@ def index() -> Results:
     # although could raise a warning on top level, when source emitted no takeouts
 
     # TODO youtube?
+    # fmt: off
     google_activities = [read_google_activity(t) for t in takeouts]
     search_activities = [read_search_activity(t) for t in takeouts]
     browser_histories = [read_browser_history_json(t) for t in takeouts]
+    # fmt: on
 
     key = lambda v: (v.dt, v.url)
     return chain(
@@ -25,14 +27,12 @@ def index() -> Results:
     )
 
 
-
 import json
 from collections.abc import Iterable
-from datetime import datetime
+from datetime import datetime, timezone
 from itertools import chain
 from pathlib import Path
 
-import pytz
 from more_itertools import unique_everseen
 
 from promnesia import config
@@ -42,6 +42,7 @@ try:
 except ModuleNotFoundError as me:
     if me.name != 'cachew':
         raise me
+
     # this module is legacy anyway, so just make it defensive
     def cachew(*args, **kwargs):  # type: ignore[no-redef]
         return lambda f: f
@@ -53,7 +54,7 @@ TakeoutPath = Path
 
 def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
     # FIXME switch to actual kompress? and use CPath?
-    from my.core.kompress import kexists
+    from my.core.kompress import kexists  # type: ignore[attr-defined]
 
     # TODO glob
     # TODO not sure about windows path separators??
@@ -65,6 +66,7 @@ def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
 
     locator = Loc.file(spath)
     from my.google.takeout.html import read_html
+
     for dt, url, _title in read_html(takeout, spath):
         yield Visit(
             url=url,
@@ -73,6 +75,7 @@ def _read_myactivity_html(takeout: TakeoutPath, kind: str) -> Iterable[Visit]:
             debug=kind,
         )
 
+
 def _cpath(suffix: str):
     def fun(takeout: TakeoutPath):
         cache_dir = config.get().cache_dir
@@ -80,23 +83,27 @@ def _cpath(suffix: str):
             return None
         # doesn't need a nontrivial hash function, timestsamp is encoded in name
         return cache_dir / (takeout.name + '_' + suffix + '.cache')
+
     return fun
 
 
 # todo caching should this be HPI responsibility?
 # todo set global cachew logging on init?
-@cachew(cache_path=_cpath('google_activity') , logger=logger)
+@cachew(cache_path=_cpath('google_activity'), logger=logger)
 def read_google_activity(takeout: TakeoutPath) -> Iterable[Visit]:
     return _read_myactivity_html(takeout, 'Chrome/MyActivity.html')
 
-@cachew(cache_path=_cpath('search_activity') , logger=logger)
+
+@cachew(cache_path=_cpath('search_activity'), logger=logger)
 def read_search_activity(takeout: TakeoutPath) -> Iterable[Visit]:
     return _read_myactivity_html(takeout, 'Search/MyActivity.html')
 
+
 # TODO add this to tests?
 @cachew(cache_path=_cpath('browser_activity'), logger=logger)
 def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
-    from my.core.kompress import kexists, kopen
+    from my.core.kompress import kexists, kopen  # type: ignore[attr-defined]
+
     # not sure if this deserves moving to HPI? it's pretty trivial for now
     spath = 'Takeout/Chrome/BrowserHistory.json'
 
@@ -111,13 +118,13 @@ def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
     # TODO this should be supported by HPI now?
 
     j = None
-    with kopen(takeout, spath) as fo: # TODO iterative parser?
+    with kopen(takeout, spath) as fo:  # TODO iterative parser?
        j = json.load(fo)
 
     hist = j['Browser History']
     for item in hist:
         url = item['url']
-        time = datetime.fromtimestamp(item['time_usec'] / 10 ** 6, tz=pytz.utc)
+        time = datetime.fromtimestamp(item['time_usec'] / 10**6, tz=timezone.utc)
         # TODO any more interesitng info?
         yield Visit(
             url=url,
@@ -125,4 +132,3 @@ def read_browser_history_json(takeout: TakeoutPath) -> Iterable[Visit]:
             locator=locator,
             debug='Chrome/BrowserHistory.json',
         )
-
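The timestamp change above drops the pytz dependency in favour of the standard library. A minimal standalone sketch of the same conversion, assuming a made-up sample value rather than real Takeout data:

    from datetime import datetime, timezone

    # Takeout's BrowserHistory.json stores 'time_usec' as microseconds since the Unix epoch
    time_usec = 1_600_000_000_000_000  # hypothetical sample value
    dt = datetime.fromtimestamp(time_usec / 10**6, tz=timezone.utc)
    assert dt.tzinfo is timezone.utc  # already timezone-aware, no pytz needed
    print(dt.isoformat())  # 2020-09-13T12:26:40+00:00
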
@@ -6,7 +6,7 @@ from urllib.parse import unquote # TODO mm, make it easier to rememember to use
 from promnesia.common import Loc, PathIsh, Results, Visit, extract_urls, logger
 
 
-def index(database: PathIsh | None=None, *, http_only: bool=False, with_extra_media_info: bool=False) -> Results:
+def index(database: PathIsh | None = None, *, http_only: bool = False, with_extra_media_info: bool = False) -> Results:
     if database is None:
         # fully relying on HPI
         yield from _index_new(http_only=http_only, with_extra_media_info=with_extra_media_info)
@@ -17,7 +17,9 @@ def index(database: PathIsh | None=None, *, http_only: bool=False, with_extra_me
         f'Will try to hack database path {database} into HPI config.'
     )
     try:
-        yield from _index_new_with_adhoc_config(database=database, http_only=http_only, with_extra_media_info=with_extra_media_info)
+        yield from _index_new_with_adhoc_config(
+            database=database, http_only=http_only, with_extra_media_info=with_extra_media_info
+        )
     except Exception as e:
         logger.exception(e)
         warnings.warn("Hacking my.config.telegram.telegram_backup didn't work. You probably need to update HPI.")
@@ -30,11 +32,12 @@ def index(database: PathIsh | None=None, *, http_only: bool=False, with_extra_me
 
 
 def _index_legacy(*, database: PathIsh, http_only: bool) -> Results:
     from . import telegram_legacy
+
     yield from telegram_legacy.index(database=database, http_only=http_only)
 
 
 def _index_new_with_adhoc_config(*, database: PathIsh, http_only: bool, with_extra_media_info: bool) -> Results:
-    from . import hpi  # noqa: F401,I001
+    from . import hpi  # noqa: F401
     class config:
         class telegram:
@@ -42,6 +45,7 @@ def _index_new_with_adhoc_config(*, database: PathIsh, http_only: bool, with_ext
                export_path: PathIsh = database
 
     from my.core.cfg import tmp_config
+
     with tmp_config(modules='my.telegram.telegram_backup', config=config):
         yield from _index_new(http_only=http_only, with_extra_media_info=with_extra_media_info)
 
@@ -18,7 +18,6 @@ from promnesia.common import (
     echain,
     extract_urls,
     from_epoch,
-    get_logger,
 )
 
 from ..sqlite import sqlite_connection
@@ -32,15 +31,13 @@ def unwrap(res: T | Exception) -> T:
     return res
 
 
-def index(database: PathIsh, *, http_only: bool=False) -> Results:
+def index(database: PathIsh, *, http_only: bool = False) -> Results:
     """
     :param database:
        the path of the sqlite generated by the _telegram_backup_ java program
     :param http_only:
        when true, do not collect IP-addresses and `python.py` strings
     """
-    logger = get_logger()
-
     path = Path(database)
     assert path.is_file(), path
 
@@ -77,7 +74,8 @@ def index(database: PathIsh, *, http_only: bool=False) -> Results:
            M.message_type NOT IN ('service_message', 'empty_message')
            {extra_criteria}
    ORDER BY time;
-    """)
+    """
+    )
 
     with sqlite_connection(path, immutable=True, row_factory='row') as db:
         # TODO yield error if chatname or chat or smth else is null?
@@ -105,6 +103,7 @@ def _handle_row(row: sqlite3.Row) -> Results:
     urls = extract_urls(text)
     if len(urls) == 0:
         return
+    # fmt: off
     dt = from_epoch(row['time'])
     mid: str = unwrap(row['mid'])
 
@@ -112,6 +111,7 @@ def _handle_row(row: sqlite3.Row) -> Results:
     sender: str = unwrap(row['sender'])
     chatname: str = unwrap(row['chatname'])
     chat: str = unwrap(row['chat'])
+    # fmt: on
 
     in_context = f'https://t.me/{chat}/{mid}'
     for u in urls:
@@ -23,7 +23,7 @@ def index() -> Results:
         processed += 1
         try:
             urls = t.urls
-        except Exception as e: # just in case..
+        except Exception as e:  # just in case..
             yield e
             urls = []
 
promnesia/sources/vcs.py CHANGED
@@ -1,6 +1,7 @@
 '''
 Clones & indexes Git repositories (via sources.auto)
 '''
+
 from __future__ import annotations
 
 import re
@@ -22,7 +23,7 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
     # note: https://bugs.python.org/issue33617 , it doesn't like Path here on Windows
     check_call(['git', 'clone', repo, str(tp)])
 
-    def replacer(p: PathIsh, prefix: str=str(tp), repo: str=repo) -> str:
+    def replacer(p: PathIsh, prefix: str = str(tp), repo: str = repo) -> str:
         ps = str(p)
         # TODO prefix is a bit misleading
         pos = ps.find(prefix)
@@ -31,13 +32,15 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
             return ps
         # TODO ugh. seems that blame view https://github.com/davidgasquez/handbook/blame/master/README.md#L25 is the most reliable
         # in raw mode can't jump onto line, when markdown is renderend can't jump either
-        rest = ps[pos + len(prefix):]
-        rest = re.sub(r':(\d+)$', r'#L\1', rest) # patch line number...
+        rest = ps[pos + len(prefix) :]
+        rest = re.sub(r':(\d+)$', r'#L\1', rest)  # patch line number...
         return repo + '/blame/master' + rest
 
     # TODO doesn't work for git:
     # TODO think about something more generic... this isn't too sustainable
+
     # TODO not sure if context should be local or github?...
 
     from . import auto
+
     yield from auto.index(tp, *args, replacer=replacer, **kwargs)
@@ -19,7 +19,7 @@ logger = logging.getLogger(__name__)
 
 def index(
     db_path: PathIsh = "~/.ViberPC/*/viber.db",
-    locator_schema: str="editor",
+    locator_schema: str = "editor",
     *,
     http_only: bool = False,
 ) -> Results:
@@ -40,7 +40,7 @@ def index(
         yield from _harvest_db(db, msgs_query, locator_schema)
 
 
-def messages_query(http_only: bool | None) -> str:
+def messages_query(http_only: bool | None) -> str:  # noqa: FBT001
     """
     An SQL-query returning 1 row for each message
 
@@ -27,7 +27,7 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
        '-A', 'html,html,txt', # TODO eh, ideally would use mime type I guess...
        '--no-parent',
        url,
-    ]
+    ]  # fmt: skip
     # TODO follow sitemap? e.g. gwern
     logger.info(' '.join(cmd))
     res = run(cmd, check=False)
@@ -39,12 +39,12 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
         # rest of the errors are a bit more critical..
         res.check_returncode()
 
-    def replacer(p: PathIsh, prefix: str=str(tp), url: str=url) -> str:
+    def replacer(p: PathIsh, prefix: str = str(tp), url: str = url) -> str:
         ps = str(p)
         pos = ps.find(prefix)
         if pos == -1:
             return ps
-        rest = ps[pos + len(prefix):]
+        rest = ps[pos + len(prefix) :]
         # now this should look kinda like /domain.tld/rest (due to the way wget downloads stuff)
         rest = re.sub(r'/.*?/', '/', rest)
         return url + rest
@@ -54,4 +54,5 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
 
     # TODO smarter html handling
     from . import auto
+
     yield from auto.index(tp, *args, replacer=replacer, **kwargs)
promnesia/sqlite.py CHANGED
@@ -1,25 +1,28 @@
 from __future__ import annotations
 
 import sqlite3
-from collections.abc import Iterator
+from collections.abc import Callable, Iterator
 from contextlib import contextmanager
-from typing import Any, Callable, Literal, Union
-
-from .common import PathIsh
+from pathlib import Path
+from typing import Any, Literal
 
 # NOTE: copy pasted from HPI
 
 SqliteRowFactory = Callable[[sqlite3.Cursor, sqlite3.Row], Any]
 
+
 def dict_factory(cursor, row):
     fields = [column[0] for column in cursor.description]
-    return dict(zip(fields, row))
+    return dict(zip(fields, row, strict=True))
+
 
+Factory = SqliteRowFactory | Literal['row', 'dict']
 
-Factory = Union[SqliteRowFactory, Literal['row', 'dict']]
 
 @contextmanager
-def sqlite_connection(db: PathIsh, *, immutable: bool=False, row_factory: Factory | None=None) -> Iterator[sqlite3.Connection]:
+def sqlite_connection(
+    db: Path | str, *, immutable: bool = False, row_factory: Factory | None = None
+) -> Iterator[sqlite3.Connection]:
     dbp = f'file:{db}'
     # https://www.sqlite.org/draft/uri.html#uriimmutable
     if immutable:
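
The sqlite_connection helper above builds a file: URI so the database can be opened with SQLite's URI parameters (see the linked sqlite.org page). A minimal sketch of the same idea outside promnesia's helper, with a hypothetical path:

    import sqlite3

    db = '/path/to/places.sqlite'  # hypothetical path, e.g. a browser history database
    # immutable=1 promises SQLite the file will not change while open,
    # which lets it read databases locked by another process
    conn = sqlite3.connect(f'file:{db}?immutable=1', uri=True)
    try:
        rows = conn.execute('SELECT 1').fetchall()
    finally:
        conn.close()
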
promnesia/tests/common.py CHANGED
@@ -59,6 +59,7 @@ def get_testdata(path: str) -> Path:
 @contextmanager
 def tmp_popen(*args, **kwargs):
     import psutil
+
     with psutil.Popen(*args, **kwargs) as p:
         try:
             yield p
@@ -99,6 +100,7 @@ def reset_filters():
 # TODO could be a TypeGuard from 3.10
 V = TypeVar('V')
 
+
 def unwrap(r: Res[V]) -> V:
     assert not isinstance(r, Exception), r
     return r
@@ -45,8 +45,8 @@ def run_server(db: PathIsh | None = None, *, timezone: str | None = None) -> Ite
         '--quiet',
         '--port', port,
         *([] if timezone is None else ['--timezone', timezone]),
-        *([] if db is None else ['--db' , str(db)]),
-    ]
+        *([] if db is None else ['--db', str(db)]),
+    ]  # fmt: skip
     with tmp_popen(promnesia_bin(*args)) as server_process:
         server = Helper(host=host, port=port, process=server_process)
 
@@ -13,16 +13,16 @@ def handled(p: PathIsh) -> bool:
 
 def test_filetypes() -> None:
     # test media
-    for ext in 'avi mp4 mp3 webm'.split() + ([] if windows else 'mkv'.split()):
+    for ext in ['avi', 'mp4', 'mp3', 'webm'] + ([] if windows else ['mkv']):
         assert handled('file.' + ext)
 
     # images
-    for ext in 'gif jpg png jpeg'.split():
+    for ext in ['gif', 'jpg', 'png', 'jpeg']:
         assert handled('file.' + ext)
 
     # TODO more granual checks that these are ignored?
     # binaries
-    for ext in 'o sqlite'.split() + ([] if windows else 'class jar'.split()):
+    for ext in ['o', 'sqlite'] + ([] if windows else ['class', 'jar']):
         assert handled('file.' + ext)
 
     # these might have potentially some links
@@ -31,13 +31,15 @@ def test_filetypes() -> None:
         'pdf', 'epub', 'ps',
         'doc', 'ppt', 'xsl',
         # seriously, windows doesn't know about docx???
-        *([] if windows else 'docx pptx xlsx'.split()),
-        *([] if windows else 'ods odt rtf'.split()),
-    ] + ([] if windows else 'djvu'.split()):
+        *([] if windows else ['docx', 'pptx', 'xlsx']),
+        *([] if windows else ['ods', 'odt', 'rtf']),
+    ] + ([] if windows else ['djvu']):  # fmt: skip
         assert handled('file.' + ext)
 
     # source code
-    for ext in 'rs tex el js sh hs pl h py hpp c go css'.split() + ([] if windows else 'java cpp'.split()):
+    for ext in ['rs', 'tex', 'el', 'js', 'sh', 'hs', 'pl', 'h', 'py', 'hpp', 'c', 'go', 'css'] + (
+        [] if windows else ['java', 'cpp']
+    ):
         assert handled('file.' + ext)
 
     assert handled('x.html')
@@ -12,7 +12,7 @@ def index_hypothesis(tmp_path: Path) -> None:
         from promnesia.common import Source
         from promnesia.sources import hypothesis
 
-        SOURCES = [Source(hypothesis.index, name='hyp')]
+        SOURCES = [Source(hypothesis.index, name='hyp')]  # noqa: F841
 
     cfg_path = tmp_path / 'config.py'
     write_config(cfg_path, cfg)
@@ -35,5 +35,9 @@ def test_hypothesis(tmp_path: Path) -> None:
 
     assert vis.norm_url == 'wired.com/2017/04/the-myth-of-a-superhuman-ai'
     assert vis.orig_url == 'https://www.wired.com/2017/04/the-myth-of-a-superhuman-ai/'
-    assert vis.locator.href == 'https://hyp.is/_Z9ccmVZEeexBOO7mToqdg/www.wired.com/2017/04/the-myth-of-a-superhuman-ai/'
-    assert 'misconception about evolution is fueling misconception about AI' in (vis.context or '') # contains notes as well
+    assert (
+        vis.locator.href == 'https://hyp.is/_Z9ccmVZEeexBOO7mToqdg/www.wired.com/2017/04/the-myth-of-a-superhuman-ai/'
+    )
+    assert 'misconception about evolution is fueling misconception about AI' in (
+        vis.context or ''
+    )  # contains notes as well
@@ -14,7 +14,9 @@ def delrf(s: str | None) -> str | None:
 
 
 def test_org_indexer() -> None:
-    [_, cpp, cozy] = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(get_testdata('auto/orgs/file.org'))]
+    [_, cpp, cozy] = [
+        v if isinstance(v, Visit) else throw(v) for v in extract_from_file(get_testdata('auto/orgs/file.org'))
+    ]
 
     assert cpp.url == 'https://www.youtube.com/watch?v=rHIkrotSwcc'
     # TODO not sure about filetags?
@@ -33,7 +35,10 @@ def test_org_indexer_2() -> None:
     items = [v if isinstance(v, Visit) else throw(v) for v in extract_from_file(get_testdata('auto/orgs/file3.org'))]
 
     assert len(items) == 6
-    assert items[0].url == 'https://www.reddit.com/r/androidapps/comments/4i36z9/how_you_use_your_android_to_the_maximum/d2uq24i'
+    assert (
+        items[0].url
+        == 'https://www.reddit.com/r/androidapps/comments/4i36z9/how_you_use_your_android_to_the_maximum/d2uq24i'
+    )
     assert items[1].url == 'https://link.com'
     assert items[-2].url == 'https://en.wikipedia.org/wiki/Resilio_Sync'
     # TODO shit def need org specific url extractor (and then extract from everything remaining)
@@ -5,13 +5,15 @@ from ..common import get_testdata, unwrap
 
 
 def test_plaintext_path_extractor() -> None:
-    visits = list(extract_visits(
-        Source(
-            shellcmd.index,
-            plaintext.extract_from_path(get_testdata('custom')),
-        ),
-        src='whatever',
-    ))
+    visits = list(
+        extract_visits(
+            Source(
+                shellcmd.index,
+                plaintext.extract_from_path(get_testdata('custom')),
+            ),
+            src='whatever',
+        )
+    )
     assert {unwrap(v).orig_url for v in visits} == {
         'http://google.com',
         'http://google.com/',
@@ -8,14 +8,15 @@ from ..common import get_testdata
 
 @pytest.mark.skipif(_is_windows, reason="no grep on windows")
 def test_via_grep() -> None:
-
-    visits = list(extract_visits(
-        Source(
-            shellcmd.index,
-            # meh. maybe should deprecate plain string here...
-            r"""grep -Eo -r --no-filename (http|https)://\S+ """ + str(get_testdata('custom')),
-        ),
-        src='whatever',
-    ))
+    visits = list(
+        extract_visits(
+            Source(
+                shellcmd.index,
+                # meh. maybe should deprecate plain string here...
+                r"""grep -Eo -r --no-filename (http|https)://\S+ """ + str(get_testdata('custom')),
+            ),
+            src='whatever',
+        )
+    )
     # TODO I guess filtering of equivalent urls should rather be tested on something having context (e.g. org mode)
     assert len(visits) == 5