promnesia 1.2.20240810__py3-none-any.whl → 1.4.20250909__py3-none-any.whl

This diff compares the contents of two publicly available package versions as released to their public registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the registry.
Files changed (79)
  1. promnesia/__init__.py +18 -4
  2. promnesia/__main__.py +104 -78
  3. promnesia/cannon.py +108 -107
  4. promnesia/common.py +107 -88
  5. promnesia/compare.py +33 -30
  6. promnesia/compat.py +10 -10
  7. promnesia/config.py +37 -34
  8. promnesia/database/common.py +4 -3
  9. promnesia/database/dump.py +13 -13
  10. promnesia/database/load.py +7 -7
  11. promnesia/extract.py +19 -17
  12. promnesia/logging.py +27 -15
  13. promnesia/misc/install_server.py +32 -27
  14. promnesia/server.py +106 -79
  15. promnesia/sources/auto.py +104 -77
  16. promnesia/sources/auto_logseq.py +6 -5
  17. promnesia/sources/auto_obsidian.py +2 -2
  18. promnesia/sources/browser.py +20 -10
  19. promnesia/sources/browser_legacy.py +65 -50
  20. promnesia/sources/demo.py +7 -8
  21. promnesia/sources/fbmessenger.py +3 -3
  22. promnesia/sources/filetypes.py +22 -16
  23. promnesia/sources/github.py +9 -8
  24. promnesia/sources/guess.py +6 -2
  25. promnesia/sources/hackernews.py +7 -9
  26. promnesia/sources/hpi.py +5 -3
  27. promnesia/sources/html.py +11 -7
  28. promnesia/sources/hypothesis.py +3 -2
  29. promnesia/sources/instapaper.py +3 -2
  30. promnesia/sources/markdown.py +22 -12
  31. promnesia/sources/org.py +36 -17
  32. promnesia/sources/plaintext.py +41 -39
  33. promnesia/sources/pocket.py +5 -3
  34. promnesia/sources/reddit.py +24 -26
  35. promnesia/sources/roamresearch.py +5 -2
  36. promnesia/sources/rss.py +6 -8
  37. promnesia/sources/shellcmd.py +21 -11
  38. promnesia/sources/signal.py +27 -26
  39. promnesia/sources/smscalls.py +2 -3
  40. promnesia/sources/stackexchange.py +5 -4
  41. promnesia/sources/takeout.py +37 -34
  42. promnesia/sources/takeout_legacy.py +29 -19
  43. promnesia/sources/telegram.py +18 -12
  44. promnesia/sources/telegram_legacy.py +22 -11
  45. promnesia/sources/twitter.py +7 -6
  46. promnesia/sources/vcs.py +11 -6
  47. promnesia/sources/viber.py +11 -10
  48. promnesia/sources/website.py +8 -7
  49. promnesia/sources/zulip.py +3 -2
  50. promnesia/sqlite.py +13 -7
  51. promnesia/tests/common.py +10 -5
  52. promnesia/tests/server_helper.py +13 -10
  53. promnesia/tests/sources/test_auto.py +2 -3
  54. promnesia/tests/sources/test_filetypes.py +11 -8
  55. promnesia/tests/sources/test_hypothesis.py +10 -6
  56. promnesia/tests/sources/test_org.py +9 -5
  57. promnesia/tests/sources/test_plaintext.py +9 -8
  58. promnesia/tests/sources/test_shellcmd.py +13 -13
  59. promnesia/tests/sources/test_takeout.py +3 -5
  60. promnesia/tests/test_cannon.py +256 -239
  61. promnesia/tests/test_cli.py +12 -8
  62. promnesia/tests/test_compare.py +17 -13
  63. promnesia/tests/test_config.py +7 -8
  64. promnesia/tests/test_db_dump.py +15 -15
  65. promnesia/tests/test_extract.py +17 -10
  66. promnesia/tests/test_indexer.py +24 -18
  67. promnesia/tests/test_server.py +12 -13
  68. promnesia/tests/test_traverse.py +0 -2
  69. promnesia/tests/utils.py +3 -7
  70. promnesia-1.4.20250909.dist-info/METADATA +66 -0
  71. promnesia-1.4.20250909.dist-info/RECORD +80 -0
  72. {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/WHEEL +1 -2
  73. promnesia/kjson.py +0 -121
  74. promnesia/sources/__init__.pyi +0 -0
  75. promnesia-1.2.20240810.dist-info/METADATA +0 -54
  76. promnesia-1.2.20240810.dist-info/RECORD +0 -83
  77. promnesia-1.2.20240810.dist-info/top_level.txt +0 -1
  78. {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info}/entry_points.txt +0 -0
  79. {promnesia-1.2.20240810.dist-info → promnesia-1.4.20250909.dist-info/licenses}/LICENSE +0 -0
@@ -1,19 +1,19 @@
- from datetime import datetime
+ from __future__ import annotations
+
+ import sqlite3
+ from datetime import datetime, timezone
  from pathlib import Path
  from urllib.parse import unquote
- import sqlite3
- from typing import List, Set, Optional

- import pytz
-
- from ..common import PathIsh, Results, Visit, Loc, logger, Second, is_sqlite_db
- from .. import config
+ from promnesia import config
+ from promnesia.common import Loc, PathIsh, Results, Second, Visit, is_sqlite_db, logger

  try:
  from cachew import cachew
  except ModuleNotFoundError as me:
  if me.name != 'cachew':
  raise me
+
  # this module is legacy anyway, so just make it defensive
  def cachew(*args, **kwargs): # type: ignore[no-redef]
  return lambda f: f
@@ -21,7 +21,7 @@ except ModuleNotFoundError as me:

  def index(p: PathIsh) -> Results:
  pp = Path(p)
- assert pp.exists(), pp # just in case of broken symlinks
+ assert pp.exists(), pp  # just in case of broken symlinks

  # todo warn if filtered out too many?
  # todo wonder how quickly mimes can be computed?
@@ -30,31 +30,31 @@ def index(p: PathIsh) -> Results:

  assert len(dbs) > 0, pp
  logger.info('processing %d databases', len(dbs))
- cname = str('_'.join(pp.parts[1:])) # meh
+ cname = str('_'.join(pp.parts[1:]))  # meh
  yield from _index_dbs(dbs, cachew_name=cname)


-
- def _index_dbs(dbs: List[Path], cachew_name: str):
+ def _index_dbs(dbs: list[Path], cachew_name: str):
  # TODO right... not ideal, need to think how to handle it properly...
  import sys
+
  sys.setrecursionlimit(5000)

  cache_dir = config.get().cache_dir
  cpath = None if cache_dir is None else cache_dir / cachew_name
- emitted: Set = set()
+ emitted: set = set()
  yield from _index_dbs_aux(cpath, dbs, emitted=emitted)


  # todo wow, stack traces are ridiculous here...
  # todo hmm, feels like it should be a class or something?
- @cachew(lambda cp, dbs, emitted: cp, depends_on=lambda cp, dbs, emitted: dbs) # , logger=logger)
- def _index_dbs_aux(cache_path: Optional[Path], dbs: List[Path], emitted: Set) -> Results:
+ @cachew(lambda cp, dbs, emitted: cp, depends_on=lambda cp, dbs, emitted: dbs) # , logger=logger) # noqa: ARG005
+ def _index_dbs_aux(cache_path: Path | None, dbs: list[Path], emitted: set) -> Results:
  if len(dbs) == 0:
  return

  xs = dbs[:-1]
- x = dbs[-1:]
+ x = dbs[-1:]

  xs_res = _index_dbs_aux(cache_path, xs, emitted)
  xs_was_cached = False
@@ -65,36 +65,38 @@ def _index_dbs_aux(cache_path: Optional[Path], dbs: List[Path], emitted: Set) ->
  logger.debug('seems that %d first items were previously cached', len(xs))
  if xs_was_cached:
  key = str(r) if isinstance(r, Exception) else (r.url, r.dt)
- assert key not in emitted, key # todo not sure if this assert is necessary?
+ assert key not in emitted, key  # todo not sure if this assert is necessary?
  # hmm ok it might happen if we messed up with indexing individual db?
  # alternatively, could abuse it to avoid messing with 'emitted' in _index_db?
  emitted.add(key)
- yield r # todo not sure about exceptions?
+ yield r  # todo not sure about exceptions?

  for db in x:
  yield from _index_db(db, emitted=emitted)


- def _index_db(db: Path, emitted: Set):
- logger.info('processing %s', db) # debug level?
+ def _index_db(db: Path, emitted: set):
+ logger.info('processing %s', db)  # debug level?

  # todo schema check (not so critical for cachew though)
  total = 0
- new = 0
- loc = Loc.file(db) # todo possibly needs to be optimized -- moving from within the loop considerably speeds everything up
+ new = 0
+ loc = Loc.file(
+ db
+ ) # todo possibly needs to be optimized -- moving from within the loop considerably speeds everything up
  with sqlite3.connect(f'file:{db}?immutable=1', uri=True) as c:
  browser = None
  for b in [Chrome, Firefox, FirefoxPhone, Safari]:
  try:
  c.execute(f'SELECT * FROM {b.detector}')
- except sqlite3.OperationalError: # not sure if the right kind?
+ except sqlite3.OperationalError:  # not sure if the right kind?
  pass
  else:
  browser = b
  break
  assert browser is not None

- proj = ', '.join(c for c, _ in browser.schema.cols)
+ proj = ', '.join(c for c, _ in browser.schema.cols)
  query = browser.query.replace('chunk.', '')

  c.row_factory = sqlite3.Row
@@ -121,17 +123,20 @@ Col = str
  ColType = str


- from typing import Any, NamedTuple, Tuple, Union, Sequence, Optional
+ from collections.abc import Sequence
+ from typing import NamedTuple
+

  class Schema(NamedTuple):
- cols: Sequence[Tuple[Col, ColType]]
+ cols: Sequence[tuple[Col, ColType]]
  key: Sequence[str]


- SchemaCheck = Tuple[str, Union[str, Sequence[str]]] # todo Union: meh
+ SchemaCheck = tuple[str, str | Sequence[str]] # todo Union: meh

  from dataclasses import dataclass

+
  # todo protocol?
  @dataclass
  class Extr:
@@ -147,14 +152,15 @@ class Extr:


  class Chrome(Extr):
- detector='keyword_search_terms'
+ detector = 'keyword_search_terms'
+ # fmt: off
  schema_check=(
  'visits', [
  'visits', "id, url, visit_time, from_visit, transition, segment_id, visit_duration, incremented_omnibox_typed_score",
  'visits', "id, url, visit_time, from_visit, transition, segment_id, visit_duration"
  ]
  )
- schema=Schema(cols=[
+ schema = Schema(cols=[
  ('U.url' , 'TEXT' ),

  # while these two are not very useful, might be good to have just in case for some debugging
@@ -168,18 +174,19 @@ class Chrome(Extr):
  ('V.visit_duration' , 'INTEGER NOT NULL'),
  # V.omnibox thing looks useless
  ], key=('url', 'visit_time', 'vid', 'urlid'))
- query='FROM chunk.visits as V, chunk.urls as U WHERE V.url = U.id'
+ # fmt: on
+ query = 'FROM chunk.visits as V, chunk.urls as U WHERE V.url = U.id'

  @staticmethod
  def row2visit(row: sqlite3.Row, loc: Loc) -> Visit:
- url = row['url']
- ts = row['visit_time']
+ url = row['url']
+ ts = row['visit_time']
  durs = row['visit_duration']

  dt = chrome_time_to_utc(int(ts))
- url = unquote(url) # chrome urls are all quoted
+ url = unquote(url)  # chrome urls are all quoted
  dd = int(durs)
- dur: Optional[Second] = None if dd == 0 else dd // 1_000_000
+ dur: Second | None = None if dd == 0 else dd // 1_000_000
  return Visit(
  url=url,
  dt=dt,
@@ -192,12 +199,12 @@ class Chrome(Extr):
  # yep, tested it and looks like utc
  def chrome_time_to_utc(chrome_time: int) -> datetime:
  epoch = (chrome_time / 1_000_000) - 11644473600
- return datetime.fromtimestamp(epoch, pytz.utc)
+ return datetime.fromtimestamp(epoch, timezone.utc)


  def _row2visit_firefox(row: sqlite3.Row, loc: Loc) -> Visit:
  url = row['url']
- ts = float(row['visit_date'])
+ ts = float(row['visit_date'])
  # ok, looks like it's unix epoch
  # https://stackoverflow.com/a/19430099/706389

@@ -210,17 +217,19 @@ def _row2visit_firefox(row: sqlite3.Row, loc: Loc) -> Visit:
  else:
  # milliseconds
  ts /= 1_000
- dt = datetime.fromtimestamp(ts, pytz.utc)
- url = unquote(url) # firefox urls are all quoted
+ dt = datetime.fromtimestamp(ts, timezone.utc)
+ url = unquote(url)  # firefox urls are all quoted
  return Visit(
  url=url,
  dt=dt,
  locator=loc,
  )

+
  # https://web.archive.org/web/20201026130310/http://fileformats.archiveteam.org/wiki/History.db
  class Safari(Extr):
- detector='history_tombstones'
+ detector = 'history_tombstones'
+ # fmt: off
  schema_check=(
  'history_visits', [
  'history_visits', "id, history_item, visit_time",
@@ -241,13 +250,14 @@ class Safari(Extr):
  # ('V.visit_duration' , 'INTEGER NOT NULL'),
  # V.omnibox thing looks useless
  ], key=('url', 'visit_time', 'vid', 'urlid'))
- query='FROM chunk.history_visits as V, chunk.history_items as U WHERE V.history_item = U.id'
+ # fmt: on
+ query = 'FROM chunk.history_visits as V, chunk.history_items as U WHERE V.history_item = U.id'

  @staticmethod
  def row2visit(row: sqlite3.Row, loc: Loc) -> Visit:
- url = row['url']
- ts = row['visit_time'] + 978307200 # https://stackoverflow.com/a/34546556/16645
- dt = datetime.fromtimestamp(ts, pytz.utc)
+ url = row['url']
+ ts = row['visit_time'] + 978307200  # https://stackoverflow.com/a/34546556/16645
+ dt = datetime.fromtimestamp(ts, timezone.utc)

  return Visit(
  url=url,
@@ -255,10 +265,12 @@ class Safari(Extr):
  locator=loc,
  )

+
  # https://web.archive.org/web/20190730231715/https://www.forensicswiki.org/wiki/Mozilla_Firefox_3_History_File_Format#moz_historyvisits
  class Firefox(Extr):
- detector='moz_meta'
- schema_check=('moz_historyvisits', "id, from_visit, place_id, visit_date, visit_type")
+ detector = 'moz_meta'
+ schema_check = ('moz_historyvisits', "id, from_visit, place_id, visit_date, visit_type")
+ # fmt: off
  schema=Schema(cols=[
  ('P.url' , 'TEXT'),

@@ -274,14 +286,16 @@ class Firefox(Extr):
  # needs to be defensive
  # ('V.session' , 'INTEGER'),
  ], key=('url', 'visit_date', 'vid', 'pid'))
- query='FROM chunk.moz_historyvisits as V, chunk.moz_places as P WHERE V.place_id = P.id'
+ # fmt: on
+ query = 'FROM chunk.moz_historyvisits as V, chunk.moz_places as P WHERE V.place_id = P.id'

- row2visit = _row2visit_firefox
+ row2visit = _row2visit_firefox # type: ignore[assignment]


  class FirefoxPhone(Extr):
- detector='remote_devices'
- schema_check=('visits', "_id, history_guid, visit_type, date, is_local")
+ detector = 'remote_devices'
+ schema_check = ('visits', "_id, history_guid, visit_type, date, is_local")
+ # fmt: off
  schema=Schema(cols=[
  ('H.url' , 'TEXT NOT NULL' ),

@@ -293,6 +307,7 @@ class FirefoxPhone(Extr):
  ('V.date as visit_date', 'INTEGER NOT NULL'),
  # ('is_local' , 'INTEGER NOT NULL'),
  ], key=('url', 'date', 'vid', 'hid'))
- query='FROM chunk.visits as V, chunk.history as H WHERE V.history_guid = H.guid'
+ # fmt: on
+ query = 'FROM chunk.visits as V, chunk.history as H WHERE V.history_guid = H.guid'

- row2visit = _row2visit_firefox
+ row2visit = _row2visit_firefox # type: ignore[assignment]
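The main behavioural change in this file is dropping the pytz dependency: the browser-specific epoch offsets are unchanged, but the resulting datetimes are now built with the stdlib timezone.utc. A minimal sketch of the two conversions shown above (chrome_time_to_utc is taken from the diff as-is; safari_time_to_utc is just an illustrative name for the inline arithmetic in Safari.row2visit):

from datetime import datetime, timezone

def chrome_time_to_utc(chrome_time: int) -> datetime:
    # Chrome stores visit_time in microseconds since 1601-01-01 (the WebKit epoch);
    # 11644473600 seconds is the gap between 1601-01-01 and the unix epoch
    epoch = (chrome_time / 1_000_000) - 11644473600
    return datetime.fromtimestamp(epoch, timezone.utc)

def safari_time_to_utc(visit_time: float) -> datetime:
    # Safari stores visit_time in seconds since 2001-01-01 (the Cocoa epoch)
    return datetime.fromtimestamp(visit_time + 978307200, timezone.utc)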
promnesia/sources/demo.py CHANGED
@@ -3,11 +3,11 @@ A dummy source, used for testing
  Generates a sequence of fake evenly separated visits
  '''

- from datetime import datetime, timedelta
- from typing import Union
+ from __future__ import annotations

- from ..common import Results, Visit, Loc
+ from datetime import datetime, timedelta

+ from promnesia.common import Loc, Results, Visit

  IsoFormatDt = str
  Seconds = int
@@ -16,12 +16,11 @@ Seconds = int
  # TODO allow passing isoformat string as base_dt?
  # and maybe something similar as delta? start with seconds maybe
  def index(
- count: int=100,
- *,
- base_dt: Union[datetime, IsoFormatDt] = datetime.min + timedelta(days=5000),
- delta: Union[timedelta, Seconds] = timedelta(hours=1),
+ count: int = 100,
+ *,
+ base_dt: datetime | IsoFormatDt = datetime.min + timedelta(days=5000),
+ delta: timedelta | Seconds = timedelta(hours=1),
  ) -> Results:
-
  base_dt_ = base_dt if isinstance(base_dt, datetime) else datetime.fromisoformat(base_dt)
  delta_ = delta if isinstance(delta, timedelta) else timedelta(seconds=delta)
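The rewritten signature relies on `from __future__ import annotations`, so the PEP 604 unions (`datetime | IsoFormatDt`) are never evaluated at runtime and also work on older Pythons; the body then coerces whichever form was passed. A small sketch of the same coercion (the `coerce` helper name is mine, the logic is the two lines from the diff):

from __future__ import annotations

from datetime import datetime, timedelta

IsoFormatDt = str
Seconds = int

def coerce(base_dt: datetime | IsoFormatDt, delta: timedelta | Seconds) -> tuple[datetime, timedelta]:
    # accept either the rich type or a plain serialisable one, as demo.index does
    base_dt_ = base_dt if isinstance(base_dt, datetime) else datetime.fromisoformat(base_dt)
    delta_ = delta if isinstance(delta, timedelta) else timedelta(seconds=delta)
    return base_dt_, delta_

print(coerce('2000-01-01T00:00:00', 30))  # (datetime(2000, 1, 1, 0, 0), timedelta(seconds=30))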
@@ -2,12 +2,13 @@
  Uses [[https://github.com/karlicoss/HPI][HPI]] for the messages data.
  '''

- from ..common import Results, Visit, Loc, extract_urls
+ from promnesia.common import Loc, Results, Visit, extract_urls


  def index() -> Results:
- from . import hpi
+ from . import hpi # noqa: F401,I001
  from my.fbmessenger import messages
+
  for m in messages():
  if isinstance(m, Exception):
  yield m
@@ -32,4 +33,3 @@ def index() -> Results:
  context=m.text,
  locator=loc,
  )
-
@@ -1,38 +1,42 @@
- #!/usr/bin/env python3
+ from __future__ import annotations
+
+ from collections.abc import Callable, Iterable, Sequence
  from functools import lru_cache
  from pathlib import Path
- from typing import Dict, Callable, Optional, Sequence, NamedTuple, Union, Iterable
+ from typing import NamedTuple

  from ..common import Results, Url

-
  # TODO doesn't really belong here...
  Ctx = Sequence[str]

+
  class EUrl(NamedTuple):
  url: Url
- ctx: Ctx # TODO ctx here is more like a Loc
+ ctx: Ctx  # TODO ctx here is more like a Loc
+
+
  ###


  # keys are mime types + extensions
- Ex = Callable[[Path], Union[Results, Iterable[EUrl]]]
+ Ex = Callable[[Path], Results | Iterable[EUrl]]
  # None means unhandled
- TYPE2IDX: Dict[str, Optional[Ex]] = {}
+ TYPE2IDX: dict[str, Ex | None] = {}
  # NOTE: there are some types in auto.py at the moment... it's a bit messy


  # TYPE2IDX only contains the 'prefixes', to speed up the lookup we are using cache..
  @lru_cache(None)
- def type2idx(t: str) -> Optional[Ex]:
+ def type2idx(t: str) -> Ex | None:
  if len(t) == 0:
- return None # just in case?
+ return None  # just in case?
  # first try exact match
- e = TYPE2IDX.get(t, None)
+ e = TYPE2IDX.get(t)
  if e is not None:
  return e
  t = t.strip('.')
- e = TYPE2IDX.get(t, None)
+ e = TYPE2IDX.get(t)
  if e is not None:
  return e
  # otherwise, try prefixes?
@@ -41,6 +45,7 @@ def type2idx(t: str) -> Optional[Ex]:
  return v
  return None

+
  # for now source code just indexed with grep, not sure if it's good enough?
  # if not, some fanceir library could be used...
  # e.g. https://github.com/karlicoss/promnesia/pull/152/commits/c2f00eb4ee4018b02c9bf3966a036db69a43373d
@@ -81,7 +86,7 @@ CODE = {

  '.ts', # most likely typescript.. otherwise determined as text/vnd.trolltech.linguist mime
  '.js',
- }
+ } # fmt: skip
  # TODO discover more extensions with mimetypes library?


@@ -97,9 +102,10 @@ audio/
  video/
  '''

- handle_later = lambda *args, **kwargs: ()
+ handle_later = lambda *_args, **_kwargs: ()

- def ignore(*args, **kwargs):
+
+ def ignore(*_args, **_kwargs):
  # TODO log (once?)
  yield from ()

@@ -121,13 +127,14 @@ TYPE2IDX.update({
  'font/woff': ignore,
  'text/x-Algol68': ignore, # ugh some license file had this?? maybe always index text/ as text?
  'text/x-bytecode.python': ignore, # todo ignore all x-bytecode?
+ 'text/calendar': ignore,

  # TODO not sure what to do about these..
  'application/octet-stream': handle_later,
  'application/zip' : handle_later,
  'application/x-tar' : handle_later,
  'application/gzip' : handle_later,
- })
+ }) # fmt: skip


  # TODO use some existing file for initial gitignore..
@@ -146,5 +153,4 @@ IGNORE = [
  # TODO not sure about these:
  '.gitignore',
  '.babelrc',
- ]
-
+ ] # fmt: skip
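type2idx keeps its lookup strategy: exact key first, then the key with dots stripped, then a prefix scan over TYPE2IDX, all memoised with lru_cache. A self-contained sketch of that strategy, with made-up string values standing in for the module's indexer callables:

from __future__ import annotations

from functools import lru_cache

TYPE2IDX: dict[str, str | None] = {
    'text/html': 'html-indexer',   # hypothetical values, just to show the lookup order
    'text/': 'plaintext-indexer',  # prefix entry
    'image/': None,                # None means "unhandled"
}

@lru_cache(None)
def type2idx(t: str) -> str | None:
    if len(t) == 0:
        return None
    e = TYPE2IDX.get(t)             # 1) exact match
    if e is not None:
        return e
    e = TYPE2IDX.get(t.strip('.'))  # 2) extension without the dot
    if e is not None:
        return e
    for k, v in TYPE2IDX.items():   # 3) prefix match
        if t.startswith(k):
            return v
    return None

print(type2idx('text/x-python'))  # -> 'plaintext-indexer' via the 'text/' prefix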
@@ -2,15 +2,14 @@
  Uses [[https://github.com/karlicoss/HPI][HPI]] github module
  '''

- # Note: requires the 'mistletoe' module if you enable render_markdown
-
- from typing import Optional, Set
+ from __future__ import annotations

- from ..common import Results, Visit, Loc, iter_urls, logger
+ # Note: requires the 'mistletoe' module if you enable render_markdown
+ from promnesia.common import Loc, Results, Visit, iter_urls, logger


  def index(*, render_markdown: bool = False) -> Results:
- from . import hpi
+ from . import hpi # noqa: F401,I001
  from my.github.all import events

  if render_markdown:
@@ -18,7 +17,9 @@ def index(*, render_markdown: bool = False) -> Results:
  from .markdown import TextParser, extract_from_text
  except ImportError as import_err:
  logger.exception(import_err)
- logger.critical("Could not import markdown module to render github body markdown. Try 'python3 -m pip install mistletoe'")
+ logger.critical(
+ "Could not import markdown module to render github body markdown. Try 'python3 -m pip install mistletoe'"
+ )
  render_markdown = False

  for e in events():
@@ -29,7 +30,7 @@ def index(*, render_markdown: bool = False) -> Results:
  continue

  # if enabled, convert the (markdown) body to HTML
- context: Optional[str] = e.body
+ context: str | None = e.body
  if e.body is not None and render_markdown:
  context = TextParser(e.body)._doc_ashtml() # type: ignore[possibly-undefined]

@@ -59,7 +60,7 @@ def index(*, render_markdown: bool = False) -> Results:
  #
  # Note: this set gets reset every event, is here to
  # prevent duplicates between URLExtract and the markdown parser
- emitted: Set[str] = set()
+ emitted: set[str] = set()
  for url in iter_urls(e.body):
  if url in emitted:
  continue
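The reflowed logger.critical call keeps the same fallback behaviour: if the markdown helper cannot be imported, the error is logged and render_markdown is quietly turned off. A rough sketch of that degradation path (resolve_render_markdown and the direct mistletoe import are illustrative only; the real code imports promnesia's own .markdown wrapper):

import logging

logger = logging.getLogger(__name__)  # stand-in for promnesia.common.logger

def resolve_render_markdown(render_markdown: bool) -> bool:
    if not render_markdown:
        return False
    try:
        import mistletoe  # noqa: F401
    except ImportError as import_err:
        logger.exception(import_err)
        logger.critical(
            "Could not import markdown module to render github body markdown. Try 'python3 -m pip install mistletoe'"
        )
        return False  # fall back to emitting the raw markdown body
    return True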
@@ -1,6 +1,7 @@
  # TODO eh. confusing how guess and auto are different...
  # maybe merge them later?
- from typing import Iterable, Any
+ from collections.abc import Iterable
+ from typing import Any

  from ..common import Extraction, PathIsh

@@ -21,14 +22,17 @@ def index(path: PathIsh, *args, **kwargs) -> Iterable[Extraction]:
  ps = str(path)
  # TODO better url detection

- index_: Any # meh
+ index_: Any  # meh
  if is_git_repo(ps):
  from . import vcs
+
  index_ = vcs.index
  elif is_website(ps):
  from . import website
+
  index_ = website.index
  else:
  from . import auto
+
  index_ = auto.index
  yield from index_(path, *args, **kwargs)
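guess.index still dispatches on the shape of the path: a git repo goes to sources.vcs, a URL to sources.website, anything else to sources.auto; the change above only splits the imports and adds blank lines after the deferred local imports. A toy sketch of the dispatch (the predicates here are crude stand-ins, the real is_git_repo/is_website live elsewhere in this module):

def pick_indexer(ps: str) -> str:
    def is_git_repo(s: str) -> bool:   # hypothetical stand-in
        return s.endswith('.git')

    def is_website(s: str) -> bool:    # hypothetical stand-in
        return s.startswith(('http://', 'https://'))

    if is_git_repo(ps):
        return 'vcs.index'
    elif is_website(ps):
        return 'website.index'
    else:
        return 'auto.index'

print(pick_indexer('https://example.com'))  # -> 'website.index'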
@@ -4,11 +4,11 @@ Uses [[https://github.com/karlicoss/HPI][HPI]] dogsheep module to import HackerN

  import textwrap

- from promnesia.common import Visit, Loc, Results
+ from promnesia.common import Loc, Results, Visit


  def index() -> Results:
- from . import hpi
+ from . import hpi # noqa: F401,I001
  from my.hackernews import dogsheep

  for item in dogsheep.items():
@@ -21,9 +21,7 @@ def index() -> Results:
  title = item.title
  elif item.text_html:
  title = item.text_html
- title = textwrap.shorten(
- title, width=79, placeholder="…",
- break_long_words=True)
+ title = textwrap.shorten(title, width=79, placeholder="…", break_long_words=True)
  # The locator is always the HN story. If the story is a link (as
  # opposed to a text post), we insert a visit such that the link
  # will point back to the corresponding HN story.
@@ -33,8 +31,8 @@ def index() -> Results:
  urls.append(item.url)
  for url in urls:
  yield Visit(
- url=url,
- dt=item.created,
- locator=loc,
- context=title,
+ url=url,
+ dt=item.created,
+ locator=loc,
+ context=title,
  )
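The title shortening is now a single call; behaviour is unchanged, since textwrap.shorten collapses whitespace and truncates on word boundaries, appending the placeholder when it has to cut. For example:

import textwrap

title = "Show HN: a rather long story title that would otherwise clutter the visit context"
print(textwrap.shorten(title, width=79, placeholder="…", break_long_words=True))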
promnesia/sources/hpi.py CHANGED
@@ -2,10 +2,12 @@
  Just a helper for a more humane error message when importing my.* dependencies
  '''

- from ..common import logger
+ from promnesia.common import logger

  try:
- import my
+ import my # noqa: F401
  except ImportError as e:
  logger.exception(e)
- logger.critical("Failed during 'import my'. You probably need to install & configure HPI package first (see 'https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org')")
+ logger.critical(
+ "Failed during 'import my'. You probably need to install & configure HPI package first (see 'https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org')"
+ )
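This module exists purely for its import-time side effect: it turns a missing HPI installation into a readable hint instead of a bare traceback, which is why the source modules above import it as `from . import hpi # noqa: F401,I001` before touching anything under my.*. A standalone sketch of the same pattern, using a plain logging logger in place of promnesia.common.logger:

import logging

logger = logging.getLogger(__name__)

try:
    import my  # noqa: F401  (imported only to check that HPI is installed)
except ImportError as e:
    logger.exception(e)
    logger.critical(
        "Failed during 'import my'. You probably need to install & configure HPI package first "
        "(see 'https://github.com/karlicoss/HPI/blob/master/doc/SETUP.org')"
    )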
promnesia/sources/html.py CHANGED
@@ -2,19 +2,21 @@
  Extracts links from HTML files
  '''

- from pathlib import Path
- from typing import Iterator, Tuple
+ from __future__ import annotations

- from ..common import PathIsh, Visit, Loc, Results, file_mtime
+ from collections.abc import Iterator
+ from pathlib import Path

- from bs4 import BeautifulSoup
+ from bs4 import BeautifulSoup, Tag

+ from promnesia.common import Loc, PathIsh, Results, Visit, file_mtime

- # TODO present error summary in the very end; import errors -- makes sense to show
+ # TODO present error summary in the very end; import errors -- makes sense to show
  # TODO on some exceptions, request a fallback to text?


- Url = Tuple[str, str]
+ Url = tuple[str, str]
+

  def extract_urls_from_html(s: str) -> Iterator[Url]:
  """
@@ -23,11 +25,13 @@ def extract_urls_from_html(s: str) -> Iterator[Url]:
  """
  soup = BeautifulSoup(s, 'lxml')
  for a in soup.find_all('a'):
+ assert isinstance(a, Tag), a # make mypy happy
  href = a.attrs.get('href')
  if href is None or ('://' not in href):
  # second condition means relative link
  continue
- text = a.text
+ assert isinstance(href, str), href # make mypy happy
+ text: str = a.text
  yield (href, text)
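The new asserts only exist to narrow bs4's union types for mypy; the extraction logic is untouched. A runnable condensation of the function above, assuming beautifulsoup4 and lxml are installed:

from collections.abc import Iterator

from bs4 import BeautifulSoup, Tag

Url = tuple[str, str]

def extract_urls_from_html(s: str) -> Iterator[Url]:
    soup = BeautifulSoup(s, 'lxml')
    for a in soup.find_all('a'):
        assert isinstance(a, Tag), a        # narrows the element type for mypy
        href = a.attrs.get('href')
        if href is None or '://' not in href:
            continue                        # skip relative links
        assert isinstance(href, str), href
        yield (href, a.text)

print(list(extract_urls_from_html('<p><a href="https://example.com">example</a></p>')))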
@@ -1,11 +1,12 @@
  """
  Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myhypothesis][hypothesis]] module
  """
- from ..common import Loc, Results, Visit, extract_urls, join_tags
+
+ from promnesia.common import Loc, Results, Visit, extract_urls, join_tags


  def index() -> Results:
- from . import hpi
+ from . import hpi # noqa: F401,I001
  import my.hypothesis as hyp

  for h in hyp.highlights():
@@ -1,11 +1,12 @@
  '''
  Uses HPI [[https://github.com/karlicoss/HPI/blob/master/doc/MODULES.org#myinstapaper][instapaper]] module
  '''
- from ..common import Results, logger, Visit, Loc
+
+ from promnesia.common import Loc, Results, Visit


  def index() -> Results:
- from . import hpi
+ from . import hpi # noqa: F401,I001
  import my.instapaper as ip

  for p in ip.pages():