promnesia 1.2.20230515__py3-none-any.whl → 1.3.20241021__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promnesia/__init__.py +14 -3
- promnesia/__main__.py +60 -35
- promnesia/cannon.py +27 -27
- promnesia/common.py +85 -67
- promnesia/compare.py +21 -22
- promnesia/compat.py +10 -10
- promnesia/config.py +23 -23
- promnesia/database/common.py +67 -0
- promnesia/database/dump.py +188 -0
- promnesia/{read_db.py → database/load.py} +16 -17
- promnesia/extract.py +14 -11
- promnesia/kjson.py +12 -11
- promnesia/logging.py +4 -4
- promnesia/misc/__init__.pyi +0 -0
- promnesia/misc/config_example.py +1 -2
- promnesia/misc/install_server.py +7 -9
- promnesia/server.py +57 -47
- promnesia/sources/__init__.pyi +0 -0
- promnesia/sources/auto.py +50 -35
- promnesia/sources/auto_logseq.py +6 -5
- promnesia/sources/auto_obsidian.py +2 -2
- promnesia/sources/browser.py +14 -9
- promnesia/sources/browser_legacy.py +26 -16
- promnesia/sources/demo.py +19 -3
- promnesia/sources/fbmessenger.py +3 -2
- promnesia/sources/filetypes.py +16 -7
- promnesia/sources/github.py +7 -9
- promnesia/sources/guess.py +2 -1
- promnesia/sources/hackernews.py +2 -2
- promnesia/sources/hpi.py +2 -2
- promnesia/sources/html.py +7 -5
- promnesia/sources/hypothesis.py +4 -3
- promnesia/sources/instapaper.py +2 -2
- promnesia/sources/markdown.py +31 -21
- promnesia/sources/org.py +27 -13
- promnesia/sources/plaintext.py +30 -29
- promnesia/sources/pocket.py +3 -2
- promnesia/sources/reddit.py +20 -19
- promnesia/sources/roamresearch.py +2 -1
- promnesia/sources/rss.py +4 -5
- promnesia/sources/shellcmd.py +19 -6
- promnesia/sources/signal.py +33 -24
- promnesia/sources/smscalls.py +2 -2
- promnesia/sources/stackexchange.py +4 -3
- promnesia/sources/takeout.py +76 -9
- promnesia/sources/takeout_legacy.py +24 -12
- promnesia/sources/telegram.py +13 -11
- promnesia/sources/telegram_legacy.py +18 -7
- promnesia/sources/twitter.py +6 -5
- promnesia/sources/vcs.py +5 -3
- promnesia/sources/viber.py +10 -9
- promnesia/sources/website.py +4 -4
- promnesia/sources/zulip.py +3 -2
- promnesia/sqlite.py +7 -4
- promnesia/tests/__init__.py +0 -0
- promnesia/tests/common.py +140 -0
- promnesia/tests/server_helper.py +67 -0
- promnesia/tests/sources/__init__.py +0 -0
- promnesia/tests/sources/test_auto.py +65 -0
- promnesia/tests/sources/test_filetypes.py +43 -0
- promnesia/tests/sources/test_hypothesis.py +39 -0
- promnesia/tests/sources/test_org.py +64 -0
- promnesia/tests/sources/test_plaintext.py +25 -0
- promnesia/tests/sources/test_shellcmd.py +21 -0
- promnesia/tests/sources/test_takeout.py +56 -0
- promnesia/tests/test_cannon.py +325 -0
- promnesia/tests/test_cli.py +40 -0
- promnesia/tests/test_compare.py +30 -0
- promnesia/tests/test_config.py +289 -0
- promnesia/tests/test_db_dump.py +222 -0
- promnesia/tests/test_extract.py +65 -0
- promnesia/tests/test_extract_urls.py +43 -0
- promnesia/tests/test_indexer.py +251 -0
- promnesia/tests/test_server.py +291 -0
- promnesia/tests/test_traverse.py +39 -0
- promnesia/tests/utils.py +35 -0
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/METADATA +15 -18
- promnesia-1.3.20241021.dist-info/RECORD +83 -0
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/WHEEL +1 -1
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/entry_points.txt +0 -1
- promnesia/dump.py +0 -105
- promnesia-1.2.20230515.dist-info/RECORD +0 -58
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/LICENSE +0 -0
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/top_level.txt +0 -0
promnesia/__init__.py
CHANGED
@@ -1,6 +1,17 @@
|
|
1
|
-
from pathlib import Path
|
2
|
-
from .common import PathIsh, Visit, Source, last, Loc, Results, DbVisit, Context, Res
|
3
|
-
|
4
1
|
# add deprecation warning so eventually this may converted to a namespace package?
|
5
2
|
import warnings
|
3
|
+
|
4
|
+
from .common import ( # noqa: F401
|
5
|
+
Context,
|
6
|
+
DbVisit,
|
7
|
+
Loc,
|
8
|
+
PathIsh,
|
9
|
+
Res,
|
10
|
+
Results,
|
11
|
+
Source,
|
12
|
+
Visit,
|
13
|
+
last,
|
14
|
+
)
|
15
|
+
|
16
|
+
# TODO think again about it -- what are the pros and cons?
|
6
17
|
warnings.warn("DEPRECATED! Please import directly from 'promnesia.common', e.g. 'from promnesia.common import Visit, Source, Results'", DeprecationWarning)
|
promnesia/__main__.py
CHANGED
@@ -4,24 +4,35 @@ import argparse
|
|
4
4
|
import ast
|
5
5
|
import importlib
|
6
6
|
import inspect
|
7
|
-
|
7
|
+
import os
|
8
|
+
import shlex
|
8
9
|
import shutil
|
9
|
-
from subprocess import run, check_call, Popen
|
10
10
|
import sys
|
11
|
-
from
|
12
|
-
from
|
13
|
-
|
14
|
-
|
15
|
-
from
|
16
|
-
|
17
|
-
from .
|
18
|
-
from .common import
|
19
|
-
|
20
|
-
|
11
|
+
from collections.abc import Iterable, Iterator, Sequence
|
12
|
+
from pathlib import Path
|
13
|
+
from subprocess import Popen, check_call, run
|
14
|
+
from tempfile import TemporaryDirectory, gettempdir
|
15
|
+
from typing import Callable
|
16
|
+
|
17
|
+
from . import config, server
|
18
|
+
from .common import (
|
19
|
+
DbVisit,
|
20
|
+
Extractor,
|
21
|
+
PathIsh,
|
22
|
+
Res,
|
23
|
+
Source,
|
24
|
+
default_config_path,
|
25
|
+
get_system_tz,
|
26
|
+
get_tmpdir,
|
27
|
+
logger,
|
28
|
+
user_config_file,
|
29
|
+
)
|
30
|
+
from .database.dump import visits_to_sqlite
|
21
31
|
from .extract import extract_visits
|
32
|
+
from .misc import install_server
|
22
33
|
|
23
34
|
|
24
|
-
def iter_all_visits(sources_subset: Iterable[
|
35
|
+
def iter_all_visits(sources_subset: Iterable[str | int] = ()) -> Iterator[Res[DbVisit]]:
|
25
36
|
cfg = config.get()
|
26
37
|
output_dir = cfg.output_dir
|
27
38
|
# not sure if belongs here??
|
@@ -73,7 +84,7 @@ def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Re
|
|
73
84
|
logger.warning("unknown --sources: %s", ", ".join(repr(i) for i in sources_subset))
|
74
85
|
|
75
86
|
|
76
|
-
def _do_index(dry: bool=False, sources_subset: Iterable[
|
87
|
+
def _do_index(*, dry: bool = False, sources_subset: Iterable[str | int] = (), overwrite_db: bool = False) -> Iterable[Exception]:
|
77
88
|
# also keep & return errors for further display
|
78
89
|
errors: list[Exception] = []
|
79
90
|
def it() -> Iterable[Res[DbVisit]]:
|
@@ -96,29 +107,32 @@ def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=(), ove
|
|
96
107
|
|
97
108
|
|
98
109
|
def do_index(
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
110
|
+
config_file: Path,
|
111
|
+
*,
|
112
|
+
dry: bool = False,
|
113
|
+
sources_subset: Iterable[str | int] = (),
|
114
|
+
overwrite_db: bool = False,
|
115
|
+
) -> Sequence[Exception]:
|
104
116
|
config.load_from(config_file) # meh.. should be cleaner
|
105
117
|
try:
|
106
118
|
errors = list(_do_index(dry=dry, sources_subset=sources_subset, overwrite_db=overwrite_db))
|
107
119
|
finally:
|
120
|
+
# this reset is mainly for tests, so we don't end up reusing the same config by accident
|
108
121
|
config.reset()
|
109
122
|
if len(errors) > 0:
|
110
123
|
logger.error('%d errors, printing them out:', len(errors))
|
111
124
|
for e in errors:
|
112
125
|
logger.exception(e)
|
113
126
|
logger.error('%d errors, exit code 1', len(errors))
|
114
|
-
|
127
|
+
return errors
|
115
128
|
|
116
129
|
|
117
130
|
def demo_sources() -> dict[str, Callable[[], Extractor]]:
|
118
131
|
def lazy(name: str) -> Callable[[], Extractor]:
|
119
132
|
# helper to avoid failed imports etc, since people might be lacking necessary dependencies
|
120
133
|
def inner() -> Extractor:
|
121
|
-
|
134
|
+
# TODO why this import??
|
135
|
+
from . import sources # noqa: F401
|
122
136
|
module = importlib.import_module(f'promnesia.sources.{name}')
|
123
137
|
return getattr(module, 'index')
|
124
138
|
return inner
|
@@ -143,7 +157,7 @@ def do_demo(
|
|
143
157
|
config_file: Path | None,
|
144
158
|
dry: bool=False,
|
145
159
|
name: str='demo',
|
146
|
-
sources_subset: Iterable[
|
160
|
+
sources_subset: Iterable[str | int]=(),
|
147
161
|
overwrite_db: bool=False,
|
148
162
|
) -> None:
|
149
163
|
with TemporaryDirectory() as tdir:
|
@@ -216,20 +230,29 @@ def config_check(args: argparse.Namespace) -> None:
|
|
216
230
|
def _config_check(cfg: Path) -> Iterable[Exception]:
|
217
231
|
logger.info('config: %s', cfg)
|
218
232
|
|
219
|
-
def check(cmd: list[str | Path]) -> Iterable[Exception]:
|
220
|
-
logger.debug(
|
221
|
-
res = run(cmd)
|
233
|
+
def check(cmd: list[str | Path], **kwargs) -> Iterable[Exception]:
|
234
|
+
logger.debug(shlex.join(map(str, cmd)))
|
235
|
+
res = run(cmd, **kwargs) # noqa: PLW1510
|
222
236
|
if res.returncode > 0:
|
237
|
+
# TODO what's up with empty exception??
|
223
238
|
yield Exception()
|
224
239
|
|
225
240
|
logger.info('Checking syntax...')
|
226
241
|
cmd: list[str | Path] = [sys.executable, '-m', 'compileall', cfg]
|
227
|
-
yield from check(
|
242
|
+
yield from check(
|
243
|
+
cmd,
|
244
|
+
env={
|
245
|
+
**os.environ,
|
246
|
+
# if config is on read only partition, the command would fail due to generated bytecode
|
247
|
+
# so put it in the temporary directory instead
|
248
|
+
'PYTHONPYCACHEPREFIX': gettempdir()
|
249
|
+
},
|
250
|
+
)
|
228
251
|
|
229
252
|
# todo not sure if should be more defensive than check_call here
|
230
253
|
logger.info('Checking type safety...')
|
231
254
|
try:
|
232
|
-
import mypy
|
255
|
+
import mypy # noqa: F401
|
233
256
|
except ImportError:
|
234
257
|
logger.warning("mypy not found, can't use it to check config!")
|
235
258
|
else:
|
@@ -281,7 +304,7 @@ def cli_doctor_server(args: argparse.Namespace) -> None:
|
|
281
304
|
logger.info('You should see the database path and version above!')
|
282
305
|
|
283
306
|
|
284
|
-
def _ordinal_or_name(s: str) ->
|
307
|
+
def _ordinal_or_name(s: str) -> str | int:
|
285
308
|
try:
|
286
309
|
s = int(s) # type: ignore
|
287
310
|
except ValueError:
|
@@ -317,14 +340,14 @@ def main() -> None:
|
|
317
340
|
)
|
318
341
|
|
319
342
|
F = lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, width=120)
|
320
|
-
p = argparse.ArgumentParser(formatter_class=F)
|
321
|
-
subp = p.add_subparsers(dest='mode'
|
343
|
+
p = argparse.ArgumentParser(formatter_class=F)
|
344
|
+
subp = p.add_subparsers(dest='mode' )
|
322
345
|
ep = subp.add_parser('index', help='Create/update the link database', formatter_class=F)
|
323
346
|
add_index_args(ep, default_config_path())
|
324
347
|
# TODO use some way to override or provide config only via cmdline?
|
325
348
|
ep.add_argument('--intermediate', required=False, help="Used for development, you don't need it")
|
326
349
|
|
327
|
-
sp = subp.add_parser('serve', help='Serve a link database', formatter_class=F)
|
350
|
+
sp = subp.add_parser('serve', help='Serve a link database', formatter_class=F)
|
328
351
|
server.setup_parser(sp)
|
329
352
|
|
330
353
|
ap = subp.add_parser('demo', help='Demo mode: index and serve a directory in single command', formatter_class=F)
|
@@ -338,7 +361,7 @@ def main() -> None:
|
|
338
361
|
ap.add_argument('--no-serve', action='store_const', const=None, dest='port', help='Pass to only index without running server')
|
339
362
|
ap.add_argument(
|
340
363
|
'--as',
|
341
|
-
choices=
|
364
|
+
choices=sorted(demo_sources().keys()),
|
342
365
|
default='guess',
|
343
366
|
help='Promnesia source to index as (see https://github.com/karlicoss/promnesia/tree/master/src/promnesia/sources for the full list)',
|
344
367
|
)
|
@@ -349,7 +372,7 @@ def main() -> None:
|
|
349
372
|
install_server.setup_parser(isp)
|
350
373
|
|
351
374
|
cp = subp.add_parser('config', help='Config management')
|
352
|
-
cp.set_defaults(func=lambda *
|
375
|
+
cp.set_defaults(func=lambda *_args: cp.print_help())
|
353
376
|
scp = cp.add_subparsers()
|
354
377
|
ccp = scp.add_parser('check', help='Check config')
|
355
378
|
ccp.set_defaults(func=config_check)
|
@@ -363,7 +386,7 @@ def main() -> None:
|
|
363
386
|
|
364
387
|
dp = subp.add_parser('doctor', help='Troubleshooting assistant')
|
365
388
|
dp.add_argument('--config', type=Path, default=default_config_path(), help='Config path')
|
366
|
-
dp.set_defaults(func=lambda *
|
389
|
+
dp.set_defaults(func=lambda *_args: dp.print_help())
|
367
390
|
sdp = dp.add_subparsers()
|
368
391
|
sdp.add_parser('config' , help='Check config' ).set_defaults(func=config_check )
|
369
392
|
sdp.add_parser('database', help='Inspect database').set_defaults(func=cli_doctor_db)
|
@@ -388,12 +411,14 @@ def main() -> None:
|
|
388
411
|
|
389
412
|
with get_tmpdir() as tdir: # TODO??
|
390
413
|
if mode == 'index':
|
391
|
-
do_index(
|
414
|
+
errors = do_index(
|
392
415
|
config_file=args.config,
|
393
416
|
dry=args.dry,
|
394
417
|
sources_subset=args.sources,
|
395
418
|
overwrite_db=args.overwrite,
|
396
419
|
)
|
420
|
+
if len(errors) > 0:
|
421
|
+
sys.exit(1)
|
397
422
|
elif mode == 'serve':
|
398
423
|
server.run(args)
|
399
424
|
elif mode == 'demo':
|
promnesia/cannon.py
CHANGED
@@ -9,16 +9,17 @@ are same content, but you can't tell that by URL equality. Even canonical urls a
|
|
9
9
|
|
10
10
|
Also some experiments to establish 'URL hierarchy'.
|
11
11
|
"""
|
12
|
-
|
12
|
+
from __future__ import annotations
|
13
13
|
|
14
|
-
from itertools import chain
|
15
14
|
import re
|
16
15
|
import typing
|
17
|
-
from typing import Iterable, NamedTuple, Set, Optional, List, Sequence, Union, Tuple, Dict, Any, Collection
|
18
|
-
|
19
16
|
import urllib.parse
|
20
|
-
from
|
17
|
+
from collections.abc import Collection, Iterable, Sequence
|
21
18
|
|
19
|
+
# TODO eh?? they fixed mobile.twitter.com?
|
20
|
+
from itertools import chain
|
21
|
+
from typing import Any, NamedTuple, Union
|
22
|
+
from urllib.parse import SplitResult, parse_qsl, urlencode, urlsplit, urlunsplit
|
22
23
|
|
23
24
|
# this has some benchmark, but quite a few librarires seem unmaintained, sadly
|
24
25
|
# I guess i'll stick to default for now, until it's a critical bottleneck
|
@@ -108,11 +109,11 @@ default_qkeep = [
|
|
108
109
|
|
109
110
|
# TODO perhaps, decide if fragment is meaningful (e.g. wiki) or random sequence of letters?
|
110
111
|
class Spec(NamedTuple):
|
111
|
-
qkeep :
|
112
|
-
qremove:
|
112
|
+
qkeep : Collection[str] | bool | None = None
|
113
|
+
qremove: set[str] | None = None
|
113
114
|
fkeep : bool = False
|
114
115
|
|
115
|
-
def keep_query(self, q: str) ->
|
116
|
+
def keep_query(self, q: str) -> int | None: # returns order
|
116
117
|
if self.qkeep is True:
|
117
118
|
return 1
|
118
119
|
qkeep = {
|
@@ -134,13 +135,13 @@ class Spec(NamedTuple):
|
|
134
135
|
return None
|
135
136
|
|
136
137
|
@classmethod
|
137
|
-
def make(cls, **kwargs) ->
|
138
|
+
def make(cls, **kwargs) -> Spec:
|
138
139
|
return cls(**kwargs)
|
139
140
|
|
140
141
|
S = Spec
|
141
142
|
|
142
143
|
# TODO perhaps these can be machine learnt from large set of urls?
|
143
|
-
specs:
|
144
|
+
specs: dict[str, Spec] = {
|
144
145
|
'youtube.com': S(
|
145
146
|
# TODO search_query?
|
146
147
|
qkeep=[ # note: experimental.. order matters here
|
@@ -178,7 +179,6 @@ specs: Dict[str, Spec] = {
|
|
178
179
|
|
179
180
|
'source', 'tsid', 'refsrc', 'pnref', 'rc', '_rdr', 'src', 'hc_location', 'section', 'permPage', 'soft', 'pn_ref', 'action',
|
180
181
|
'ti', 'aref', 'event_time_id', 'action_history', 'filter', 'ref_notif_type', 'has_source', 'source_newsfeed_story_type',
|
181
|
-
'ref_notif_type',
|
182
182
|
},
|
183
183
|
),
|
184
184
|
'physicstravelguide.com': S(fkeep=True), # TODO instead, pass fkeep marker object for shorter spec?
|
@@ -218,10 +218,10 @@ Spec2 = Any # TODO
|
|
218
218
|
|
219
219
|
# TODO this should be a map
|
220
220
|
Frag = Any
|
221
|
-
Parts = Sequence[
|
221
|
+
Parts = Sequence[tuple[str, str]]
|
222
222
|
|
223
223
|
|
224
|
-
def _yc(domain: str, path: str, qq: Parts, frag: Frag) ->
|
224
|
+
def _yc(domain: str, path: str, qq: Parts, frag: Frag) -> tuple[Any, Any, Parts, Frag]:
|
225
225
|
if path[:5] == '/from':
|
226
226
|
site = dict(qq).get('site')
|
227
227
|
if site is not None:
|
@@ -232,7 +232,7 @@ def _yc(domain: str, path: str, qq: Parts, frag: Frag) -> Tuple[Any, Any, Parts,
|
|
232
232
|
# TODO this should be in-place? for brevity?
|
233
233
|
return (domain, path, qq, frag)
|
234
234
|
|
235
|
-
def get_spec2(dom: str) ->
|
235
|
+
def get_spec2(dom: str) -> Spec2 | None:
|
236
236
|
return {
|
237
237
|
'news.ycombinator.com': _yc,
|
238
238
|
}.get(dom)
|
@@ -285,10 +285,10 @@ def transform_split(split: SplitResult):
|
|
285
285
|
REST = r'(?P<rest>.*)'
|
286
286
|
|
287
287
|
Left = Union[str, Sequence[str]]
|
288
|
-
Right =
|
288
|
+
Right = tuple[str, str, str]
|
289
289
|
# the idea is that we can unify certain URLs here and map them to the 'canonical' one
|
290
290
|
# this is a dict only for grouping but should be a list really.. todo
|
291
|
-
rules:
|
291
|
+
rules: dict[Left, Right] = {
|
292
292
|
# TODO m. handling might be quite common
|
293
293
|
# f'm.youtube.com/{REST}': ('youtube.com', '{rest}'),
|
294
294
|
(
|
@@ -322,9 +322,9 @@ def transform_split(split: SplitResult):
|
|
322
322
|
continue
|
323
323
|
gd = m.groupdict()
|
324
324
|
if len(to) == 2:
|
325
|
-
to = to
|
325
|
+
to = (*to, '')
|
326
326
|
|
327
|
-
(netloc, path, qq) =
|
327
|
+
(netloc, path, qq) = (t.format(**gd) for t in to)
|
328
328
|
qparts.extend(parse_qsl(qq, keep_blank_values=True)) # TODO hacky..
|
329
329
|
# TODO eh, qparts should really be a map or something...
|
330
330
|
break
|
@@ -361,7 +361,7 @@ def myunsplit(domain: str, path: str, query: str, fragment: str) -> str:
|
|
361
361
|
# ]
|
362
362
|
# for re in regexes:
|
363
363
|
|
364
|
-
def handle_archive_org(url: str) ->
|
364
|
+
def handle_archive_org(url: str) -> str | None:
|
365
365
|
are = r'web.archive.org/web/(?P<timestamp>\d+)/(?P<rest>.*)'
|
366
366
|
m = re.fullmatch(are, url)
|
367
367
|
if m is None:
|
@@ -422,7 +422,7 @@ def canonify(url: str) -> str:
|
|
422
422
|
qq = [(k, v) for i, k, v in sorted(iqq)]
|
423
423
|
# TODO still not sure what we should do..
|
424
424
|
# quote_plus replaces %20 with +, not sure if we want it...
|
425
|
-
query = urlencode(qq, quote_via=quote_via)
|
425
|
+
query = urlencode(qq, quote_via=quote_via)
|
426
426
|
|
427
427
|
path = _quote_path(path)
|
428
428
|
|
@@ -683,7 +683,7 @@ def domains(it): # pragma: no cover
|
|
683
683
|
try:
|
684
684
|
nurl = canonify(url)
|
685
685
|
except CanonifyException as e:
|
686
|
-
print(f"ERROR while normalising! {
|
686
|
+
print(f"ERROR while normalising! {url} {e}")
|
687
687
|
c['ERROR'] += 1
|
688
688
|
continue
|
689
689
|
else:
|
@@ -697,8 +697,8 @@ def groups(it, args): # pragma: no cover
|
|
697
697
|
all_pats = get_patterns()
|
698
698
|
|
699
699
|
from collections import Counter
|
700
|
-
c: typing.Counter[
|
701
|
-
unmatched:
|
700
|
+
c: typing.Counter[str | None] = Counter()
|
701
|
+
unmatched: list[str] = []
|
702
702
|
|
703
703
|
def dump():
|
704
704
|
print(c)
|
@@ -718,7 +718,7 @@ def groups(it, args): # pragma: no cover
|
|
718
718
|
try:
|
719
719
|
nurl = canonify(url)
|
720
720
|
except CanonifyException as e:
|
721
|
-
print(f"ERROR while normalising! {
|
721
|
+
print(f"ERROR while normalising! {url} {e}")
|
722
722
|
continue
|
723
723
|
udom = nurl[:nurl.find('/')]
|
724
724
|
usplit = udom.split('.')
|
@@ -756,10 +756,10 @@ def groups(it, args): # pragma: no cover
|
|
756
756
|
def display(it, args) -> None: # pragma: no cover
|
757
757
|
# TODO better name?
|
758
758
|
import difflib
|
759
|
-
# pylint: disable=import-error
|
760
|
-
from termcolor import colored as C # type: ignore
|
761
759
|
from sys import stdout
|
762
760
|
|
761
|
+
from termcolor import colored as C # type: ignore
|
762
|
+
|
763
763
|
for line in it:
|
764
764
|
line = line.strip()
|
765
765
|
if args.human:
|
@@ -818,7 +818,7 @@ def main() -> None: # pragma: no cover
|
|
818
818
|
|
819
819
|
- running comparison
|
820
820
|
sqlite3 promnesia.sqlite 'select distinct orig_url from visits where norm_url like "%twitter%" order by orig_url' | src/promnesia/cannon.py
|
821
|
-
''', formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=100)
|
821
|
+
''', formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=100)
|
822
822
|
)
|
823
823
|
p.add_argument('input', nargs='?')
|
824
824
|
p.add_argument('--human', action='store_true')
|