promnesia 1.2.20230515__py3-none-any.whl → 1.3.20241021__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promnesia/__init__.py +14 -3
- promnesia/__main__.py +60 -35
- promnesia/cannon.py +27 -27
- promnesia/common.py +85 -67
- promnesia/compare.py +21 -22
- promnesia/compat.py +10 -10
- promnesia/config.py +23 -23
- promnesia/database/common.py +67 -0
- promnesia/database/dump.py +188 -0
- promnesia/{read_db.py → database/load.py} +16 -17
- promnesia/extract.py +14 -11
- promnesia/kjson.py +12 -11
- promnesia/logging.py +4 -4
- promnesia/misc/__init__.pyi +0 -0
- promnesia/misc/config_example.py +1 -2
- promnesia/misc/install_server.py +7 -9
- promnesia/server.py +57 -47
- promnesia/sources/__init__.pyi +0 -0
- promnesia/sources/auto.py +50 -35
- promnesia/sources/auto_logseq.py +6 -5
- promnesia/sources/auto_obsidian.py +2 -2
- promnesia/sources/browser.py +14 -9
- promnesia/sources/browser_legacy.py +26 -16
- promnesia/sources/demo.py +19 -3
- promnesia/sources/fbmessenger.py +3 -2
- promnesia/sources/filetypes.py +16 -7
- promnesia/sources/github.py +7 -9
- promnesia/sources/guess.py +2 -1
- promnesia/sources/hackernews.py +2 -2
- promnesia/sources/hpi.py +2 -2
- promnesia/sources/html.py +7 -5
- promnesia/sources/hypothesis.py +4 -3
- promnesia/sources/instapaper.py +2 -2
- promnesia/sources/markdown.py +31 -21
- promnesia/sources/org.py +27 -13
- promnesia/sources/plaintext.py +30 -29
- promnesia/sources/pocket.py +3 -2
- promnesia/sources/reddit.py +20 -19
- promnesia/sources/roamresearch.py +2 -1
- promnesia/sources/rss.py +4 -5
- promnesia/sources/shellcmd.py +19 -6
- promnesia/sources/signal.py +33 -24
- promnesia/sources/smscalls.py +2 -2
- promnesia/sources/stackexchange.py +4 -3
- promnesia/sources/takeout.py +76 -9
- promnesia/sources/takeout_legacy.py +24 -12
- promnesia/sources/telegram.py +13 -11
- promnesia/sources/telegram_legacy.py +18 -7
- promnesia/sources/twitter.py +6 -5
- promnesia/sources/vcs.py +5 -3
- promnesia/sources/viber.py +10 -9
- promnesia/sources/website.py +4 -4
- promnesia/sources/zulip.py +3 -2
- promnesia/sqlite.py +7 -4
- promnesia/tests/__init__.py +0 -0
- promnesia/tests/common.py +140 -0
- promnesia/tests/server_helper.py +67 -0
- promnesia/tests/sources/__init__.py +0 -0
- promnesia/tests/sources/test_auto.py +65 -0
- promnesia/tests/sources/test_filetypes.py +43 -0
- promnesia/tests/sources/test_hypothesis.py +39 -0
- promnesia/tests/sources/test_org.py +64 -0
- promnesia/tests/sources/test_plaintext.py +25 -0
- promnesia/tests/sources/test_shellcmd.py +21 -0
- promnesia/tests/sources/test_takeout.py +56 -0
- promnesia/tests/test_cannon.py +325 -0
- promnesia/tests/test_cli.py +40 -0
- promnesia/tests/test_compare.py +30 -0
- promnesia/tests/test_config.py +289 -0
- promnesia/tests/test_db_dump.py +222 -0
- promnesia/tests/test_extract.py +65 -0
- promnesia/tests/test_extract_urls.py +43 -0
- promnesia/tests/test_indexer.py +251 -0
- promnesia/tests/test_server.py +291 -0
- promnesia/tests/test_traverse.py +39 -0
- promnesia/tests/utils.py +35 -0
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/METADATA +15 -18
- promnesia-1.3.20241021.dist-info/RECORD +83 -0
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/WHEEL +1 -1
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/entry_points.txt +0 -1
- promnesia/dump.py +0 -105
- promnesia-1.2.20230515.dist-info/RECORD +0 -58
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/LICENSE +0 -0
- {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,291 @@
|
|
1
|
+
from datetime import datetime
|
2
|
+
from pathlib import Path
|
3
|
+
from subprocess import Popen
|
4
|
+
|
5
|
+
import pytest
|
6
|
+
|
7
|
+
from ..__main__ import do_index
|
8
|
+
from .common import promnesia_bin, write_config
|
9
|
+
from .server_helper import run_server
|
10
|
+
|
11
|
+
|
12
|
+
def test_status_error() -> None:
    """
    A server pointed at a DB path that doesn't exist must still answer /status:
    the reply carries the version and an error marker in the 'db' field.
    """
    with run_server(db='/does/not/exist') as server:
        reply = server.post('/status')

        # TODO ugh currently returns 200? maybe should return proper error, but need to handle in extension
        # assert reply.status_code == 404

        payload = reply.json()

        server_version = payload['version']
        assert server_version is not None
        version_parts = server_version.split('.')
        assert len(version_parts) >= 2  # random check..

        # defensive: the DB doesn't exist, so the field should report an error
        assert 'ERROR' in payload['db']
|
29
|
+
|
30
|
+
|
31
|
+
def test_status_ok(tmp_path: Path) -> None:
    """
    After indexing 10 demo visits, /status reports the version, the DB path and the visit count.
    """
    # NOTE(review): cfg is handed to write_config below -- presumably its body becomes the config file,
    # so it is kept exactly as is
    def cfg() -> None:
        from promnesia.common import Source
        from promnesia.sources import demo

        SOURCES = [Source(demo.index, count=10)]

    config_file = tmp_path / 'config.py'
    write_config(config_file, cfg)
    do_index(config_file)

    database = tmp_path / 'promnesia.sqlite'
    with run_server(db=database, timezone='America/New_York') as server:
        status = server.post('/status').json()

        server_version = status['version']
        assert server_version is not None
        assert len(server_version.split('.')) >= 2  # random check..

        assert status['db'] == str(database)

        expected_stats = {'total_visits': 10}
        assert status['stats'] == expected_stats
|
52
|
+
|
53
|
+
|
54
|
+
def test_visits(tmp_path: Path) -> None:
    """
    /visits returns nothing for an unknown url and exactly one visit for an indexed demo page.
    """
    # NOTE(review): cfg is handed to write_config below -- presumably its body becomes the config file,
    # so it is kept exactly as is
    def cfg() -> None:
        from promnesia.common import Source
        from promnesia.sources import demo

        SOURCES = [Source(demo.index, base_dt='2000-01-01', delta=30 * 60)]

    config_file = tmp_path / 'config.py'
    write_config(config_file, cfg)
    do_index(config_file)

    database = tmp_path / 'promnesia.sqlite'
    # pin the server timezone, otherwise the rendered 'dt' depends on the test environment
    with run_server(db=database, timezone='America/New_York') as server:
        unknown = server.post('/visits', json={'url': 'whatever'}).json()
        assert unknown['visits'] == []

        known = server.post('/visits', json={'url': 'https://demo.com/page0.html'}).json()
        assert known['normalised_url'] == 'demo.com/page0.html'

        # unpacking doubles as a check that there is exactly one visit
        (visit,) = known['visits']
        assert visit['src'] == 'demo'
        assert visit['locator']['title'] == 'demo'

        assert visit['dt'] == '01 Jan 2000 00:00:00 -0500'
|
78
|
+
|
79
|
+
|
80
|
+
def test_visits_hierarchy(tmp_path: Path) -> None:
    """
    Querying /visits for a parent url returns the exact match plus those
    'child' urls that carry a context; context-less children and unrelated urls are left out.
    """
    # NOTE(review): cfg is handed to write_config below -- presumably its body becomes the config file,
    # so it is kept exactly as is
    def cfg() -> None:
        from datetime import datetime

        from promnesia.common import Loc, Source, Visit
        from promnesia.sources import demo

        def indexer():
            visits = list(demo.index(count=6))
            yield Visit(
                url='https://reddit.com/post1',
                dt=datetime.fromisoformat('2023-12-04'),
                locator=Loc.make('reddit'),
            )
            yield Visit(
                url='https://reddit.com/post1/comment2',
                dt=datetime.fromisoformat('2023-12-02'),
                locator=Loc.make('reddit'),
                context='I am comment 2',
            )
            yield from visits[:3]
            yield Visit(
                url='https://reddit.com/post2',
                dt=datetime.fromisoformat('2023-12-05'),
                locator=Loc.make('reddit'),
            )
            yield from visits[3:]
            yield Visit(
                url='https://reddit.com/post1/ihavenocontext',
                dt=datetime.fromisoformat('2023-12-06'),
                locator=Loc.make('reddit'),
            )
            yield Visit(
                url='https://reddit.com/post1/comment1',
                dt=datetime.fromisoformat('2023-12-06'),
                locator=Loc.make('reddit'),
                context='I am comment 1',
            )

        SOURCES = [Source(indexer)]

    config_file = tmp_path / 'config.py'
    write_config(config_file, cfg)
    do_index(config_file)

    database = tmp_path / 'promnesia.sqlite'
    # pin the server timezone, otherwise the response depends on the test environment
    with run_server(db=database, timezone='America/New_York') as server:
        reply = server.post('/visits', json={'url': 'https://reddit.com/post1'}).json()
        returned_urls = {visit['original_url'] for visit in reply['visits']}

        # exact match + 'child' visits that are interesting (e.g. have context)
        expected_urls = {
            'https://reddit.com/post1',
            'https://reddit.com/post1/comment1',
            'https://reddit.com/post1/comment2',
        }
        assert returned_urls == expected_urls
|
134
|
+
|
135
|
+
|
136
|
+
def test_visited(tmp_path: Path) -> None:
    """
    /visited answers one entry per requested url: a visit for an indexed url, None for an unknown one.
    """
    # NOTE(review): cfg is handed to write_config below -- presumably its body becomes the config file,
    # so it is kept exactly as is
    def cfg() -> None:
        from promnesia.common import Source
        from promnesia.sources import demo

        SOURCES = [Source(demo.index, base_dt='2000-01-01', delta=30 * 60)]

    config_file = tmp_path / 'config.py'
    write_config(config_file, cfg)
    do_index(config_file)

    indexed_url = 'https://demo.com/page5.html'
    database = tmp_path / 'promnesia.sqlite'

    # pin the server timezone, otherwise the response depends on the test environment
    with run_server(db=database, timezone='America/New_York') as server:
        nothing = server.post('/visited', json={'urls': []}).json()
        assert nothing == []

        answers = server.post('/visited', json={'urls': [indexed_url, 'http://badurl.org']}).json()
        # unpacking doubles as a check that exactly two entries came back
        (hit, miss) = answers
        assert hit['original_url'] == indexed_url
        assert miss is None
|
158
|
+
|
159
|
+
|
160
|
+
def test_search(tmp_path: Path) -> None:
    """
    /search finds visits by free text and renders 'dt' in the visit's own timezone
    when it has one, falling back to the server timezone otherwise.
    """
    # TODO not sure if should index at all here or just insert DbVisits directly?
    # NOTE(review): cfg is handed to write_config below -- presumably its body becomes the config file,
    # so it is kept exactly as is
    def cfg() -> None:
        from datetime import datetime

        from promnesia.common import Loc, Source, Visit
        from promnesia.sources import demo

        def indexer():
            visits = list(demo.index(count=6))
            yield Visit(
                url='https://someone.org/something',
                dt=datetime.fromisoformat('2023-12-04T11:12:13+03:00'),
                locator=Loc.make('whatever'),
            )
            yield from visits[:3]
            yield Visit(
                url='https://wiki.termux.com/wiki/Termux-setup-storage',
                locator=Loc.make(
                    title='Reddit comment',
                    href='https://reddit.com/r/termux/comments/m4qrxt/cant_open_storageshared_in_termux/gso0kak/',
                ),
                dt=datetime.fromisoformat('2023-12-02'),
                context='perhaps it will help someone else https://wiki.termux.com/wiki/Termux-setup-storage',
            )
            yield from visits[3:]

        SOURCES = [Source(indexer)]

    config_file = tmp_path / 'config.py'
    write_config(config_file, cfg)
    do_index(config_file)

    termux_context = 'perhaps it will help someone else https://wiki.termux.com/wiki/Termux-setup-storage'

    database = tmp_path / 'promnesia.sqlite'
    with run_server(db=database, timezone='America/New_York') as server:
        # FIXME 'url' is actually kinda misleading -- it can be any text
        someone_hits = server.post('/search', json={'url': 'someone'}).json()['visits']
        # TODO maybe return in chronological order or something? not sure
        # sort by the rendered 'dt' string so the two hits can be told apart regardless of server order
        (earlier, later) = sorted(someone_hits, key=lambda hit: hit['dt'])

        assert earlier['context'] == termux_context
        assert earlier['dt'] == '02 Dec 2023 00:00:00 -0500'  # uses server timezone (original visit didn't have it)

        assert later['normalised_url'] == 'someone.org/something'
        assert later['dt'] == '04 Dec 2023 11:12:13 +0300'  # uses original visit timezone

        comment_hits = server.post('/search', json={'url': 'comment'}).json()['visits']
        # unpacking doubles as a check that there is exactly one hit
        (only_hit,) = comment_hits
        assert only_hit['context'] == termux_context
|
208
|
+
|
209
|
+
|
210
|
+
def test_search_around(tmp_path: Path) -> None:
    """
    /search_around should return visits up to 3 hours in the past of the given timestamp,
    and nothing when the timestamp is far away from every indexed visit.
    """
    # NOTE(review): cfg is handed to write_config below -- presumably its body becomes the config file,
    # so it is kept exactly as is
    def cfg() -> None:
        from promnesia.common import Source
        from promnesia.sources import demo

        # generates 60 visits within 10 mins of each other -- so spanning over 10 hours
        SOURCES = [Source(demo.index, count=60, base_dt='2000-01-01T00:00:00+03:00', delta=10 * 60)]

    config_file = tmp_path / 'config.py'
    write_config(config_file, cfg)
    do_index(config_file)

    # years after the indexed range
    far_away = datetime.fromisoformat('2005-01-01T00:00:00+06:00').timestamp()
    # same instant as 04:55 at +03:00, i.e. inside the indexed range
    within_range = datetime.fromisoformat('2000-01-01T07:55:00+06:00').timestamp()

    # TODO hmm. perhaps it makes more sense to run query in different process and server in main process for testing??
    with run_server(db=tmp_path / 'promnesia.sqlite') as server:
        empty_reply = server.post('/search_around', json={'timestamp': far_away}).json()
        assert empty_reply['visits'] == []

        reply = server.post('/search_around', json={'timestamp': within_range}).json()
        nearby = reply['visits']
        assert len(nearby) == 18  # 6 per hour * 3

        first, last = nearby[0], nearby[-1]
        assert first['dt'] == '01 Jan 2000 02:00:00 +0300'
        assert last['dt'] == '01 Jan 2000 04:50:00 +0300'
|
239
|
+
|
240
|
+
|
241
|
+
@pytest.mark.parametrize('mode', ['update', 'overwrite'])
def test_query_while_indexing(tmp_path: Path, mode: str) -> None:
    """
    The server must keep answering /status with a sane visit count while
    a separate indexer process is writing to the same database.

    In 'overwrite' mode every run replaces the DB, so at least one full run (1000 visits)
    must always be visible; in 'update' mode each run uses its own source name,
    so the visible total must be at least 1000 per run started so far.
    """
    overwrite = mode == 'overwrite'
    extra_args = ['--overwrite'] if overwrite else []

    def _index(run_id: str) -> Popen:
        # NOTE(review): cfg is handed to write_config below together with run_id -- presumably its body
        # becomes the config file with run_id substituted, so it is kept exactly as is
        def cfg(run_id: str) -> None:
            from promnesia.common import Source
            from promnesia.sources import demo

            SOURCES = [Source(demo.index, count=1_000, name=run_id)]

        config_file = tmp_path / f'config{run_id}.py'
        write_config(config_file, cfg, run_id=run_id)

        command = promnesia_bin('index', '--config', config_file, *extra_args)
        return Popen(command)

    # trigger initial indexing; leaving the Popen context waits for the process to finish
    with _index(run_id='0'):
        pass

    with run_server(db=tmp_path / 'promnesia.sqlite') as server:
        far_away = datetime.fromisoformat('2005-01-01T00:00:00+06:00').timestamp()
        reply = server.post('/search_around', json={'timestamp': far_away}).json()
        assert reply['visits'] == []

        for run_number in range(1, 5):
            # lower bound on what must be visible while this run is in flight
            minimum_visible = 1_000 if overwrite else 1_000 * run_number

            with _index(run_id=str(run_number)) as indexer:
                # hammer the backend to increase likelihood of race condition
                while indexer.poll() is None:
                    status = server.post('/status').json()
                    total_visits = status['stats']['total_visits']
                    assert total_visits >= minimum_visible
|
279
|
+
|
280
|
+
|
281
|
+
# TODO also could check server methods directly?
|
282
|
+
# via something like this... but not sure if really makes much difference
|
283
|
+
# import promnesia.server as S
|
284
|
+
# S.EnvConfig.set(S.ServerConfig(
|
285
|
+
# # TODO populate with test db and benchmark properly...
|
286
|
+
# db=Path('/todo'),
|
287
|
+
# timezone=pytz.utc,
|
288
|
+
# ))
|
289
|
+
# links = [f'https://reddit.com/whatever{i}.html' for i in range(count)]
|
290
|
+
# res = S.visited(links)
|
291
|
+
# assert len(res) == len(links)
|
@@ -0,0 +1,39 @@
|
|
1
|
+
from unittest.mock import patch
|
2
|
+
|
3
|
+
from ..common import traverse
|
4
|
+
from .common import get_testdata
|
5
|
+
|
6
|
+
# Root of the 'traverse' fixture tree shared by all tests in this module
# (presumably resolved relative to the repository's test data directory -- see get_testdata)
testDataPath = get_testdata('traverse')
|
7
|
+
|
8
|
+
|
9
|
+
# shutil.which is patched to always report a falsy result (e.g. when traverse looks up fdfind),
# so that traverse() falls back to plain `find`
@patch('promnesia.common.shutil.which', return_value=False)
def test_traverse_ignore_find(patched) -> None:
    '''
    traverse() through the `find` fallback must skip the ignored names
    '''
    expected = {testDataPath / 'imhere2/real.txt', testDataPath / 'imhere.txt'}

    ignored = ['ignoreme.txt', 'ignoreme2']
    found = set(traverse(testDataPath, ignore=ignored))

    assert found == expected
|
19
|
+
|
20
|
+
|
21
|
+
def test_traverse_ignore_fdfind() -> None:
    '''
    traverse() with `fdfind` but ignore some stuff

    NOTE(review): nothing is patched here, so which backend traverse() actually picks
    presumably depends on what is installed on the machine running the test -- confirm.
    '''
    # explicit '-> None' (as on the sibling tests) so that type checkers look at this body too
    paths = set(traverse(testDataPath, ignore=['ignoreme.txt', 'ignoreme2']))

    assert paths == {testDataPath / 'imhere.txt', testDataPath / 'imhere2/real.txt'}
|
28
|
+
|
29
|
+
|
30
|
+
# TODO: in the future it would be nice to exercise the implementation directly
# instead of relying on this weird patching
@patch('promnesia.common._is_windows', new_callable=lambda: True)
def test_traverse_ignore_windows(patched) -> None:
    '''
    traverse() through the pure python path (taken when _is_windows is true) must skip the ignored names
    '''
    expected = {testDataPath / 'imhere.txt', testDataPath / 'imhere2/real.txt'}

    ignored = ['ignoreme.txt', 'ignoreme2']
    found = set(traverse(testDataPath, ignore=ignored))

    assert found == expected
|
promnesia/tests/utils.py
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
from collections.abc import Mapping, Sequence
|
2
|
+
from datetime import datetime, timedelta
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import Optional, Union
|
5
|
+
|
6
|
+
from ..common import Loc, Source, Visit
|
7
|
+
from ..database.dump import visits_to_sqlite
|
8
|
+
from ..extract import extract_visits
|
9
|
+
|
10
|
+
# TODO accepting a mapping is a historical wart; a sequence of (url, context) pairs would have been enough
Urls = Union[
    Mapping[str, Optional[str]],
    Sequence[tuple[str, Optional[str]]],
]


def index_urls(urls: Urls, *, source_name: str = 'test'):
    """
    Build an indexing callback that dumps the given urls into a test database.

    :param urls: either a mapping url -> context, or a sequence of (url, context) pairs;
        context may be None
    :param source_name: passed as ``src`` to extract_visits for every generated visit
    :return: a function taking ``tmp_path``; when called it writes one visit per url
        into ``tmp_path / 'promnesia.sqlite'`` (overwriting the DB) and asserts
        that visits_to_sqlite reported no errors
    """
    # Urls promises any Mapping, so check against the ABC rather than the concrete dict --
    # otherwise a non-dict mapping would be iterated as bare keys and fail on unpacking
    pairs = list(urls.items()) if isinstance(urls, Mapping) else urls

    def idx(tmp_path: Path) -> None:
        def indexer():
            for i, (url, ctx) in enumerate(pairs):
                yield Visit(
                    url=url,
                    # synthetic timestamps, one hour apart, so every visit gets a distinct dt
                    dt=datetime.min + timedelta(days=5000) + timedelta(hours=i),
                    locator=Loc.make('test'),
                    context=ctx,
                )

        db_visits = extract_visits(source=Source(indexer), src=source_name)
        errors = visits_to_sqlite(vit=db_visits, overwrite_db=True, _db_path=tmp_path / 'promnesia.sqlite')

        assert len(errors) == 0, errors

    return idx
|
@@ -1,19 +1,18 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: promnesia
|
3
|
-
Version: 1.
|
3
|
+
Version: 1.3.20241021
|
4
4
|
Summary: Enhancement of your browsing history
|
5
5
|
Home-page: https://github.com/karlicoss/promnesia
|
6
6
|
Author: Dmitrii Gerasimov
|
7
7
|
Author-email: karlicoss@gmail.com
|
8
|
-
|
9
|
-
|
10
|
-
Requires-Python: >=3.8
|
8
|
+
Requires-Python: >=3.9
|
9
|
+
License-File: LICENSE
|
11
10
|
Requires-Dist: appdirs
|
12
11
|
Requires-Dist: tzlocal
|
13
12
|
Requires-Dist: more-itertools
|
13
|
+
Requires-Dist: typing-extensions
|
14
14
|
Requires-Dist: pytz
|
15
|
-
Requires-Dist: sqlalchemy
|
16
|
-
Requires-Dist: cachew (>=0.8.0)
|
15
|
+
Requires-Dist: sqlalchemy >=2.0
|
17
16
|
Requires-Dist: urlextract
|
18
17
|
Requires-Dist: fastapi
|
19
18
|
Requires-Dist: uvicorn[standard]
|
@@ -26,33 +25,31 @@ Requires-Dist: HPI ; extra == 'all'
|
|
26
25
|
Requires-Dist: beautifulsoup4 ; extra == 'all'
|
27
26
|
Requires-Dist: lxml ; extra == 'all'
|
28
27
|
Requires-Dist: mistletoe ; extra == 'all'
|
29
|
-
Requires-Dist: orgparse
|
28
|
+
Requires-Dist: orgparse >=0.3.0 ; extra == 'all'
|
30
29
|
Provides-Extra: html
|
31
30
|
Requires-Dist: beautifulsoup4 ; extra == 'html'
|
32
31
|
Requires-Dist: lxml ; extra == 'html'
|
33
|
-
Provides-Extra: linting
|
34
|
-
Requires-Dist: pytest ; extra == 'linting'
|
35
|
-
Requires-Dist: mypy ; extra == 'linting'
|
36
|
-
Requires-Dist: lxml ; extra == 'linting'
|
37
32
|
Provides-Extra: markdown
|
38
33
|
Requires-Dist: mistletoe ; extra == 'markdown'
|
39
34
|
Provides-Extra: optional
|
40
35
|
Requires-Dist: logzero ; extra == 'optional'
|
41
36
|
Requires-Dist: python-magic ; extra == 'optional'
|
42
37
|
Provides-Extra: org
|
43
|
-
Requires-Dist: orgparse
|
38
|
+
Requires-Dist: orgparse >=0.3.0 ; extra == 'org'
|
44
39
|
Provides-Extra: telegram
|
45
40
|
Provides-Extra: testing
|
46
41
|
Requires-Dist: pytest ; extra == 'testing'
|
47
42
|
Requires-Dist: pytest-timeout ; extra == 'testing'
|
48
43
|
Requires-Dist: pytest-xdist ; extra == 'testing'
|
44
|
+
Requires-Dist: hypothesis ; extra == 'testing'
|
49
45
|
Requires-Dist: psutil ; extra == 'testing'
|
50
|
-
Requires-Dist: requests
|
51
|
-
Requires-Dist: httpie ; extra == 'testing'
|
46
|
+
Requires-Dist: requests ; extra == 'testing'
|
52
47
|
Requires-Dist: selenium ; extra == 'testing'
|
53
48
|
Requires-Dist: click ; extra == 'testing'
|
54
|
-
Requires-Dist:
|
55
|
-
|
56
|
-
|
57
|
-
|
49
|
+
Requires-Dist: ruff ; extra == 'testing'
|
50
|
+
Requires-Dist: mypy ; extra == 'testing'
|
51
|
+
Requires-Dist: lxml ; extra == 'testing'
|
52
|
+
Requires-Dist: loguru ; extra == 'testing'
|
53
|
+
Provides-Extra: testing-gui
|
54
|
+
Requires-Dist: pyautogui ; extra == 'testing-gui'
|
58
55
|
|
@@ -0,0 +1,83 @@
|
|
1
|
+
promnesia/__init__.py,sha256=8ZrCJe2kJb0DuYIiNeiUm0XU0nsjTlctcjdFoy9DVYw,457
|
2
|
+
promnesia/__main__.py,sha256=y7Jgcc1uSKbxKhFDrlBlYh75kZm_unS9vH_3HvWCpnk,15421
|
3
|
+
promnesia/cannon.py,sha256=TZ4b5P0qZpo0CVKCQLthyPFtAW5DYZT5ylttr5TmikI,24522
|
4
|
+
promnesia/common.py,sha256=iUQh3Z-XqWxOPqnOHL8sBHEEJcGTztITBXnYizA02zs,20098
|
5
|
+
promnesia/compare.py,sha256=vREaDTOjrGG43qOZEq2E9jV7fihO0U-KBMuguUMNgnk,4572
|
6
|
+
promnesia/compat.py,sha256=cxk8ZOv0LnSwLCab_UrsxQWcKiiZnWgf2xK13L-ql4w,456
|
7
|
+
promnesia/config.py,sha256=hFHdFbcB2q35gU1zlmtYj6VkHR_1SSH4U2M5G4HQa_g,4721
|
8
|
+
promnesia/extract.py,sha256=m6D-QVA54ldcffcAby8w2OIDg0GPmAgDSGVWhSTwA60,2810
|
9
|
+
promnesia/kjson.py,sha256=GPpeIpvtXwaocBw1W1QzPWN8UJcF0USSlaMxRSEkH-U,3400
|
10
|
+
promnesia/logging.py,sha256=z3Otc_JzZREnMG0QerqoOR6zFJ6Ls6ATGydczCADZSY,5923
|
11
|
+
promnesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
|
+
promnesia/server.py,sha256=LBka2jYuzISVAyNIQaf8IRGIbcpSFo1g-vn8TI5VOhE,14647
|
13
|
+
promnesia/sqlite.py,sha256=4jZhqMGviSZK5wNqd7PbVC0z-D3UWyeM0RXrabAruZ8,1378
|
14
|
+
promnesia/database/common.py,sha256=9iENKM0pRL1PKrSAhGzNreDFsQNQBIGZWKuV5E8aalw,1843
|
15
|
+
promnesia/database/dump.py,sha256=v5m5mdnY7Tu7Yu-mlQHAFW46Kgt4dJRIOWOGOs88gEY,6116
|
16
|
+
promnesia/database/load.py,sha256=kP-HdNL_3q94P_Cj5pnxarHqFgCVJFrsbr0vogDRtFU,1357
|
17
|
+
promnesia/misc/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
|
+
promnesia/misc/config_example.py,sha256=MXxJCVjm9MrEwPR8SHTwkJRDcqWcqJWtfJg_M6yhRMo,364
|
19
|
+
promnesia/misc/install_server.py,sha256=NEEGvpun7_lMBGXKvRGrq5wcGT_xgMm7MvPcftR15pI,4757
|
20
|
+
promnesia/sources/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
21
|
+
promnesia/sources/auto.py,sha256=hiecNxttUGFskR5H2W35K-6daEhhiPpH2ZRpOZBBr50,11695
|
22
|
+
promnesia/sources/auto_logseq.py,sha256=1npVDEXB8wAM6XwTku0nZb57yQ9mM7vWct9albgvGxw,445
|
23
|
+
promnesia/sources/auto_obsidian.py,sha256=UoVWAkdCbh7Ij3JFIy6ylYOd6776c7-z2xaR6b8snYc,205
|
24
|
+
promnesia/sources/browser.py,sha256=qmuDzu0iJjpQc-WX2LEZ3TnpHhb9H-b5H-hZV7lbVkY,3087
|
25
|
+
promnesia/sources/browser_legacy.py,sha256=rNZydnWnnQ3sAzgRgFYnuqQKHUg8MxS5iiAmHAh9rV8,10429
|
26
|
+
promnesia/sources/demo.py,sha256=TQVVl6089TlEntiGSO-9EhqXBf9U_ipUnhsVqDCOspg,1033
|
27
|
+
promnesia/sources/fbmessenger.py,sha256=Dpvxh_5_ozaqGOSHxCApoIda58Z33r0_N6Uk2BIMde8,997
|
28
|
+
promnesia/sources/filetypes.py,sha256=2d6taAuGT4jvfCEqF9US8jukT6sBuN0wBEPJToCKiww,3643
|
29
|
+
promnesia/sources/github.py,sha256=arDUZShPi8Dzz9EhYfk4j1OK3ugsADsjAhfXy00ng0c,2777
|
30
|
+
promnesia/sources/guess.py,sha256=DMC7pj7rAA5yGZ6hMp31y4QvSugVNA9osexBLd5oCmc,803
|
31
|
+
promnesia/sources/hackernews.py,sha256=zocx4XrP2QY5UW-ErSMej7lrYZH0xmotqaIBVGyNH7M,1247
|
32
|
+
promnesia/sources/hpi.py,sha256=cHQlEJAH1EeTiawB1kcjXPt4UYDL1ZTNIHadBvd5QH4,383
|
33
|
+
promnesia/sources/html.py,sha256=ill3XtIObZoK1a8FY6OZGoOwmqjtSiiiwmqjzSbZhQQ,1153
|
34
|
+
promnesia/sources/hypothesis.py,sha256=qbiP6xJN53A6YkSUgA7hUa3ZD4KmtUnhJfAzXUHdt0o,1387
|
35
|
+
promnesia/sources/instapaper.py,sha256=zIq6AClPb8Zfkdft7U4VgEgODlEeQRowygGz06DaGUQ,988
|
36
|
+
promnesia/sources/markdown.py,sha256=KHHwxezCQRpcJLoyt_qB44NQwZGL7BnPT-Ehud21eR4,3871
|
37
|
+
promnesia/sources/org.py,sha256=BD1DpDcxXWWjirKnrovY2tDQpgj5YAt175z45Z_o0jI,5589
|
38
|
+
promnesia/sources/plaintext.py,sha256=8aYkmBYmXKnxyshWqgwUgQ03oG-OMKRi7DXsWtcgpVA,3138
|
39
|
+
promnesia/sources/pocket.py,sha256=PWmjAgg8nSDubBv_4WuWmyupB6NP8SAuuKiV8ZLE4xY,1133
|
40
|
+
promnesia/sources/reddit.py,sha256=u-Ou0xTZO-s9SM_GKHfNawLV5Yv7PmH4lggy_yq1XnA,5630
|
41
|
+
promnesia/sources/roamresearch.py,sha256=_WKurvGea9JHMKlapQyH44Kfg-AD40fb7xse26It0Fo,1077
|
42
|
+
promnesia/sources/rss.py,sha256=6Ijy6omXGjzMLkmwijq6JBRIY7ruArb-hXppRYY4ln0,824
|
43
|
+
promnesia/sources/shellcmd.py,sha256=bXWfHv6XFmVUQHUUWn2E_C96tIdmIxb1-yqAQRtTYIk,2739
|
44
|
+
promnesia/sources/signal.py,sha256=4ZBUXQoab0OG1r8ieZhSHpMNrzsFDomVXBhqb8hRYPA,14793
|
45
|
+
promnesia/sources/smscalls.py,sha256=N6jMHHr4bUlQORQcTVeGsdrmW4ep7jpCEFSi26sRpxY,774
|
46
|
+
promnesia/sources/stackexchange.py,sha256=_s8HJfhblVNspvbLEnuse5J8BvFFrboIdv5jURTZ2eE,658
|
47
|
+
promnesia/sources/takeout.py,sha256=mv6j8mkDb8lJsv41iBsJBzFpbGp7jXIBWYcAFMTvdls,8017
|
48
|
+
promnesia/sources/takeout_legacy.py,sha256=iaSLu73RutEx1UW4m-n3hk7-ISlVl2d-W0ZKP__20XU,4130
|
49
|
+
promnesia/sources/telegram.py,sha256=hSI6zxQh2zqdBiIAGJzY2Vi5A_233jDy0tGU0Q-Q-EU,2979
|
50
|
+
promnesia/sources/telegram_legacy.py,sha256=gRe6Exw-svMQZhlussmBGg0EJq81XadSa5-mh2m8ztI,4524
|
51
|
+
promnesia/sources/twitter.py,sha256=_tIU0rQ3b96WXKIPJQRyRAzG7AAnPp9ANytrcVJ5Z0U,1886
|
52
|
+
promnesia/sources/vcs.py,sha256=wMcgXr1Nd7DxM5j405ip4SPxJIqhdY4SePpp9qPL9q8,1654
|
53
|
+
promnesia/sources/viber.py,sha256=C7e9AxN5sgC-SLyCdUapzwC3eGuA0WQUGfO8Br5r53A,5990
|
54
|
+
promnesia/sources/website.py,sha256=GbDZChKK1_KLWXiLUUcZLOvep6h2AWmqECudwmupSl8,1811
|
55
|
+
promnesia/sources/zulip.py,sha256=GN8YiUmFa3jLXFmNst47YILu0U-kPawbxDKY-Mtobf0,770
|
56
|
+
promnesia/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
57
|
+
promnesia/tests/common.py,sha256=2D9YspEWhKplbG6ZrVACkBjp_NEJB7qaXzOMjWHitCY,4092
|
58
|
+
promnesia/tests/server_helper.py,sha256=Z1Rc_88WmOwukCyVR5Cb4XaXg4xewHiy4vlFAxQkoBU,2199
|
59
|
+
promnesia/tests/test_cannon.py,sha256=VZPWZAtcAdRzQwVJ01qUIgvxB3VJl72Gqg0GI9tr8To,12711
|
60
|
+
promnesia/tests/test_cli.py,sha256=g-9mgs4_tu6bGNZz3_swP3oj1L2V_o9ycjlFzIR9WYE,1404
|
61
|
+
promnesia/tests/test_compare.py,sha256=JDQzFWaIMkKCn6YYKJPrZjdbZ6LZn5ig793q62LwSFc,945
|
62
|
+
promnesia/tests/test_config.py,sha256=VqX2R0UcXmxp8O-p6C5Unk9W2Iv3RkIhvKe3ga84GHc,6534
|
63
|
+
promnesia/tests/test_db_dump.py,sha256=PMGiTnp_cfDxLUfzYrchKKewoF84q6TooEw445K-WiA,6766
|
64
|
+
promnesia/tests/test_extract.py,sha256=kD2iNodCj2OHM7_sQ_3DTRHJMZItJ7FuE855GpxZ3jM,2340
|
65
|
+
promnesia/tests/test_extract_urls.py,sha256=Ybez16VdYoveL9b_50Ca9vbMogWHOvFqDOJPGZrzQJg,1961
|
66
|
+
promnesia/tests/test_indexer.py,sha256=Oo93qqPrP0xXY1XhcRQkoJuEGpREfs6qoH335f_5_dI,8981
|
67
|
+
promnesia/tests/test_server.py,sha256=c1HxEKlqTxbBrKu7tmg58G8Z9E4jNcxq-YENxxfVqIQ,10786
|
68
|
+
promnesia/tests/test_traverse.py,sha256=DUN-NbrZ8b6MKrtZaOavAnZelMPk_aqN2ylvRV7UqHo,1350
|
69
|
+
promnesia/tests/utils.py,sha256=TsRaLYN9slHHoNJmPSKwgoP82ICHBvEjT2G4scAraIQ,1136
|
70
|
+
promnesia/tests/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
71
|
+
promnesia/tests/sources/test_auto.py,sha256=wWqf7x_BXeVSYTDIaD6ZXqqRvqk312biY4iOGcMa4PY,1848
|
72
|
+
promnesia/tests/sources/test_filetypes.py,sha256=6xhpN9hQb2U6FF4lAEIXWupk-t_HxQjNfk9A6krEnx4,1359
|
73
|
+
promnesia/tests/sources/test_hypothesis.py,sha256=4anFCfBqzJueiDhhg2WtvITMIofAWRMB2-Ja1Cb2beY,1400
|
74
|
+
promnesia/tests/sources/test_org.py,sha256=cefCAc7JVtrzHblbLHYB28tsjzfV1b4h80_JRO1Zamk,2572
|
75
|
+
promnesia/tests/sources/test_plaintext.py,sha256=Bn7v2HhL1FSLqCKPy-BX5OLy1PlWIyYjvRTLAWrXVWg,827
|
76
|
+
promnesia/tests/sources/test_shellcmd.py,sha256=K4hDQl3yd4t-6JFeDtB1gtH2XFImSYdYe1pG0UOO-Uw,678
|
77
|
+
promnesia/tests/sources/test_takeout.py,sha256=HVreW_4pZP8TjGNmrJva5JJfkexmOwwr7cRxwU1Qg_Q,1557
|
78
|
+
promnesia-1.3.20241021.dist-info/LICENSE,sha256=rgO9acPmnw53ZBxiXBdp8kfxmRcekhg_Q7HN65BPihs,1074
|
79
|
+
promnesia-1.3.20241021.dist-info/METADATA,sha256=YLJgGCkJ4LTXKid3Ieq-hBk5aJVc3c000Diutdk-RlI,1903
|
80
|
+
promnesia-1.3.20241021.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
|
81
|
+
promnesia-1.3.20241021.dist-info/entry_points.txt,sha256=hz1qfzQSRh4kkVkJWk4hnYqE9A1nobEbKlLG_0nNxzE,54
|
82
|
+
promnesia-1.3.20241021.dist-info/top_level.txt,sha256=7yvIpooFiuNLf9yLdu9MTADz57z0YTAqSu7aSG9ujSU,10
|
83
|
+
promnesia-1.3.20241021.dist-info/RECORD,,
|
promnesia/dump.py
DELETED
@@ -1,105 +0,0 @@
|
|
1
|
-
from pathlib import Path
|
2
|
-
import shutil
|
3
|
-
from typing import List, Set, Iterable
|
4
|
-
|
5
|
-
from more_itertools import chunked
|
6
|
-
|
7
|
-
from sqlalchemy import create_engine, MetaData, Table, event, text
|
8
|
-
|
9
|
-
from cachew import NTBinder
|
10
|
-
|
11
|
-
from .common import get_logger, DbVisit, get_tmpdir, Res, now_tz, Loc
|
12
|
-
from . import config
|
13
|
-
|
14
|
-
|
15
|
-
# NOTE: I guess the main performance benefit from this is not creating too many tmp lists and avoiding overhead
|
16
|
-
# since as far as sql is concerned it should all be in the same transaction. only a guess
|
17
|
-
# not sure it's the proper way to handle it
|
18
|
-
# see test_index_many
|
19
|
-
_CHUNK_BY = 10
|
20
|
-
|
21
|
-
# I guess 1 hour is definitely enough
|
22
|
-
_CONNECTION_TIMEOUT_SECONDS = 3600
|
23
|
-
|
24
|
-
# returns critical warnings
|
25
|
-
def visits_to_sqlite(vit: Iterable[Res[DbVisit]], *, overwrite_db: bool) -> List[Exception]:
    """
    Dump visits into the 'visits' table of the sqlite database from the config.

    :param vit: stream of visits; anything that isn't a DbVisit is counted as an error
        and stored as a placeholder row with its repr in the context
    :param overwrite_db: if True, a fresh temporary database is written and then moved over
        the configured path; otherwise the existing database is updated in place
    :return: critical warnings -- contains a RuntimeError when nothing at all was indexed
    """
    logger = get_logger()
    db_path = config.get().db

    now = now_tz()
    ok = 0
    errors = 0

    def vit_ok() -> Iterable[DbVisit]:
        nonlocal errors, ok
        for v in vit:
            if isinstance(v, DbVisit):
                ok += 1
                yield v
            else:
                errors += 1
                # conform to the schema and dump. can't hurt anyway
                # NOTE: '<errror>' (sic) is kept as is, since it ends up in the stored rows
                ev = DbVisit(
                    norm_url='<error>',
                    orig_url='<error>',
                    dt=now,
                    locator=Loc.make('<errror>'),
                    src='error',
                    # todo attach backtrace?
                    context=repr(v),
                )
                yield ev

    tpath = Path(get_tmpdir().name) / 'promnesia.tmp.sqlite'
    if overwrite_db:
        # here we don't need timeout, since it's a brand new DB
        engine = create_engine(f'sqlite:///{tpath}')
    else:
        # here we need a timeout, otherwise concurrent indexing might not work
        # (note that this also needs WAL mode)
        # see test_concurrent_indexing
        engine = create_engine(f'sqlite:///{db_path}', connect_args={'timeout': _CONNECTION_TIMEOUT_SECONDS})

    # using WAL keeps database readable while we're writing in it
    # this is tested by test_query_while_indexing
    def enable_wal(dbapi_con, con_record) -> None:
        dbapi_con.execute('PRAGMA journal_mode = WAL')
    event.listen(engine, 'connect', enable_wal)

    binder = NTBinder.make(DbVisit)
    meta = MetaData()
    table = Table('visits', meta, *binder.columns)

    # sources whose old rows were already removed during this run
    cleared: Set[str] = set()
    ncleared = 0
    try:
        with engine.begin() as conn:
            table.create(conn, checkfirst=True)

            for chunk in chunked(vit_ok(), n=_CHUNK_BY):
                srcs = {v.src or '' for v in chunk}
                new = srcs.difference(cleared)

                # the first time a source shows up, drop whatever it left in the table before
                for src in new:
                    conn.execute(table.delete().where(table.c.src == src))
                    cursor = conn.execute(text("SELECT changes()")).fetchone()
                    assert cursor is not None
                    ncleared += cursor[0]
                    cleared.add(src)

                bound = [binder.to_row(x) for x in chunk]
                # pylint: disable=no-value-for-parameter
                conn.execute(table.insert().values(bound))
    finally:
        # release the connections even if dumping failed midway
        engine.dispose()

    if overwrite_db:
        shutil.move(str(tpath), str(db_path))

    errs = '' if errors == 0 else f', {errors} ERRORS'
    total = ok + errors
    what = 'overwritten' if overwrite_db else 'updated'
    logger.info(
        '%s database "%s". %d total (%d OK%s, %d cleared, +%d more)',
        what, db_path, total, ok, errs, ncleared, ok - ncleared)
    res: List[Exception] = []
    if total == 0:
        res.append(RuntimeError('No visits were indexed, something is probably wrong!'))
    return res
|