promnesia 1.2.20230515__py3-none-any.whl → 1.2.20240810__py3-none-any.whl

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (61)
  1. promnesia/__main__.py +26 -14
  2. promnesia/cannon.py +4 -4
  3. promnesia/common.py +39 -28
  4. promnesia/compare.py +3 -2
  5. promnesia/config.py +4 -2
  6. promnesia/database/common.py +66 -0
  7. promnesia/database/dump.py +187 -0
  8. promnesia/{read_db.py → database/load.py} +10 -11
  9. promnesia/extract.py +1 -0
  10. promnesia/kjson.py +1 -1
  11. promnesia/logging.py +3 -3
  12. promnesia/misc/__init__.pyi +0 -0
  13. promnesia/misc/config_example.py +1 -2
  14. promnesia/misc/install_server.py +2 -3
  15. promnesia/server.py +18 -19
  16. promnesia/sources/__init__.pyi +0 -0
  17. promnesia/sources/auto.py +9 -7
  18. promnesia/sources/browser_legacy.py +11 -5
  19. promnesia/sources/demo.py +18 -2
  20. promnesia/sources/filetypes.py +7 -0
  21. promnesia/sources/github.py +2 -2
  22. promnesia/sources/hypothesis.py +1 -1
  23. promnesia/sources/markdown.py +15 -15
  24. promnesia/sources/org.py +7 -3
  25. promnesia/sources/plaintext.py +3 -1
  26. promnesia/sources/reddit.py +2 -2
  27. promnesia/sources/rss.py +1 -1
  28. promnesia/sources/signal.py +22 -14
  29. promnesia/sources/stackexchange.py +2 -2
  30. promnesia/sources/takeout.py +58 -1
  31. promnesia/sources/takeout_legacy.py +10 -2
  32. promnesia/tests/__init__.py +0 -0
  33. promnesia/tests/common.py +137 -0
  34. promnesia/tests/server_helper.py +64 -0
  35. promnesia/tests/sources/__init__.py +0 -0
  36. promnesia/tests/sources/test_auto.py +66 -0
  37. promnesia/tests/sources/test_filetypes.py +42 -0
  38. promnesia/tests/sources/test_hypothesis.py +39 -0
  39. promnesia/tests/sources/test_org.py +65 -0
  40. promnesia/tests/sources/test_plaintext.py +26 -0
  41. promnesia/tests/sources/test_shellcmd.py +22 -0
  42. promnesia/tests/sources/test_takeout.py +58 -0
  43. promnesia/tests/test_cannon.py +325 -0
  44. promnesia/tests/test_cli.py +42 -0
  45. promnesia/tests/test_compare.py +30 -0
  46. promnesia/tests/test_config.py +290 -0
  47. promnesia/tests/test_db_dump.py +223 -0
  48. promnesia/tests/test_extract.py +61 -0
  49. promnesia/tests/test_extract_urls.py +43 -0
  50. promnesia/tests/test_indexer.py +245 -0
  51. promnesia/tests/test_server.py +292 -0
  52. promnesia/tests/test_traverse.py +41 -0
  53. promnesia/tests/utils.py +35 -0
  54. {promnesia-1.2.20230515.dist-info → promnesia-1.2.20240810.dist-info}/METADATA +13 -17
  55. promnesia-1.2.20240810.dist-info/RECORD +83 -0
  56. {promnesia-1.2.20230515.dist-info → promnesia-1.2.20240810.dist-info}/WHEEL +1 -1
  57. {promnesia-1.2.20230515.dist-info → promnesia-1.2.20240810.dist-info}/entry_points.txt +0 -1
  58. promnesia/dump.py +0 -105
  59. promnesia-1.2.20230515.dist-info/RECORD +0 -58
  60. {promnesia-1.2.20230515.dist-info → promnesia-1.2.20240810.dist-info}/LICENSE +0 -0
  61. {promnesia-1.2.20230515.dist-info → promnesia-1.2.20240810.dist-info}/top_level.txt +0 -0
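Per-file hunks follow. To sanity-check a summary like the one above against the actual artifacts, a comparable file-level diff can be produced from the two wheels with nothing but the standard library; a minimal sketch, assuming both .whl files were downloaded into the working directory (filenames follow PyPI's wheel naming, not the registry's display names):

    # rough file-level diff of two wheels (wheels are just zip archives)
    import zipfile

    OLD = 'promnesia-1.2.20230515-py3-none-any.whl'
    NEW = 'promnesia-1.2.20240810-py3-none-any.whl'

    def entries(path: str) -> dict:
        with zipfile.ZipFile(path) as zf:
            # map archive member -> (uncompressed size, CRC) to detect content changes
            return {i.filename: (i.file_size, i.CRC) for i in zf.infolist()}

    old, new = entries(OLD), entries(NEW)
    for name in sorted(old.keys() | new.keys()):
        if name not in old:
            print(f'ADDED    {name}')
        elif name not in new:
            print(f'DELETED  {name}')
        elif old[name] != new[name]:
            print(f'MODIFIED {name}')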
promnesia/tests/test_server.py ADDED
@@ -0,0 +1,292 @@
+ from datetime import datetime
+ from pathlib import Path
+ from subprocess import Popen
+
+ import pytest
+
+ from ..__main__ import do_index
+
+ from .common import promnesia_bin, write_config
+ from .server_helper import run_server
+
+
+ def test_status_error() -> None:
+     """
+     If DB doesn't exist, server should handle it gracefully and respond with error
+     """
+     with run_server(db='/does/not/exist') as server:
+         response = server.post('/status')
+
+         # TODO ugh currently returns 200? maybe should return proper error, but need to handle in extension
+         # assert response.status_code == 404
+
+         body = response.json()
+
+         version = body['version']
+         assert version is not None
+         assert len(version.split('.')) >= 2 # random check..
+
+         assert 'ERROR' in body['db'] # defensive, it doesn't exist
+
+
+ def test_status_ok(tmp_path: Path) -> None:
+     def cfg() -> None:
+         from promnesia.common import Source
+         from promnesia.sources import demo
+
+         SOURCES = [Source(demo.index, count=10)]
+
+     cfg_path = tmp_path / 'config.py'
+     write_config(cfg_path, cfg)
+     do_index(cfg_path)
+
+     db_path = tmp_path / 'promnesia.sqlite'
+     with run_server(db=db_path, timezone='America/New_York') as server:
+         r = server.post('/status').json()
+         version = r['version']
+         assert version is not None
+         assert len(version.split('.')) >= 2 # random check..
+
+         assert r['db'] == str(db_path)
+
+         assert r['stats'] == {'total_visits': 10}
+
+
+ def test_visits(tmp_path: Path) -> None:
+     def cfg() -> None:
+         from promnesia.common import Source
+         from promnesia.sources import demo
+
+         SOURCES = [Source(demo.index, base_dt='2000-01-01', delta=30 * 60)]
+
+     cfg_path = tmp_path / 'config.py'
+     write_config(cfg_path, cfg)
+     do_index(cfg_path)
+
+     # force timezone here, otherwise depending on the test env response varies
+     with run_server(db=tmp_path / 'promnesia.sqlite', timezone='America/New_York') as server:
+         r = server.post('/visits', json={'url': 'whatever'}).json()
+         assert r['visits'] == []
+
+         r = server.post('/visits', json={'url': 'https://demo.com/page0.html'})
+         rj = r.json()
+         assert rj['normalised_url'] == 'demo.com/page0.html'
+         [v] = rj['visits']
+         assert v['src'] == 'demo'
+         assert v['locator']['title'] == 'demo'
+
+         assert v['dt'] == '01 Jan 2000 00:00:00 -0500'
+
+
+ def test_visits_hierarchy(tmp_path: Path) -> None:
+     def cfg() -> None:
+         from datetime import datetime
+
+         from promnesia.common import Source, Visit, Loc
+         from promnesia.sources import demo
+
+         def indexer():
+             visits = list(demo.index(count=6))
+             yield Visit(
+                 url='https://reddit.com/post1',
+                 dt=datetime.fromisoformat('2023-12-04'),
+                 locator=Loc.make('reddit'),
+             )
+             yield Visit(
+                 url='https://reddit.com/post1/comment2',
+                 dt=datetime.fromisoformat('2023-12-02'),
+                 locator=Loc.make('reddit'),
+                 context='I am comment 2',
+             )
+             yield from visits[:3]
+             yield Visit(
+                 url='https://reddit.com/post2',
+                 dt=datetime.fromisoformat('2023-12-05'),
+                 locator=Loc.make('reddit'),
+             )
+             yield from visits[3:]
+             yield Visit(
+                 url='https://reddit.com/post1/ihavenocontext',
+                 dt=datetime.fromisoformat('2023-12-06'),
+                 locator=Loc.make('reddit'),
+             )
+             yield Visit(
+                 url='https://reddit.com/post1/comment1',
+                 dt=datetime.fromisoformat('2023-12-06'),
+                 locator=Loc.make('reddit'),
+                 context='I am comment 1',
+             )
+
+         SOURCES = [Source(indexer)]
+
+     cfg_path = tmp_path / 'config.py'
+     write_config(cfg_path, cfg)
+     do_index(cfg_path)
+
+     # force timezone here, otherwise depending on the test env response varies
+     with run_server(db=tmp_path / 'promnesia.sqlite', timezone='America/New_York') as server:
+         r = server.post('/visits', json={'url': 'https://reddit.com/post1'}).json()
+         # returns exact match + 'child' visits that are interesting (e.g. have context)
+         assert {v['original_url'] for v in r['visits']} == {
+             'https://reddit.com/post1',
+             'https://reddit.com/post1/comment1',
+             'https://reddit.com/post1/comment2',
+         }
+
+
+ def test_visited(tmp_path: Path) -> None:
+     def cfg() -> None:
+         from promnesia.common import Source
+         from promnesia.sources import demo
+
+         SOURCES = [Source(demo.index, base_dt='2000-01-01', delta=30 * 60)]
+
+     cfg_path = tmp_path / 'config.py'
+     write_config(cfg_path, cfg)
+     do_index(cfg_path)
+
+     test_url = 'https://demo.com/page5.html'
+
+     # force timezone here, otherwise depending on the test env response varies
+     with run_server(db=tmp_path / 'promnesia.sqlite', timezone='America/New_York') as server:
+         r = server.post('/visited', json={'urls': []}).json()
+         assert r == []
+
+         r = server.post('/visited', json={'urls': [test_url, 'http://badurl.org']}).json()
+         [r1, r2] = r
+         assert r1['original_url'] == test_url
+         assert r2 is None
+
+
+ def test_search(tmp_path: Path) -> None:
+     # TODO not sure if should index at all here or just insert DbVisits directly?
+     def cfg() -> None:
+         from datetime import datetime
+
+         from promnesia.common import Source, Visit, Loc
+         from promnesia.sources import demo
+
+         def indexer():
+             visits = list(demo.index(count=6))
+             yield Visit(
+                 url='https://someone.org/something',
+                 dt=datetime.fromisoformat('2023-12-04T11:12:13+03:00'),
+                 locator=Loc.make('whatever'),
+             )
+             yield from visits[:3]
+             yield Visit(
+                 url='https://wiki.termux.com/wiki/Termux-setup-storage',
+                 locator=Loc.make(
+                     title='Reddit comment',
+                     href='https://reddit.com/r/termux/comments/m4qrxt/cant_open_storageshared_in_termux/gso0kak/',
+                 ),
+                 dt=datetime.fromisoformat('2023-12-02'),
+                 context='perhaps it will help someone else https://wiki.termux.com/wiki/Termux-setup-storage',
+             )
+             yield from visits[3:]
+
+         SOURCES = [Source(indexer)]
+
+     cfg_path = tmp_path / 'config.py'
+     write_config(cfg_path, cfg)
+     do_index(cfg_path)
+
+     with run_server(db=tmp_path / 'promnesia.sqlite', timezone='America/New_York') as server:
+         # FIXME 'url' is actually kinda misleading -- it can be any text
+         rj = server.post('/search', json={'url': 'someone'}).json()
+         # TODO maybe return in chronological order or something? not sure
+         [v1, v2] = sorted(rj['visits'], key=lambda j: j['dt'])
+
+         assert v1['context'] == 'perhaps it will help someone else https://wiki.termux.com/wiki/Termux-setup-storage'
+         assert v1['dt'] == '02 Dec 2023 00:00:00 -0500' # uses server timezone (original visit didn't have it)
+
+         assert v2['normalised_url'] == 'someone.org/something'
+         assert v2['dt'] == '04 Dec 2023 11:12:13 +0300' # uses original visit timezone
+
+         rj = server.post('/search', json={'url': 'comment'}).json()
+         [v] = rj['visits']
+         assert v['context'] == 'perhaps it will help someone else https://wiki.termux.com/wiki/Termux-setup-storage'
+
+
+ def test_search_around(tmp_path: Path) -> None:
+     # this should return visits up to 3 hours in the past
+     def cfg() -> None:
+         from promnesia.common import Source
+         from promnesia.sources import demo
+
+         # generates 60 visits within 10 mins of each other -- so spanning over 10 hours
+         SOURCES = [Source(demo.index, count=60, base_dt='2000-01-01T00:00:00+03:00', delta=10 * 60)]
+
+     cfg_path = tmp_path / 'config.py'
+     write_config(cfg_path, cfg)
+     do_index(cfg_path)
+
+     # TODO hmm. perhaps it makes more sense to run query in different process and server in main process for testing??
+     with run_server(db=tmp_path / 'promnesia.sqlite') as server:
+         rj = server.post(
+             '/search_around',
+             json={'timestamp': datetime.fromisoformat('2005-01-01T00:00:00+06:00').timestamp()},
+         ).json()
+         assert rj['visits'] == []
+
+         rj = server.post(
+             '/search_around',
+             json={'timestamp': datetime.fromisoformat('2000-01-01T07:55:00+06:00').timestamp()},
+         ).json()
+         visits = rj['visits']
+         assert len(visits) == 18 # 6 per hour * 3
+         assert visits[0 ]['dt'] == '01 Jan 2000 02:00:00 +0300'
+         assert visits[-1]['dt'] == '01 Jan 2000 04:50:00 +0300'
+
+
+ @pytest.mark.parametrize('mode', ['update', 'overwrite'])
+ def test_query_while_indexing(tmp_path: Path, mode: str) -> None:
+     overwrite = mode == 'overwrite'
+     moverwrite = ['--overwrite'] if overwrite else []
+
+     def _index(run_id: str) -> Popen:
+         def cfg(run_id: str) -> None:
+             from promnesia.common import Source
+             from promnesia.sources import demo
+
+             SOURCES = [Source(demo.index, count=1_000, name=run_id)]
+
+         cfg_path = tmp_path / f'config{run_id}.py'
+         write_config(cfg_path, cfg, run_id=run_id)
+
+         return Popen(promnesia_bin('index', '--config', cfg_path, *moverwrite))
+
+     # trigger initial indexing
+     with _index(run_id='0'):
+         pass
+
+     with run_server(db=tmp_path / 'promnesia.sqlite') as server:
+         rj = server.post(
+             '/search_around',
+             json={'timestamp': datetime.fromisoformat('2005-01-01T00:00:00+06:00').timestamp()},
+         ).json()
+         assert rj['visits'] == []
+
+         for run_id in range(1, 5):
+             with _index(run_id=str(run_id)) as indexer:
+                 # hammer the backend to increase likelihood of race condition
+                 while indexer.poll() is None:
+                     stats = server.post('/status').json()['stats']
+                     total_visits = stats['total_visits']
+                     if overwrite:
+                         assert total_visits >= 1_000
+                     else:
+                         assert total_visits >= 1_000 * run_id
+
+
+ # TODO also could check server methods directly?
+ # via something like this... but not sure if really makes much difference
+ # import promnesia.server as S
+ # S.EnvConfig.set(S.ServerConfig(
+ #     # TODO populate with test db and benchmark properly...
+ #     db=Path('/todo'),
+ #     timezone=pytz.utc,
+ # ))
+ # links = [f'https://reddit.com/whatever{i}.html' for i in range(count)]
+ # res = S.visited(links)
+ # assert len(res) == len(links)
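These tests drive a real server process through the run_server helper from promnesia/tests/server_helper.py, which is added by this release but not expanded in this diff. Inferred purely from the call sites above, its contract is roughly the sketch below; only the function/method names and parameters come from the tests, while the CLI flags, port handling, and startup polling are assumptions:

    # hypothetical shape of run_server, inferred from its usage in the tests
    from contextlib import contextmanager
    from pathlib import Path
    from subprocess import Popen
    from typing import Iterator, Optional, Union

    import requests

    class Helper:
        def __init__(self, port: str) -> None:
            self.port = port

        def post(self, path: str, json: Optional[dict] = None) -> requests.Response:
            # the tests only ever POST against the local server
            return requests.post(f'http://localhost:{self.port}{path}', json=json)

    @contextmanager
    def run_server(db: Union[Path, str], *, timezone: Optional[str] = None) -> Iterator[Helper]:
        from .common import promnesia_bin  # same helper the tests import

        port = '13131'  # the real helper would pick a free port
        args = ['serve', '--db', str(db), '--port', port]
        if timezone is not None:
            args += ['--timezone', timezone]  # assumed CLI flag
        with Popen(promnesia_bin(*args)) as proc:
            try:
                # the real helper would poll /status here until the server is up
                yield Helper(port)
            finally:
                proc.kill()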
promnesia/tests/test_traverse.py ADDED
@@ -0,0 +1,41 @@
+ from unittest.mock import patch
+
+ from ..common import traverse
+
+ from .common import get_testdata
+
+
+ testDataPath = get_testdata('traverse')
+
+
+ # Patch shutil.which so it always returns false (when trying to which fdfind, etc)
+ # so that it falls back to find
+ @patch('promnesia.common.shutil.which', return_value=False)
+ def test_traverse_ignore_find(patched) -> None:
+     '''
+     traverse() with `find` but ignore some stuff
+     '''
+     paths = set(traverse(testDataPath, ignore=['ignoreme.txt', 'ignoreme2']))
+
+     assert paths == {testDataPath / 'imhere2/real.txt', testDataPath / 'imhere.txt'}
+
+
+ def test_traverse_ignore_fdfind():
+     '''
+     traverse() with `fdfind` but ignore some stuff
+     '''
+     paths = set(traverse(testDataPath, ignore=['ignoreme.txt', 'ignoreme2']))
+
+     assert paths == {testDataPath / 'imhere.txt', testDataPath / 'imhere2/real.txt'}
+
+
+ # TODO: It would be nice to test the implementation directly without having to do this
+ # weird patching in the future
+ @patch('promnesia.common._is_windows', new_callable=lambda: True)
+ def test_traverse_ignore_windows(patched) -> None:
+     '''
+     traverse() with python when _is_windows is true but ignore some stuff
+     '''
+     paths = set(traverse(testDataPath, ignore=['ignoreme.txt', 'ignoreme2']))
+
+     assert paths == {testDataPath / 'imhere.txt', testDataPath / 'imhere2/real.txt'}
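All three tests feed identical inputs through traverse and expect identical results; what varies is the patched environment, which forces each lookup strategy in turn. A rough sketch of that dispatch, under the assumptions the patches encode (fdfind/fd preferred, find as fallback, pure Python on Windows) — not the actual promnesia.common implementation:

    import shutil
    import sys
    from pathlib import Path
    from typing import Iterator, Sequence

    _is_windows = sys.platform.startswith('win')

    def _pick_tool() -> str:
        # preference order the tests exercise: fdfind/fd if installed, else plain find
        for candidate in ('fdfind', 'fd'):
            if shutil.which(candidate):
                return candidate
        return 'find'

    def traverse_sketch(root: Path, *, ignore: Sequence[str] = ()) -> Iterator[Path]:
        if _is_windows:
            # the branch test_traverse_ignore_windows forces: pure-Python walk
            for p in root.rglob('*'):
                if p.is_file() and not any(part in ignore for part in p.relative_to(root).parts):
                    yield p
            return
        tool = _pick_tool()  # shutil.which is exactly what the tests patch to force `find`
        # the real implementation shells out to `tool`, translating `ignore` into the
        # matching arguments (e.g. find's -not -path / fd's --exclude), and yields
        # each path the command prints
        ...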
promnesia/tests/utils.py ADDED
@@ -0,0 +1,35 @@
+ from datetime import datetime, timedelta
+ from pathlib import Path
+ from typing import Mapping, Optional, Sequence, Tuple, Union
+
+ from ..common import Source, Loc, Visit
+ from ..database.dump import visits_to_sqlite
+ from ..extract import extract_visits
+
+
+ # TODO a bit shit... why did I make it dict at first??
+ Urls = Union[
+     Mapping[str, Optional[str]],
+     Sequence[Tuple[str, Optional[str]]],
+ ]
+
+
+ def index_urls(urls: Urls, *, source_name: str = 'test'):
+     uuu = list(urls.items()) if isinstance(urls, dict) else urls
+
+     def idx(tmp_path: Path) -> None:
+         def indexer():
+             for i, (url, ctx) in enumerate(uuu):
+                 yield Visit(
+                     url=url,
+                     dt=datetime.min + timedelta(days=5000) + timedelta(hours=i),
+                     locator=Loc.make('test'),
+                     context=ctx,
+                 )
+
+         db_visits = extract_visits(source=Source(indexer), src=source_name)
+         errors = visits_to_sqlite(vit=db_visits, overwrite_db=True, _db_path=tmp_path / 'promnesia.sqlite')
+
+         assert len(errors) == 0, errors
+
+     return idx
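So index_urls is a factory: it captures the URL/context pairs and returns an idx(tmp_path) callback that indexes them into a fresh database. A hypothetical call site (the URLs and context strings are invented for illustration; tmp_path is the standard pytest fixture):

    indexer = index_urls({
        'https://example.com/one': 'some context',  # visit with context
        'https://example.com/two': None,            # visit without context
    })
    indexer(tmp_path)  # writes tmp_path / 'promnesia.sqlite'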
{promnesia-1.2.20230515.dist-info → promnesia-1.2.20240810.dist-info}/METADATA RENAMED
@@ -1,19 +1,17 @@
  Metadata-Version: 2.1
  Name: promnesia
- Version: 1.2.20230515
+ Version: 1.2.20240810
  Summary: Enhancement of your browsing history
  Home-page: https://github.com/karlicoss/promnesia
  Author: Dmitrii Gerasimov
  Author-email: karlicoss@gmail.com
- License: UNKNOWN
- Platform: UNKNOWN
  Requires-Python: >=3.8
+ License-File: LICENSE
  Requires-Dist: appdirs
  Requires-Dist: tzlocal
  Requires-Dist: more-itertools
  Requires-Dist: pytz
- Requires-Dist: sqlalchemy
- Requires-Dist: cachew (>=0.8.0)
+ Requires-Dist: sqlalchemy >=2.0
  Requires-Dist: urlextract
  Requires-Dist: fastapi
  Requires-Dist: uvicorn[standard]
@@ -26,33 +24,31 @@ Requires-Dist: HPI ; extra == 'all'
  Requires-Dist: beautifulsoup4 ; extra == 'all'
  Requires-Dist: lxml ; extra == 'all'
  Requires-Dist: mistletoe ; extra == 'all'
- Requires-Dist: orgparse (>=0.3.0) ; extra == 'all'
+ Requires-Dist: orgparse >=0.3.0 ; extra == 'all'
  Provides-Extra: html
  Requires-Dist: beautifulsoup4 ; extra == 'html'
  Requires-Dist: lxml ; extra == 'html'
- Provides-Extra: linting
- Requires-Dist: pytest ; extra == 'linting'
- Requires-Dist: mypy ; extra == 'linting'
- Requires-Dist: lxml ; extra == 'linting'
  Provides-Extra: markdown
  Requires-Dist: mistletoe ; extra == 'markdown'
  Provides-Extra: optional
  Requires-Dist: logzero ; extra == 'optional'
  Requires-Dist: python-magic ; extra == 'optional'
  Provides-Extra: org
- Requires-Dist: orgparse (>=0.3.0) ; extra == 'org'
+ Requires-Dist: orgparse >=0.3.0 ; extra == 'org'
  Provides-Extra: telegram
  Provides-Extra: testing
  Requires-Dist: pytest ; extra == 'testing'
  Requires-Dist: pytest-timeout ; extra == 'testing'
  Requires-Dist: pytest-xdist ; extra == 'testing'
+ Requires-Dist: hypothesis ; extra == 'testing'
  Requires-Dist: psutil ; extra == 'testing'
- Requires-Dist: requests (<2.30.0) ; extra == 'testing'
- Requires-Dist: httpie ; extra == 'testing'
+ Requires-Dist: requests ; extra == 'testing'
  Requires-Dist: selenium ; extra == 'testing'
  Requires-Dist: click ; extra == 'testing'
- Requires-Dist: pyautogui ; extra == 'testing'
-
- UNKNOWN
-
+ Requires-Dist: ruff ; extra == 'testing'
+ Requires-Dist: mypy ; extra == 'testing'
+ Requires-Dist: lxml ; extra == 'testing'
+ Requires-Dist: loguru ; extra == 'testing'
+ Provides-Extra: testing-gui
+ Requires-Dist: pyautogui ; extra == 'testing-gui'
 
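The dependency changes above (sqlalchemy pinned to >=2.0, cachew and httpie dropped, the linting extra folded into testing, pyautogui moved to a new testing-gui extra) can be double-checked on an installed copy via importlib.metadata, which reads exactly these METADATA fields:

    from importlib.metadata import metadata, requires

    md = metadata('promnesia')
    print(md['Version'])                  # 1.2.20240810 if the new wheel is installed
    for req in requires('promnesia') or []:
        print(req)                        # one line per Requires-Dist, extras included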
promnesia-1.2.20240810.dist-info/RECORD ADDED
@@ -0,0 +1,83 @@
+ promnesia/__init__.py,sha256=rxVINCLqxpZFknz9QeMA6McpnzR8Hi0zhZdoyYCGUlM,367
+ promnesia/__main__.py,sha256=5HVE6tyBCzTKs19rDGZIkotrmaoeTRBVtTCgRnKluh8,15251
+ promnesia/cannon.py,sha256=dRfRk4bnII5D0rxKnimZiN727imYuyR2Kfw3pWEOxLs,24596
+ promnesia/common.py,sha256=iMBdyYHzjma1mGP02-ugDpkN-WoyBE6Uku4Il8fuWzc,19546
+ promnesia/compare.py,sha256=XjhleEzl_hzSvuinDR_I9oin5U65HkAbNCJRJQ6BwM8,4639
+ promnesia/compat.py,sha256=SP1Nj3jwFoKKYLnx5qVirHdsufehcb42DShqS3PPzR4,389
+ promnesia/config.py,sha256=JFLY28C11tpvwBnPYT-VfdbsD1hivPvZMzojOh5tSJs,4743
+ promnesia/extract.py,sha256=JIVCxCvkn02YnGoUvLL0pAGPHXpepq-C_2brYbtN4ts,2804
+ promnesia/kjson.py,sha256=9hMw-sY-avA99aWV8cG03HL2AlfvlzE5YfWw1_5LwM4,3313
+ promnesia/logging.py,sha256=s_dhobXuTl8tibcpKCxktGduECKWIiEkYe0atcAToIQ,5923
+ promnesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ promnesia/server.py,sha256=42sByMUialuZQoKRF_zgxky7ykc9d-cNMwKBJAEaH68,14636
+ promnesia/sqlite.py,sha256=lB7YZ9jdALbo_gblmVn7z3Dn5M6FJg7Nz2y51Z0pl1c,1353
+ promnesia/database/common.py,sha256=HRw47RhxqN6Wfb5LVfbc1eRzT8LSJus1colY_S9ijm8,1814
+ promnesia/database/dump.py,sha256=JV3j8J5pJJmPocRZalJqq0zz1532p9lVxW7_lcXbvx4,6101
+ promnesia/database/load.py,sha256=Q2TLyV0rzvfvq8kkAhIIvcCC4D-eEbCH5wmsZL6jYg0,1353
+ promnesia/misc/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ promnesia/misc/config_example.py,sha256=MXxJCVjm9MrEwPR8SHTwkJRDcqWcqJWtfJg_M6yhRMo,364
+ promnesia/misc/install_server.py,sha256=-dPTJjM-jktVf25mcrXRL6Uj6Pji72lPnC3iyg_m8Gk,4820
+ promnesia/sources/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ promnesia/sources/auto.py,sha256=MhOesdn2YfPPepNkOBSihT_CDoGXfyTIA0y8wxcKqn0,11710
+ promnesia/sources/auto_logseq.py,sha256=eXOHIOx9MladhyD1IyLd90lyqO6gG2F5uObnuHFQrZA,439
+ promnesia/sources/auto_obsidian.py,sha256=PLWPHj9U3pnwLWw7-fzEk12F4Xdigq0VrvSAn_yS-yw,213
+ promnesia/sources/browser.py,sha256=BzdVVMm6N-U0F8_sng51deara9maY1TzMF6e7s8VCXg,2992
+ promnesia/sources/browser_legacy.py,sha256=3vARw78yXWvp3RQzfPbkXgtZB5iWSbXIKgadMIG4gGs,10400
+ promnesia/sources/demo.py,sha256=-5vADo4S-fDNEEPHGrh9pYoIIW_tzApRl6hIy8K3b2Q,1041
+ promnesia/sources/fbmessenger.py,sha256=_QdM3-vLutN-dh4vxfQlQkRDiorWA2EwixL49AH7lbM,969
+ promnesia/sources/filetypes.py,sha256=W0OVmhtvJKbWk2dju8Uwbo0TfNHye--cKrCPfL_DfBw,3593
+ promnesia/sources/github.py,sha256=hHTlJZvxe6TzzALX_z8R9FUQgVw39n22XYWPc62oPfo,2753
+ promnesia/sources/guess.py,sha256=-km2_hsrznYrws1lA3ZN5L6ixJunEsgQBFDU7juqPpM,776
+ promnesia/sources/hackernews.py,sha256=ReI362AKjZ79Lu99qJNPZtCoyHr6AycghvQrmDFRKgw,1228
+ promnesia/sources/hpi.py,sha256=yQIOT-OQ4Rhs7xXbszkNWjj46jvLDgP-xt7bSQV9wRk,361
+ promnesia/sources/html.py,sha256=FOLcXzV3J5VEHMkpuF4Q0rrht5zdGsDNQZJMu2WWYZ8,1108
+ promnesia/sources/hypothesis.py,sha256=PNuYKV-G7o8trMtgEzK3qX2NcBOhlZuf5Yg71FpWoQY,1359
+ promnesia/sources/instapaper.py,sha256=K2feGkgiFp6pNJoDYlyLs9m8MkDizoXy2QUg-Fjm-O0,969
+ promnesia/sources/markdown.py,sha256=acI2FwkawcAZX21i4kK44ligKKbj7TZDB5c0a9ihZOk,3762
+ promnesia/sources/org.py,sha256=oiZsNdW-oxq4a2D5_TYBUXPdPkPguSosaoyEp9dnyvc,5499
+ promnesia/sources/plaintext.py,sha256=aiMwd7WWtE65iTWiLFIow-RBDqozbjGfPgTdkzuMjaI,3259
+ promnesia/sources/pocket.py,sha256=4JCNrYs23Nq0c0hw_UxaxaIJbiqF2JVu5GAql-hfNZM,1105
+ promnesia/sources/reddit.py,sha256=DIEWQoWGpuQXGOmcZ9lANkPYuLhB9rzq1u23HllqLF4,5623
+ promnesia/sources/roamresearch.py,sha256=Su-2ykyjdylvDMogQjAgiGpLkh4Inw-K8yeAhGv678E,1068
+ promnesia/sources/rss.py,sha256=5-DmUesDv_GCOF5Qo7e7P3QrS6WdryZpFidazfI24Wg,870
+ promnesia/sources/shellcmd.py,sha256=m2tT8RSoOjXz7eC-OOXGGKleod-1A_XtWFlOucO3so0,2644
+ promnesia/sources/signal.py,sha256=iTU9z3hFiWjuGr5noLl8Vy3TlKj8zIMFlhNj2741h00,14728
+ promnesia/sources/smscalls.py,sha256=HHM8SGDx9qS5tZfpIn_iUcKYQIyryQMZvIkqUNZKHtk,755
+ promnesia/sources/stackexchange.py,sha256=vlidNoqWNUtR8dyHya9D7ZlHX5a2OIfoZq951M_V8c8,630
+ promnesia/sources/takeout.py,sha256=jDhpJkjtzC7OIlBASXHw0QNwwincBkEMgqyAhuboL7o,7918
+ promnesia/sources/takeout_legacy.py,sha256=ZuaP9jOuxKrbm03IOudeneWHlgT95Qr86fcGqvplKeE,4040
+ promnesia/sources/telegram.py,sha256=HCft7kWHdkBJiVI8OR6Wg34k4nW1vhb3tMAEJwGO1QA,2947
+ promnesia/sources/telegram_legacy.py,sha256=E4izRfxJWy25R0Rm-E78Op2lxcl2w6Rq5XUeYfXw2wc,4468
+ promnesia/sources/twitter.py,sha256=8vtLKyhtqioMmp_IKKVozLZC32TIQKjs_toB5cU2_6Q,1848
+ promnesia/sources/vcs.py,sha256=K2rRopKlwiNQ2_2TMWSHrwpsd5nYtPMUjHvnpNBvzBg,1609
+ promnesia/sources/viber.py,sha256=uFBtiJGWwuEE-gFIV0GV7eLExc0ddt3U3F_Wnkh1XKA,5948
+ promnesia/sources/website.py,sha256=Ii0Xka3PZk2_KJHJy9l3EYvGgSKkUehhHSmWI9tXec4,1781
+ promnesia/sources/zulip.py,sha256=s1bhYmx26VKC35CJF_2yiO4jdYHH0QwRYtAD3Ss6tt8,742
+ promnesia/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ promnesia/tests/common.py,sha256=eOFyfOLBUM8hK0gJ7JTXdjAMwRL6QN5DlRsZYAudMOM,4026
+ promnesia/tests/server_helper.py,sha256=W76vmwozoqJEPrlxQB0azklR28gf2RzimTHAJWOgclg,2181
+ promnesia/tests/test_cannon.py,sha256=bvfL5foAtDm6QzqE_nqtiSF6zAQnkWU0dlMoQp9kX0s,12702
+ promnesia/tests/test_cli.py,sha256=YZAsqbJcka1rjlDkb925V4A97FkiQc63JF0eT4Zb5wE,1406
+ promnesia/tests/test_compare.py,sha256=8pxY1DPZuGnrU1p0z4VzEawJVzzmL3sNVPce1CeiSNI,945
+ promnesia/tests/test_config.py,sha256=zkZXQkADHhgFzoFjqTr3zv-o6hqS0eED3wuYIMvw6dw,6532
+ promnesia/tests/test_db_dump.py,sha256=k6FPtcuSImMdealbgNr7mCW5NG1_jG8eESNwO1TSW1o,6751
+ promnesia/tests/test_extract.py,sha256=Hn-4B7T_4YrUtcO6tLxYJUkXknU97i0nEGRxejRnozI,2306
+ promnesia/tests/test_extract_urls.py,sha256=Ybez16VdYoveL9b_50Ca9vbMogWHOvFqDOJPGZrzQJg,1961
+ promnesia/tests/test_indexer.py,sha256=ec5xR9whW6z6GwKjnCGrHDp9WgTebbq7qKOIXHPLGlU,8911
+ promnesia/tests/test_server.py,sha256=pwlXT-qU6KXjE0ftlWB9CE1FxhpTrwJRdX8r4QDctYs,10787
+ promnesia/tests/test_traverse.py,sha256=F2qd1vvX2u9jJ4GVksAh6Dum-XEVLg9iCyFU6H8CZhI,1352
+ promnesia/tests/utils.py,sha256=etNWsXIeTMaYCTN9sIlgMKHyvY_Ek3l0Btu5rIpQEcI,1117
+ promnesia/tests/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ promnesia/tests/sources/test_auto.py,sha256=d9MwY8taALdT5C64u8G7uvk4iubZ3SHSCR50qmXzz3E,1855
+ promnesia/tests/sources/test_filetypes.py,sha256=w5uPsfzHfl8hS3BbXxkA3TR8rKqniRGWSO8zeIKOiM4,1338
+ promnesia/tests/sources/test_hypothesis.py,sha256=nUsfET_X0yGVdlpmvxaDGAPjznYsA0Jc04vXGl83QCU,1400
+ promnesia/tests/sources/test_org.py,sha256=BYJqAkI7TfgOkWk1W33MMQUDSBGpvSgGbFvb2r9qqYo,2572
+ promnesia/tests/sources/test_plaintext.py,sha256=O6lLrCvZpLaaWuyeKU3eZyf4Mx1rmarU689nifTw3hk,828
+ promnesia/tests/sources/test_shellcmd.py,sha256=4wyoFB3UzdviNdQX_dOpIlrzZSMXtPqRgTkRb3pk3go,679
+ promnesia/tests/sources/test_takeout.py,sha256=We0gSDpENuxoO9kInSmBExJrAqmfNZkA3cVQf876SsQ,1559
+ promnesia-1.2.20240810.dist-info/LICENSE,sha256=rgO9acPmnw53ZBxiXBdp8kfxmRcekhg_Q7HN65BPihs,1074
+ promnesia-1.2.20240810.dist-info/METADATA,sha256=7ilgQnAz53XkvjLFIZgf8cjh5qWwyyZ_i3G2dw5mfkI,1870
+ promnesia-1.2.20240810.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+ promnesia-1.2.20240810.dist-info/entry_points.txt,sha256=hz1qfzQSRh4kkVkJWk4hnYqE9A1nobEbKlLG_0nNxzE,54
+ promnesia-1.2.20240810.dist-info/top_level.txt,sha256=7yvIpooFiuNLf9yLdu9MTADz57z0YTAqSu7aSG9ujSU,10
+ promnesia-1.2.20240810.dist-info/RECORD,,
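Each RECORD row is path,sha256=<digest>,size, with the digest in urlsafe base64, padding stripped (PEP 376/427). That makes the entries easy to verify by hand: 47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU, which recurs above, is simply the hash of an empty file. A small check, assuming the wheel has been unpacked into the current directory:

    import base64
    import hashlib
    from pathlib import Path

    def record_hash(path: Path) -> str:
        # RECORD digests: sha256, urlsafe-base64 encoded, '=' padding stripped
        digest = hashlib.sha256(path.read_bytes()).digest()
        return base64.urlsafe_b64encode(digest).rstrip(b'=').decode()

    # py.typed is empty, so this reproduces the empty-file digest from the listing
    assert record_hash(Path('promnesia/py.typed')) == '47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU'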
{promnesia-1.2.20230515.dist-info → promnesia-1.2.20240810.dist-info}/WHEEL RENAMED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.40.0)
+ Generator: setuptools (72.1.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
 
{promnesia-1.2.20230515.dist-info → promnesia-1.2.20240810.dist-info}/entry_points.txt RENAMED
@@ -1,3 +1,2 @@
  [console_scripts]
  promnesia = promnesia.__main__:main
-
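Only the trailing blank line was dropped; the console script itself is unchanged, so installing the wheel still creates a promnesia command bound to promnesia.__main__:main. For reference, the same binding can be resolved programmatically (importlib.metadata group API, Python 3.10+):

    from importlib.metadata import entry_points

    # find the console script declared in entry_points.txt
    (ep,) = [e for e in entry_points(group='console_scripts') if e.name == 'promnesia']
    main = ep.load()  # resolves to promnesia.__main__:main
    main()            # same as running `promnesia` in a shell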
promnesia/dump.py DELETED
@@ -1,105 +0,0 @@
- from pathlib import Path
- import shutil
- from typing import List, Set, Iterable
-
- from more_itertools import chunked
-
- from sqlalchemy import create_engine, MetaData, Table, event, text
-
- from cachew import NTBinder
-
- from .common import get_logger, DbVisit, get_tmpdir, Res, now_tz, Loc
- from . import config
-
-
- # NOTE: I guess the main performance benefit from this is not creating too many tmp lists and avoiding overhead
- # since as far as sql is concerned it should all be in the same transaction. only a guess
- # not sure it's the proper way to handle it
- # see test_index_many
- _CHUNK_BY = 10
-
- # I guess 1 hour is definitely enough
- _CONNECTION_TIMEOUT_SECONDS = 3600
-
- # returns critical warnings
- def visits_to_sqlite(vit: Iterable[Res[DbVisit]], *, overwrite_db: bool) -> List[Exception]:
-     logger = get_logger()
-     db_path = config.get().db
-
-     now = now_tz()
-     ok = 0
-     errors = 0
-     def vit_ok() -> Iterable[DbVisit]:
-         nonlocal errors, ok
-         for v in vit:
-             if isinstance(v, DbVisit):
-                 ok += 1
-                 yield v
-             else:
-                 errors += 1
-                 # conform to the schema and dump. can't hurt anyway
-                 ev = DbVisit(
-                     norm_url='<error>',
-                     orig_url='<error>',
-                     dt=now,
-                     locator=Loc.make('<errror>'),
-                     src='error',
-                     # todo attach backtrace?
-                     context=repr(v),
-                 )
-                 yield ev
-
-     tpath = Path(get_tmpdir().name) / 'promnesia.tmp.sqlite'
-     if overwrite_db:
-         # here we don't need timeout, since it's a brand new DB
-         engine = create_engine(f'sqlite:///{tpath}')
-     else:
-         # here we need a timeout, otherwise concurrent indexing might not work
-         # (note that this also needs WAL mode)
-         # see test_concurrent_indexing
-         engine = create_engine(f'sqlite:///{db_path}', connect_args={'timeout': _CONNECTION_TIMEOUT_SECONDS})
-
-     # using WAL keeps database readable while we're writing in it
-     # this is tested by test_query_while_indexing
-     def enable_wal(dbapi_con, con_record) -> None:
-         dbapi_con.execute('PRAGMA journal_mode = WAL')
-     event.listen(engine, 'connect', enable_wal)
-
-     binder = NTBinder.make(DbVisit)
-     meta = MetaData()
-     table = Table('visits', meta, *binder.columns)
-
-     cleared: Set[str] = set()
-     ncleared = 0
-     with engine.begin() as conn:
-         table.create(conn, checkfirst=True)
-
-         for chunk in chunked(vit_ok(), n=_CHUNK_BY):
-             srcs = set(v.src or '' for v in chunk)
-             new = srcs.difference(cleared)
-
-             for src in new:
-                 conn.execute(table.delete().where(table.c.src == src))
-                 cursor = conn.execute(text("SELECT changes()")).fetchone()
-                 assert cursor is not None
-                 ncleared += cursor[0]
-                 cleared.add(src)
-
-             bound = [binder.to_row(x) for x in chunk]
-             # pylint: disable=no-value-for-parameter
-             conn.execute(table.insert().values(bound))
-     engine.dispose()
-
-     if overwrite_db:
-         shutil.move(str(tpath), str(db_path))
-
-     errs = '' if errors == 0 else f', {errors} ERRORS'
-     total = ok + errors
-     what = 'overwritten' if overwrite_db else 'updated'
-     logger.info(
-         '%s database "%s". %d total (%d OK%s, %d cleared, +%d more)',
-         what, db_path, total, ok, errs, ncleared, ok - ncleared)
-     res: List[Exception] = []
-     if total == 0:
-         res.append(RuntimeError('No visits were indexed, something is probably wrong!'))
-     return res
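The comments in this deleted module describe the two concurrency measures that its replacement, promnesia/database/dump.py, presumably carries forward: a long busy timeout on the connection and WAL journaling, so the server stays readable mid-index (exercised by test_query_while_indexing above). A self-contained illustration of that WAL property using only the sqlite3 stdlib module (paths and schema invented for the demo):

    import sqlite3
    import tempfile
    from pathlib import Path

    db = Path(tempfile.mkdtemp()) / 'demo.sqlite'

    writer = sqlite3.connect(db, timeout=3600)   # generous busy timeout, as in the old code
    writer.execute('PRAGMA journal_mode = WAL')  # readers stay unblocked during writes
    writer.execute('CREATE TABLE visits (url TEXT)')
    writer.commit()

    # start a write transaction and leave it open
    writer.execute("INSERT INTO visits VALUES ('https://example.com')")

    reader = sqlite3.connect(db, timeout=3600)
    print(reader.execute('SELECT count(*) FROM visits').fetchone())  # (0,) -- insert not committed yet

    writer.commit()
    print(reader.execute('SELECT count(*) FROM visits').fetchone())  # (1,) -- visible after commit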