promnesia 1.2.20230515__py3-none-any.whl → 1.3.20241021__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. promnesia/__init__.py +14 -3
  2. promnesia/__main__.py +60 -35
  3. promnesia/cannon.py +27 -27
  4. promnesia/common.py +85 -67
  5. promnesia/compare.py +21 -22
  6. promnesia/compat.py +10 -10
  7. promnesia/config.py +23 -23
  8. promnesia/database/common.py +67 -0
  9. promnesia/database/dump.py +188 -0
  10. promnesia/{read_db.py → database/load.py} +16 -17
  11. promnesia/extract.py +14 -11
  12. promnesia/kjson.py +12 -11
  13. promnesia/logging.py +4 -4
  14. promnesia/misc/__init__.pyi +0 -0
  15. promnesia/misc/config_example.py +1 -2
  16. promnesia/misc/install_server.py +7 -9
  17. promnesia/server.py +57 -47
  18. promnesia/sources/__init__.pyi +0 -0
  19. promnesia/sources/auto.py +50 -35
  20. promnesia/sources/auto_logseq.py +6 -5
  21. promnesia/sources/auto_obsidian.py +2 -2
  22. promnesia/sources/browser.py +14 -9
  23. promnesia/sources/browser_legacy.py +26 -16
  24. promnesia/sources/demo.py +19 -3
  25. promnesia/sources/fbmessenger.py +3 -2
  26. promnesia/sources/filetypes.py +16 -7
  27. promnesia/sources/github.py +7 -9
  28. promnesia/sources/guess.py +2 -1
  29. promnesia/sources/hackernews.py +2 -2
  30. promnesia/sources/hpi.py +2 -2
  31. promnesia/sources/html.py +7 -5
  32. promnesia/sources/hypothesis.py +4 -3
  33. promnesia/sources/instapaper.py +2 -2
  34. promnesia/sources/markdown.py +31 -21
  35. promnesia/sources/org.py +27 -13
  36. promnesia/sources/plaintext.py +30 -29
  37. promnesia/sources/pocket.py +3 -2
  38. promnesia/sources/reddit.py +20 -19
  39. promnesia/sources/roamresearch.py +2 -1
  40. promnesia/sources/rss.py +4 -5
  41. promnesia/sources/shellcmd.py +19 -6
  42. promnesia/sources/signal.py +33 -24
  43. promnesia/sources/smscalls.py +2 -2
  44. promnesia/sources/stackexchange.py +4 -3
  45. promnesia/sources/takeout.py +76 -9
  46. promnesia/sources/takeout_legacy.py +24 -12
  47. promnesia/sources/telegram.py +13 -11
  48. promnesia/sources/telegram_legacy.py +18 -7
  49. promnesia/sources/twitter.py +6 -5
  50. promnesia/sources/vcs.py +5 -3
  51. promnesia/sources/viber.py +10 -9
  52. promnesia/sources/website.py +4 -4
  53. promnesia/sources/zulip.py +3 -2
  54. promnesia/sqlite.py +7 -4
  55. promnesia/tests/__init__.py +0 -0
  56. promnesia/tests/common.py +140 -0
  57. promnesia/tests/server_helper.py +67 -0
  58. promnesia/tests/sources/__init__.py +0 -0
  59. promnesia/tests/sources/test_auto.py +65 -0
  60. promnesia/tests/sources/test_filetypes.py +43 -0
  61. promnesia/tests/sources/test_hypothesis.py +39 -0
  62. promnesia/tests/sources/test_org.py +64 -0
  63. promnesia/tests/sources/test_plaintext.py +25 -0
  64. promnesia/tests/sources/test_shellcmd.py +21 -0
  65. promnesia/tests/sources/test_takeout.py +56 -0
  66. promnesia/tests/test_cannon.py +325 -0
  67. promnesia/tests/test_cli.py +40 -0
  68. promnesia/tests/test_compare.py +30 -0
  69. promnesia/tests/test_config.py +289 -0
  70. promnesia/tests/test_db_dump.py +222 -0
  71. promnesia/tests/test_extract.py +65 -0
  72. promnesia/tests/test_extract_urls.py +43 -0
  73. promnesia/tests/test_indexer.py +251 -0
  74. promnesia/tests/test_server.py +291 -0
  75. promnesia/tests/test_traverse.py +39 -0
  76. promnesia/tests/utils.py +35 -0
  77. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/METADATA +15 -18
  78. promnesia-1.3.20241021.dist-info/RECORD +83 -0
  79. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/WHEEL +1 -1
  80. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/entry_points.txt +0 -1
  81. promnesia/dump.py +0 -105
  82. promnesia-1.2.20230515.dist-info/RECORD +0 -58
  83. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/LICENSE +0 -0
  84. {promnesia-1.2.20230515.dist-info → promnesia-1.3.20241021.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,291 @@
1
+ from datetime import datetime
2
+ from pathlib import Path
3
+ from subprocess import Popen
4
+
5
+ import pytest
6
+
7
+ from ..__main__ import do_index
8
+ from .common import promnesia_bin, write_config
9
+ from .server_helper import run_server
10
+
11
+
12
+ def test_status_error() -> None:
13
+ """
14
+ If DB doesn't exist, server should handle it gracefully and respond with error
15
+ """
16
+ with run_server(db='/does/not/exist') as server:
17
+ response = server.post('/status')
18
+
19
+ # TODO ugh currently returns 200? maybe should return proper error, but need to handle in extension
20
+ # assert response.status_code == 404
21
+
22
+ body = response.json()
23
+
24
+ version = body['version']
25
+ assert version is not None
26
+ assert len(version.split('.')) >= 2 # random check..
27
+
28
+ assert 'ERROR' in body['db'] # defensive, it doesn't exist
29
+
30
+
31
+ def test_status_ok(tmp_path: Path) -> None:
32
+ def cfg() -> None:
33
+ from promnesia.common import Source
34
+ from promnesia.sources import demo
35
+
36
+ SOURCES = [Source(demo.index, count=10)]
37
+
38
+ cfg_path = tmp_path / 'config.py'
39
+ write_config(cfg_path, cfg)
40
+ do_index(cfg_path)
41
+
42
+ db_path = tmp_path / 'promnesia.sqlite'
43
+ with run_server(db=db_path, timezone='America/New_York') as server:
44
+ r = server.post('/status').json()
45
+ version = r['version']
46
+ assert version is not None
47
+ assert len(version.split('.')) >= 2 # random check..
48
+
49
+ assert r['db'] == str(db_path)
50
+
51
+ assert r['stats'] == {'total_visits': 10}
52
+
53
+
54
+ def test_visits(tmp_path: Path) -> None:
55
+ def cfg() -> None:
56
+ from promnesia.common import Source
57
+ from promnesia.sources import demo
58
+
59
+ SOURCES = [Source(demo.index, base_dt='2000-01-01', delta=30 * 60)]
60
+
61
+ cfg_path = tmp_path / 'config.py'
62
+ write_config(cfg_path, cfg)
63
+ do_index(cfg_path)
64
+
65
+ # force timezone here, otherwise depending on the test env response varies
66
+ with run_server(db=tmp_path / 'promnesia.sqlite', timezone='America/New_York') as server:
67
+ r = server.post('/visits', json={'url': 'whatever'}).json()
68
+ assert r['visits'] == []
69
+
70
+ r = server.post('/visits', json={'url': 'https://demo.com/page0.html'})
71
+ rj = r.json()
72
+ assert rj['normalised_url'] == 'demo.com/page0.html'
73
+ [v] = rj['visits']
74
+ assert v['src'] == 'demo'
75
+ assert v['locator']['title'] == 'demo'
76
+
77
+ assert v['dt'] == '01 Jan 2000 00:00:00 -0500'
78
+
79
+
80
+ def test_visits_hierarchy(tmp_path: Path) -> None:
81
+ def cfg() -> None:
82
+ from datetime import datetime
83
+
84
+ from promnesia.common import Loc, Source, Visit
85
+ from promnesia.sources import demo
86
+
87
+ def indexer():
88
+ visits = list(demo.index(count=6))
89
+ yield Visit(
90
+ url='https://reddit.com/post1',
91
+ dt=datetime.fromisoformat('2023-12-04'),
92
+ locator=Loc.make('reddit'),
93
+ )
94
+ yield Visit(
95
+ url='https://reddit.com/post1/comment2',
96
+ dt=datetime.fromisoformat('2023-12-02'),
97
+ locator=Loc.make('reddit'),
98
+ context='I am comment 2',
99
+ )
100
+ yield from visits[:3]
101
+ yield Visit(
102
+ url='https://reddit.com/post2',
103
+ dt=datetime.fromisoformat('2023-12-05'),
104
+ locator=Loc.make('reddit'),
105
+ )
106
+ yield from visits[3:]
107
+ yield Visit(
108
+ url='https://reddit.com/post1/ihavenocontext',
109
+ dt=datetime.fromisoformat('2023-12-06'),
110
+ locator=Loc.make('reddit'),
111
+ )
112
+ yield Visit(
113
+ url='https://reddit.com/post1/comment1',
114
+ dt=datetime.fromisoformat('2023-12-06'),
115
+ locator=Loc.make('reddit'),
116
+ context='I am comment 1',
117
+ )
118
+
119
+ SOURCES = [Source(indexer)]
120
+
121
+ cfg_path = tmp_path / 'config.py'
122
+ write_config(cfg_path, cfg)
123
+ do_index(cfg_path)
124
+
125
+ # force timezone here, otherwise depending on the test env response varies
126
+ with run_server(db=tmp_path / 'promnesia.sqlite', timezone='America/New_York') as server:
127
+ r = server.post('/visits', json={'url': 'https://reddit.com/post1'}).json()
128
+ # returns exact match + 'child' visits that are interesting (e.g. have context)
129
+ assert {v['original_url'] for v in r['visits']} == {
130
+ 'https://reddit.com/post1',
131
+ 'https://reddit.com/post1/comment1',
132
+ 'https://reddit.com/post1/comment2',
133
+ }
134
+
135
+
136
+ def test_visited(tmp_path: Path) -> None:
137
+ def cfg() -> None:
138
+ from promnesia.common import Source
139
+ from promnesia.sources import demo
140
+
141
+ SOURCES = [Source(demo.index, base_dt='2000-01-01', delta=30 * 60)]
142
+
143
+ cfg_path = tmp_path / 'config.py'
144
+ write_config(cfg_path, cfg)
145
+ do_index(cfg_path)
146
+
147
+ test_url = 'https://demo.com/page5.html'
148
+
149
+ # force timezone here, otherwise depending on the test env response varies
150
+ with run_server(db=tmp_path / 'promnesia.sqlite', timezone='America/New_York') as server:
151
+ r = server.post('/visited', json={'urls': []}).json()
152
+ assert r == []
153
+
154
+ r = server.post('/visited', json={'urls': [test_url, 'http://badurl.org']}).json()
155
+ [r1, r2] = r
156
+ assert r1['original_url'] == test_url
157
+ assert r2 is None
158
+
159
+
160
+ def test_search(tmp_path: Path) -> None:
161
+ # TODO not sure if should index at all here or just insert DbVisits directly?
162
+ def cfg() -> None:
163
+ from datetime import datetime
164
+
165
+ from promnesia.common import Loc, Source, Visit
166
+ from promnesia.sources import demo
167
+
168
+ def indexer():
169
+ visits = list(demo.index(count=6))
170
+ yield Visit(
171
+ url='https://someone.org/something',
172
+ dt=datetime.fromisoformat('2023-12-04T11:12:13+03:00'),
173
+ locator=Loc.make('whatever'),
174
+ )
175
+ yield from visits[:3]
176
+ yield Visit(
177
+ url='https://wiki.termux.com/wiki/Termux-setup-storage',
178
+ locator=Loc.make(
179
+ title='Reddit comment',
180
+ href='https://reddit.com/r/termux/comments/m4qrxt/cant_open_storageshared_in_termux/gso0kak/',
181
+ ),
182
+ dt=datetime.fromisoformat('2023-12-02'),
183
+ context='perhaps it will help someone else https://wiki.termux.com/wiki/Termux-setup-storage',
184
+ )
185
+ yield from visits[3:]
186
+
187
+ SOURCES = [Source(indexer)]
188
+
189
+ cfg_path = tmp_path / 'config.py'
190
+ write_config(cfg_path, cfg)
191
+ do_index(cfg_path)
192
+
193
+ with run_server(db=tmp_path / 'promnesia.sqlite', timezone='America/New_York') as server:
194
+ # FIXME 'url' is actually kinda misleading -- it can be any text
195
+ rj = server.post('/search', json={'url': 'someone'}).json()
196
+ # TODO maybe return in chronological order or something? not sure
197
+ [v1, v2] = sorted(rj['visits'], key=lambda j: j['dt'])
198
+
199
+ assert v1['context'] == 'perhaps it will help someone else https://wiki.termux.com/wiki/Termux-setup-storage'
200
+ assert v1['dt'] == '02 Dec 2023 00:00:00 -0500' # uses server timezone (original visit didn't have it)
201
+
202
+ assert v2['normalised_url'] == 'someone.org/something'
203
+ assert v2['dt'] == '04 Dec 2023 11:12:13 +0300' # uses original visit timezone
204
+
205
+ rj = server.post('/search', json={'url': 'comment'}).json()
206
+ [v] = rj['visits']
207
+ assert v['context'] == 'perhaps it will help someone else https://wiki.termux.com/wiki/Termux-setup-storage'
208
+
209
+
210
+ def test_search_around(tmp_path: Path) -> None:
211
+ # this should return visits up to 3 hours in the past
212
+ def cfg() -> None:
213
+ from promnesia.common import Source
214
+ from promnesia.sources import demo
215
+
216
+ # generates 60 visits within 10 mins of each other -- so spanning over 10 hours
217
+ SOURCES = [Source(demo.index, count=60, base_dt='2000-01-01T00:00:00+03:00', delta=10 * 60)]
218
+
219
+ cfg_path = tmp_path / 'config.py'
220
+ write_config(cfg_path, cfg)
221
+ do_index(cfg_path)
222
+
223
+ # TODO hmm. perhaps it makes more sense to run query in different process and server in main process for testing??
224
+ with run_server(db=tmp_path / 'promnesia.sqlite') as server:
225
+ rj = server.post(
226
+ '/search_around',
227
+ json={'timestamp': datetime.fromisoformat('2005-01-01T00:00:00+06:00').timestamp()},
228
+ ).json()
229
+ assert rj['visits'] == []
230
+
231
+ rj = server.post(
232
+ '/search_around',
233
+ json={'timestamp': datetime.fromisoformat('2000-01-01T07:55:00+06:00').timestamp()},
234
+ ).json()
235
+ visits = rj['visits']
236
+ assert len(visits) == 18 # 6 per hour * 3
237
+ assert visits[0 ]['dt'] == '01 Jan 2000 02:00:00 +0300'
238
+ assert visits[-1]['dt'] == '01 Jan 2000 04:50:00 +0300'
239
+
240
+
241
+ @pytest.mark.parametrize('mode', ['update', 'overwrite'])
242
+ def test_query_while_indexing(tmp_path: Path, mode: str) -> None:
243
+ overwrite = mode == 'overwrite'
244
+ moverwrite = ['--overwrite'] if overwrite else []
245
+
246
+ def _index(run_id: str) -> Popen:
247
+ def cfg(run_id: str) -> None:
248
+ from promnesia.common import Source
249
+ from promnesia.sources import demo
250
+
251
+ SOURCES = [Source(demo.index, count=1_000, name=run_id)]
252
+
253
+ cfg_path = tmp_path / f'config{run_id}.py'
254
+ write_config(cfg_path, cfg, run_id=run_id)
255
+
256
+ return Popen(promnesia_bin('index', '--config', cfg_path, *moverwrite))
257
+
258
+ # trigger initial indexing
259
+ with _index(run_id='0'):
260
+ pass
261
+
262
+ with run_server(db=tmp_path / 'promnesia.sqlite') as server:
263
+ rj = server.post(
264
+ '/search_around',
265
+ json={'timestamp': datetime.fromisoformat('2005-01-01T00:00:00+06:00').timestamp()},
266
+ ).json()
267
+ assert rj['visits'] == []
268
+
269
+ for run_id in range(1, 5):
270
+ with _index(run_id=str(run_id)) as indexer:
271
+ # hammer the backend to increase likelihood of race condition
272
+ while indexer.poll() is None:
273
+ stats = server.post('/status').json()['stats']
274
+ total_visits = stats['total_visits']
275
+ if overwrite:
276
+ assert total_visits >= 1_000
277
+ else:
278
+ assert total_visits >= 1_000 * run_id
279
+
280
+
281
+ # TODO also could check server methods directly?
282
+ # via something like this... but not sure if really makes much difference
283
+ # import promnesia.server as S
284
+ # S.EnvConfig.set(S.ServerConfig(
285
+ # # TODO populate with test db and benchmark properly...
286
+ # db=Path('/todo'),
287
+ # timezone=pytz.utc,
288
+ # ))
289
+ # links = [f'https://reddit.com/whatever{i}.html' for i in range(count)]
290
+ # res = S.visited(links)
291
+ # assert len(res) == len(links)
@@ -0,0 +1,39 @@
1
+ from unittest.mock import patch
2
+
3
+ from ..common import traverse
4
+ from .common import get_testdata
5
+
6
+ testDataPath = get_testdata('traverse')
7
+
8
+
9
+ # Patch shutil.which so it always returns false (when trying to which fdfind, etc)
10
+ # so that it falls back to find
11
+ @patch('promnesia.common.shutil.which', return_value=False)
12
+ def test_traverse_ignore_find(patched) -> None:
13
+ '''
14
+ traverse() with `find` but ignore some stuff
15
+ '''
16
+ paths = set(traverse(testDataPath, ignore=['ignoreme.txt', 'ignoreme2']))
17
+
18
+ assert paths == {testDataPath / 'imhere2/real.txt', testDataPath / 'imhere.txt'}
19
+
20
+
21
+ def test_traverse_ignore_fdfind():
22
+ '''
23
+ traverse() with `fdfind` but ignore some stuff
24
+ '''
25
+ paths = set(traverse(testDataPath, ignore=['ignoreme.txt', 'ignoreme2']))
26
+
27
+ assert paths == {testDataPath / 'imhere.txt', testDataPath / 'imhere2/real.txt'}
28
+
29
+
30
+ # TODO: It would be nice to test the implementation directly without having to do this
31
+ # weird patching in the future
32
+ @patch('promnesia.common._is_windows', new_callable=lambda: True)
33
+ def test_traverse_ignore_windows(patched) -> None:
34
+ '''
35
+ traverse() with python when _is_windows is true but ignore some stuff
36
+ '''
37
+ paths = set(traverse(testDataPath, ignore=['ignoreme.txt', 'ignoreme2']))
38
+
39
+ assert paths == {testDataPath / 'imhere.txt', testDataPath / 'imhere2/real.txt'}
@@ -0,0 +1,35 @@
1
+ from collections.abc import Mapping, Sequence
2
+ from datetime import datetime, timedelta
3
+ from pathlib import Path
4
+ from typing import Optional, Union
5
+
6
+ from ..common import Loc, Source, Visit
7
+ from ..database.dump import visits_to_sqlite
8
+ from ..extract import extract_visits
9
+
10
+ # TODO a bit shit... why did I make it dict at first??
11
+ Urls = Union[
12
+ Mapping[str, Optional[str]],
13
+ Sequence[tuple[str, Optional[str]]],
14
+ ]
15
+
16
+
17
+ def index_urls(urls: Urls, *, source_name: str = 'test'):
18
+ uuu = list(urls.items()) if isinstance(urls, dict) else urls
19
+
20
+ def idx(tmp_path: Path) -> None:
21
+ def indexer():
22
+ for i, (url, ctx) in enumerate(uuu):
23
+ yield Visit(
24
+ url=url,
25
+ dt=datetime.min + timedelta(days=5000) + timedelta(hours=i),
26
+ locator=Loc.make('test'),
27
+ context=ctx,
28
+ )
29
+
30
+ db_visits = extract_visits(source=Source(indexer), src=source_name)
31
+ errors = visits_to_sqlite(vit=db_visits, overwrite_db=True, _db_path=tmp_path / 'promnesia.sqlite')
32
+
33
+ assert len(errors) == 0, errors
34
+
35
+ return idx
@@ -1,19 +1,18 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: promnesia
3
- Version: 1.2.20230515
3
+ Version: 1.3.20241021
4
4
  Summary: Enhancement of your browsing history
5
5
  Home-page: https://github.com/karlicoss/promnesia
6
6
  Author: Dmitrii Gerasimov
7
7
  Author-email: karlicoss@gmail.com
8
- License: UNKNOWN
9
- Platform: UNKNOWN
10
- Requires-Python: >=3.8
8
+ Requires-Python: >=3.9
9
+ License-File: LICENSE
11
10
  Requires-Dist: appdirs
12
11
  Requires-Dist: tzlocal
13
12
  Requires-Dist: more-itertools
13
+ Requires-Dist: typing-extensions
14
14
  Requires-Dist: pytz
15
- Requires-Dist: sqlalchemy
16
- Requires-Dist: cachew (>=0.8.0)
15
+ Requires-Dist: sqlalchemy >=2.0
17
16
  Requires-Dist: urlextract
18
17
  Requires-Dist: fastapi
19
18
  Requires-Dist: uvicorn[standard]
@@ -26,33 +25,31 @@ Requires-Dist: HPI ; extra == 'all'
26
25
  Requires-Dist: beautifulsoup4 ; extra == 'all'
27
26
  Requires-Dist: lxml ; extra == 'all'
28
27
  Requires-Dist: mistletoe ; extra == 'all'
29
- Requires-Dist: orgparse (>=0.3.0) ; extra == 'all'
28
+ Requires-Dist: orgparse >=0.3.0 ; extra == 'all'
30
29
  Provides-Extra: html
31
30
  Requires-Dist: beautifulsoup4 ; extra == 'html'
32
31
  Requires-Dist: lxml ; extra == 'html'
33
- Provides-Extra: linting
34
- Requires-Dist: pytest ; extra == 'linting'
35
- Requires-Dist: mypy ; extra == 'linting'
36
- Requires-Dist: lxml ; extra == 'linting'
37
32
  Provides-Extra: markdown
38
33
  Requires-Dist: mistletoe ; extra == 'markdown'
39
34
  Provides-Extra: optional
40
35
  Requires-Dist: logzero ; extra == 'optional'
41
36
  Requires-Dist: python-magic ; extra == 'optional'
42
37
  Provides-Extra: org
43
- Requires-Dist: orgparse (>=0.3.0) ; extra == 'org'
38
+ Requires-Dist: orgparse >=0.3.0 ; extra == 'org'
44
39
  Provides-Extra: telegram
45
40
  Provides-Extra: testing
46
41
  Requires-Dist: pytest ; extra == 'testing'
47
42
  Requires-Dist: pytest-timeout ; extra == 'testing'
48
43
  Requires-Dist: pytest-xdist ; extra == 'testing'
44
+ Requires-Dist: hypothesis ; extra == 'testing'
49
45
  Requires-Dist: psutil ; extra == 'testing'
50
- Requires-Dist: requests (<2.30.0) ; extra == 'testing'
51
- Requires-Dist: httpie ; extra == 'testing'
46
+ Requires-Dist: requests ; extra == 'testing'
52
47
  Requires-Dist: selenium ; extra == 'testing'
53
48
  Requires-Dist: click ; extra == 'testing'
54
- Requires-Dist: pyautogui ; extra == 'testing'
55
-
56
- UNKNOWN
57
-
49
+ Requires-Dist: ruff ; extra == 'testing'
50
+ Requires-Dist: mypy ; extra == 'testing'
51
+ Requires-Dist: lxml ; extra == 'testing'
52
+ Requires-Dist: loguru ; extra == 'testing'
53
+ Provides-Extra: testing-gui
54
+ Requires-Dist: pyautogui ; extra == 'testing-gui'
58
55
 
@@ -0,0 +1,83 @@
1
+ promnesia/__init__.py,sha256=8ZrCJe2kJb0DuYIiNeiUm0XU0nsjTlctcjdFoy9DVYw,457
2
+ promnesia/__main__.py,sha256=y7Jgcc1uSKbxKhFDrlBlYh75kZm_unS9vH_3HvWCpnk,15421
3
+ promnesia/cannon.py,sha256=TZ4b5P0qZpo0CVKCQLthyPFtAW5DYZT5ylttr5TmikI,24522
4
+ promnesia/common.py,sha256=iUQh3Z-XqWxOPqnOHL8sBHEEJcGTztITBXnYizA02zs,20098
5
+ promnesia/compare.py,sha256=vREaDTOjrGG43qOZEq2E9jV7fihO0U-KBMuguUMNgnk,4572
6
+ promnesia/compat.py,sha256=cxk8ZOv0LnSwLCab_UrsxQWcKiiZnWgf2xK13L-ql4w,456
7
+ promnesia/config.py,sha256=hFHdFbcB2q35gU1zlmtYj6VkHR_1SSH4U2M5G4HQa_g,4721
8
+ promnesia/extract.py,sha256=m6D-QVA54ldcffcAby8w2OIDg0GPmAgDSGVWhSTwA60,2810
9
+ promnesia/kjson.py,sha256=GPpeIpvtXwaocBw1W1QzPWN8UJcF0USSlaMxRSEkH-U,3400
10
+ promnesia/logging.py,sha256=z3Otc_JzZREnMG0QerqoOR6zFJ6Ls6ATGydczCADZSY,5923
11
+ promnesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ promnesia/server.py,sha256=LBka2jYuzISVAyNIQaf8IRGIbcpSFo1g-vn8TI5VOhE,14647
13
+ promnesia/sqlite.py,sha256=4jZhqMGviSZK5wNqd7PbVC0z-D3UWyeM0RXrabAruZ8,1378
14
+ promnesia/database/common.py,sha256=9iENKM0pRL1PKrSAhGzNreDFsQNQBIGZWKuV5E8aalw,1843
15
+ promnesia/database/dump.py,sha256=v5m5mdnY7Tu7Yu-mlQHAFW46Kgt4dJRIOWOGOs88gEY,6116
16
+ promnesia/database/load.py,sha256=kP-HdNL_3q94P_Cj5pnxarHqFgCVJFrsbr0vogDRtFU,1357
17
+ promnesia/misc/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ promnesia/misc/config_example.py,sha256=MXxJCVjm9MrEwPR8SHTwkJRDcqWcqJWtfJg_M6yhRMo,364
19
+ promnesia/misc/install_server.py,sha256=NEEGvpun7_lMBGXKvRGrq5wcGT_xgMm7MvPcftR15pI,4757
20
+ promnesia/sources/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
+ promnesia/sources/auto.py,sha256=hiecNxttUGFskR5H2W35K-6daEhhiPpH2ZRpOZBBr50,11695
22
+ promnesia/sources/auto_logseq.py,sha256=1npVDEXB8wAM6XwTku0nZb57yQ9mM7vWct9albgvGxw,445
23
+ promnesia/sources/auto_obsidian.py,sha256=UoVWAkdCbh7Ij3JFIy6ylYOd6776c7-z2xaR6b8snYc,205
24
+ promnesia/sources/browser.py,sha256=qmuDzu0iJjpQc-WX2LEZ3TnpHhb9H-b5H-hZV7lbVkY,3087
25
+ promnesia/sources/browser_legacy.py,sha256=rNZydnWnnQ3sAzgRgFYnuqQKHUg8MxS5iiAmHAh9rV8,10429
26
+ promnesia/sources/demo.py,sha256=TQVVl6089TlEntiGSO-9EhqXBf9U_ipUnhsVqDCOspg,1033
27
+ promnesia/sources/fbmessenger.py,sha256=Dpvxh_5_ozaqGOSHxCApoIda58Z33r0_N6Uk2BIMde8,997
28
+ promnesia/sources/filetypes.py,sha256=2d6taAuGT4jvfCEqF9US8jukT6sBuN0wBEPJToCKiww,3643
29
+ promnesia/sources/github.py,sha256=arDUZShPi8Dzz9EhYfk4j1OK3ugsADsjAhfXy00ng0c,2777
30
+ promnesia/sources/guess.py,sha256=DMC7pj7rAA5yGZ6hMp31y4QvSugVNA9osexBLd5oCmc,803
31
+ promnesia/sources/hackernews.py,sha256=zocx4XrP2QY5UW-ErSMej7lrYZH0xmotqaIBVGyNH7M,1247
32
+ promnesia/sources/hpi.py,sha256=cHQlEJAH1EeTiawB1kcjXPt4UYDL1ZTNIHadBvd5QH4,383
33
+ promnesia/sources/html.py,sha256=ill3XtIObZoK1a8FY6OZGoOwmqjtSiiiwmqjzSbZhQQ,1153
34
+ promnesia/sources/hypothesis.py,sha256=qbiP6xJN53A6YkSUgA7hUa3ZD4KmtUnhJfAzXUHdt0o,1387
35
+ promnesia/sources/instapaper.py,sha256=zIq6AClPb8Zfkdft7U4VgEgODlEeQRowygGz06DaGUQ,988
36
+ promnesia/sources/markdown.py,sha256=KHHwxezCQRpcJLoyt_qB44NQwZGL7BnPT-Ehud21eR4,3871
37
+ promnesia/sources/org.py,sha256=BD1DpDcxXWWjirKnrovY2tDQpgj5YAt175z45Z_o0jI,5589
38
+ promnesia/sources/plaintext.py,sha256=8aYkmBYmXKnxyshWqgwUgQ03oG-OMKRi7DXsWtcgpVA,3138
39
+ promnesia/sources/pocket.py,sha256=PWmjAgg8nSDubBv_4WuWmyupB6NP8SAuuKiV8ZLE4xY,1133
40
+ promnesia/sources/reddit.py,sha256=u-Ou0xTZO-s9SM_GKHfNawLV5Yv7PmH4lggy_yq1XnA,5630
41
+ promnesia/sources/roamresearch.py,sha256=_WKurvGea9JHMKlapQyH44Kfg-AD40fb7xse26It0Fo,1077
42
+ promnesia/sources/rss.py,sha256=6Ijy6omXGjzMLkmwijq6JBRIY7ruArb-hXppRYY4ln0,824
43
+ promnesia/sources/shellcmd.py,sha256=bXWfHv6XFmVUQHUUWn2E_C96tIdmIxb1-yqAQRtTYIk,2739
44
+ promnesia/sources/signal.py,sha256=4ZBUXQoab0OG1r8ieZhSHpMNrzsFDomVXBhqb8hRYPA,14793
45
+ promnesia/sources/smscalls.py,sha256=N6jMHHr4bUlQORQcTVeGsdrmW4ep7jpCEFSi26sRpxY,774
46
+ promnesia/sources/stackexchange.py,sha256=_s8HJfhblVNspvbLEnuse5J8BvFFrboIdv5jURTZ2eE,658
47
+ promnesia/sources/takeout.py,sha256=mv6j8mkDb8lJsv41iBsJBzFpbGp7jXIBWYcAFMTvdls,8017
48
+ promnesia/sources/takeout_legacy.py,sha256=iaSLu73RutEx1UW4m-n3hk7-ISlVl2d-W0ZKP__20XU,4130
49
+ promnesia/sources/telegram.py,sha256=hSI6zxQh2zqdBiIAGJzY2Vi5A_233jDy0tGU0Q-Q-EU,2979
50
+ promnesia/sources/telegram_legacy.py,sha256=gRe6Exw-svMQZhlussmBGg0EJq81XadSa5-mh2m8ztI,4524
51
+ promnesia/sources/twitter.py,sha256=_tIU0rQ3b96WXKIPJQRyRAzG7AAnPp9ANytrcVJ5Z0U,1886
52
+ promnesia/sources/vcs.py,sha256=wMcgXr1Nd7DxM5j405ip4SPxJIqhdY4SePpp9qPL9q8,1654
53
+ promnesia/sources/viber.py,sha256=C7e9AxN5sgC-SLyCdUapzwC3eGuA0WQUGfO8Br5r53A,5990
54
+ promnesia/sources/website.py,sha256=GbDZChKK1_KLWXiLUUcZLOvep6h2AWmqECudwmupSl8,1811
55
+ promnesia/sources/zulip.py,sha256=GN8YiUmFa3jLXFmNst47YILu0U-kPawbxDKY-Mtobf0,770
56
+ promnesia/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
57
+ promnesia/tests/common.py,sha256=2D9YspEWhKplbG6ZrVACkBjp_NEJB7qaXzOMjWHitCY,4092
58
+ promnesia/tests/server_helper.py,sha256=Z1Rc_88WmOwukCyVR5Cb4XaXg4xewHiy4vlFAxQkoBU,2199
59
+ promnesia/tests/test_cannon.py,sha256=VZPWZAtcAdRzQwVJ01qUIgvxB3VJl72Gqg0GI9tr8To,12711
60
+ promnesia/tests/test_cli.py,sha256=g-9mgs4_tu6bGNZz3_swP3oj1L2V_o9ycjlFzIR9WYE,1404
61
+ promnesia/tests/test_compare.py,sha256=JDQzFWaIMkKCn6YYKJPrZjdbZ6LZn5ig793q62LwSFc,945
62
+ promnesia/tests/test_config.py,sha256=VqX2R0UcXmxp8O-p6C5Unk9W2Iv3RkIhvKe3ga84GHc,6534
63
+ promnesia/tests/test_db_dump.py,sha256=PMGiTnp_cfDxLUfzYrchKKewoF84q6TooEw445K-WiA,6766
64
+ promnesia/tests/test_extract.py,sha256=kD2iNodCj2OHM7_sQ_3DTRHJMZItJ7FuE855GpxZ3jM,2340
65
+ promnesia/tests/test_extract_urls.py,sha256=Ybez16VdYoveL9b_50Ca9vbMogWHOvFqDOJPGZrzQJg,1961
66
+ promnesia/tests/test_indexer.py,sha256=Oo93qqPrP0xXY1XhcRQkoJuEGpREfs6qoH335f_5_dI,8981
67
+ promnesia/tests/test_server.py,sha256=c1HxEKlqTxbBrKu7tmg58G8Z9E4jNcxq-YENxxfVqIQ,10786
68
+ promnesia/tests/test_traverse.py,sha256=DUN-NbrZ8b6MKrtZaOavAnZelMPk_aqN2ylvRV7UqHo,1350
69
+ promnesia/tests/utils.py,sha256=TsRaLYN9slHHoNJmPSKwgoP82ICHBvEjT2G4scAraIQ,1136
70
+ promnesia/tests/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
+ promnesia/tests/sources/test_auto.py,sha256=wWqf7x_BXeVSYTDIaD6ZXqqRvqk312biY4iOGcMa4PY,1848
72
+ promnesia/tests/sources/test_filetypes.py,sha256=6xhpN9hQb2U6FF4lAEIXWupk-t_HxQjNfk9A6krEnx4,1359
73
+ promnesia/tests/sources/test_hypothesis.py,sha256=4anFCfBqzJueiDhhg2WtvITMIofAWRMB2-Ja1Cb2beY,1400
74
+ promnesia/tests/sources/test_org.py,sha256=cefCAc7JVtrzHblbLHYB28tsjzfV1b4h80_JRO1Zamk,2572
75
+ promnesia/tests/sources/test_plaintext.py,sha256=Bn7v2HhL1FSLqCKPy-BX5OLy1PlWIyYjvRTLAWrXVWg,827
76
+ promnesia/tests/sources/test_shellcmd.py,sha256=K4hDQl3yd4t-6JFeDtB1gtH2XFImSYdYe1pG0UOO-Uw,678
77
+ promnesia/tests/sources/test_takeout.py,sha256=HVreW_4pZP8TjGNmrJva5JJfkexmOwwr7cRxwU1Qg_Q,1557
78
+ promnesia-1.3.20241021.dist-info/LICENSE,sha256=rgO9acPmnw53ZBxiXBdp8kfxmRcekhg_Q7HN65BPihs,1074
79
+ promnesia-1.3.20241021.dist-info/METADATA,sha256=YLJgGCkJ4LTXKid3Ieq-hBk5aJVc3c000Diutdk-RlI,1903
80
+ promnesia-1.3.20241021.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
81
+ promnesia-1.3.20241021.dist-info/entry_points.txt,sha256=hz1qfzQSRh4kkVkJWk4hnYqE9A1nobEbKlLG_0nNxzE,54
82
+ promnesia-1.3.20241021.dist-info/top_level.txt,sha256=7yvIpooFiuNLf9yLdu9MTADz57z0YTAqSu7aSG9ujSU,10
83
+ promnesia-1.3.20241021.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.40.0)
2
+ Generator: setuptools (75.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,3 +1,2 @@
1
1
  [console_scripts]
2
2
  promnesia = promnesia.__main__:main
3
-
promnesia/dump.py DELETED
@@ -1,105 +0,0 @@
1
- from pathlib import Path
2
- import shutil
3
- from typing import List, Set, Iterable
4
-
5
- from more_itertools import chunked
6
-
7
- from sqlalchemy import create_engine, MetaData, Table, event, text
8
-
9
- from cachew import NTBinder
10
-
11
- from .common import get_logger, DbVisit, get_tmpdir, Res, now_tz, Loc
12
- from . import config
13
-
14
-
15
- # NOTE: I guess the main performance benefit from this is not creating too many tmp lists and avoiding overhead
16
- # since as far as sql is concerned it should all be in the same transaction. only a guess
17
- # not sure it's the proper way to handle it
18
- # see test_index_many
19
- _CHUNK_BY = 10
20
-
21
- # I guess 1 hour is definitely enough
22
- _CONNECTION_TIMEOUT_SECONDS = 3600
23
-
24
- # returns critical warnings
25
- def visits_to_sqlite(vit: Iterable[Res[DbVisit]], *, overwrite_db: bool) -> List[Exception]:
26
- logger = get_logger()
27
- db_path = config.get().db
28
-
29
- now = now_tz()
30
- ok = 0
31
- errors = 0
32
- def vit_ok() -> Iterable[DbVisit]:
33
- nonlocal errors, ok
34
- for v in vit:
35
- if isinstance(v, DbVisit):
36
- ok += 1
37
- yield v
38
- else:
39
- errors += 1
40
- # conform to the schema and dump. can't hurt anyway
41
- ev = DbVisit(
42
- norm_url='<error>',
43
- orig_url='<error>',
44
- dt=now,
45
- locator=Loc.make('<errror>'),
46
- src='error',
47
- # todo attach backtrace?
48
- context=repr(v),
49
- )
50
- yield ev
51
-
52
- tpath = Path(get_tmpdir().name) / 'promnesia.tmp.sqlite'
53
- if overwrite_db:
54
- # here we don't need timeout, since it's a brand new DB
55
- engine = create_engine(f'sqlite:///{tpath}')
56
- else:
57
- # here we need a timeout, othewise concurrent indexing might not work
58
- # (note that this also needs WAL mode)
59
- # see test_concurrent_indexing
60
- engine = create_engine(f'sqlite:///{db_path}', connect_args={'timeout': _CONNECTION_TIMEOUT_SECONDS})
61
-
62
- # using WAL keeps database readable while we're writing in it
63
- # this is tested by test_query_while_indexing
64
- def enable_wal(dbapi_con, con_record) -> None:
65
- dbapi_con.execute('PRAGMA journal_mode = WAL')
66
- event.listen(engine, 'connect', enable_wal)
67
-
68
- binder = NTBinder.make(DbVisit)
69
- meta = MetaData()
70
- table = Table('visits', meta, *binder.columns)
71
-
72
- cleared: Set[str] = set()
73
- ncleared = 0
74
- with engine.begin() as conn:
75
- table.create(conn, checkfirst=True)
76
-
77
- for chunk in chunked(vit_ok(), n=_CHUNK_BY):
78
- srcs = set(v.src or '' for v in chunk)
79
- new = srcs.difference(cleared)
80
-
81
- for src in new:
82
- conn.execute(table.delete().where(table.c.src == src))
83
- cursor = conn.execute(text("SELECT changes()")).fetchone()
84
- assert cursor is not None
85
- ncleared += cursor[0]
86
- cleared.add(src)
87
-
88
- bound = [binder.to_row(x) for x in chunk]
89
- # pylint: disable=no-value-for-parameter
90
- conn.execute(table.insert().values(bound))
91
- engine.dispose()
92
-
93
- if overwrite_db:
94
- shutil.move(str(tpath), str(db_path))
95
-
96
- errs = '' if errors == 0 else f', {errors} ERRORS'
97
- total = ok + errors
98
- what = 'overwritten' if overwrite_db else 'updated'
99
- logger.info(
100
- '%s database "%s". %d total (%d OK%s, %d cleared, +%d more)',
101
- what, db_path, total, ok, errs, ncleared, ok - ncleared)
102
- res: List[Exception] = []
103
- if total == 0:
104
- res.append(RuntimeError('No visits were indexed, something is probably wrong!'))
105
- return res