promnesia 1.1.20230129__py3-none-any.whl → 1.2.20240810__py3-none-any.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Files changed (72)
  1. promnesia/__main__.py +58 -50
  2. promnesia/cannon.py +4 -4
  3. promnesia/common.py +57 -38
  4. promnesia/compare.py +3 -2
  5. promnesia/compat.py +6 -65
  6. promnesia/config.py +4 -2
  7. promnesia/database/common.py +66 -0
  8. promnesia/database/dump.py +187 -0
  9. promnesia/{read_db.py → database/load.py} +10 -11
  10. promnesia/extract.py +1 -0
  11. promnesia/kjson.py +1 -1
  12. promnesia/logging.py +14 -14
  13. promnesia/misc/__init__.pyi +0 -0
  14. promnesia/misc/config_example.py +1 -2
  15. promnesia/misc/install_server.py +5 -4
  16. promnesia/server.py +24 -24
  17. promnesia/sources/__init__.pyi +0 -0
  18. promnesia/sources/auto.py +12 -7
  19. promnesia/sources/browser.py +80 -293
  20. promnesia/sources/browser_legacy.py +298 -0
  21. promnesia/sources/demo.py +18 -2
  22. promnesia/sources/filetypes.py +8 -0
  23. promnesia/sources/github.py +2 -2
  24. promnesia/sources/hackernews.py +1 -2
  25. promnesia/sources/hypothesis.py +1 -1
  26. promnesia/sources/markdown.py +15 -15
  27. promnesia/sources/org.py +7 -3
  28. promnesia/sources/plaintext.py +3 -1
  29. promnesia/sources/reddit.py +2 -2
  30. promnesia/sources/rss.py +5 -1
  31. promnesia/sources/shellcmd.py +6 -2
  32. promnesia/sources/signal.py +29 -20
  33. promnesia/sources/smscalls.py +8 -1
  34. promnesia/sources/stackexchange.py +2 -2
  35. promnesia/sources/takeout.py +132 -12
  36. promnesia/sources/takeout_legacy.py +10 -2
  37. promnesia/sources/telegram.py +79 -123
  38. promnesia/sources/telegram_legacy.py +117 -0
  39. promnesia/sources/vcs.py +1 -1
  40. promnesia/sources/viber.py +6 -15
  41. promnesia/sources/website.py +1 -1
  42. promnesia/sqlite.py +42 -0
  43. promnesia/tests/__init__.py +0 -0
  44. promnesia/tests/common.py +137 -0
  45. promnesia/tests/server_helper.py +64 -0
  46. promnesia/tests/sources/__init__.py +0 -0
  47. promnesia/tests/sources/test_auto.py +66 -0
  48. promnesia/tests/sources/test_filetypes.py +42 -0
  49. promnesia/tests/sources/test_hypothesis.py +39 -0
  50. promnesia/tests/sources/test_org.py +65 -0
  51. promnesia/tests/sources/test_plaintext.py +26 -0
  52. promnesia/tests/sources/test_shellcmd.py +22 -0
  53. promnesia/tests/sources/test_takeout.py +58 -0
  54. promnesia/tests/test_cannon.py +325 -0
  55. promnesia/tests/test_cli.py +42 -0
  56. promnesia/tests/test_compare.py +30 -0
  57. promnesia/tests/test_config.py +290 -0
  58. promnesia/tests/test_db_dump.py +223 -0
  59. promnesia/tests/test_extract.py +61 -0
  60. promnesia/tests/test_extract_urls.py +43 -0
  61. promnesia/tests/test_indexer.py +245 -0
  62. promnesia/tests/test_server.py +292 -0
  63. promnesia/tests/test_traverse.py +41 -0
  64. promnesia/tests/utils.py +35 -0
  65. {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/METADATA +14 -19
  66. promnesia-1.2.20240810.dist-info/RECORD +83 -0
  67. {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/WHEEL +1 -1
  68. {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/entry_points.txt +0 -1
  69. promnesia/dump.py +0 -105
  70. promnesia-1.1.20230129.dist-info/RECORD +0 -55
  71. {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/LICENSE +0 -0
  72. {promnesia-1.1.20230129.dist-info → promnesia-1.2.20240810.dist-info}/top_level.txt +0 -0
promnesia/tests/sources/test_shellcmd.py
@@ -0,0 +1,22 @@
+from ...common import _is_windows, Source
+from ...extract import extract_visits
+from ...sources import shellcmd
+
+import pytest
+
+from ..common import get_testdata
+
+
+@pytest.mark.skipif(_is_windows, reason="no grep on windows")
+def test_via_grep() -> None:
+
+    visits = list(extract_visits(
+        Source(
+            shellcmd.index,
+            # meh. maybe should deprecate plain string here...
+            r"""grep -Eo -r --no-filename (http|https)://\S+ """ + str(get_testdata('custom')),
+        ),
+        src='whatever',
+    ))
+    # TODO I guess filtering of equivalent urls should rather be tested on something having context (e.g. org mode)
+    assert len(visits) == 5
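
For context: shellcmd.index accepts any shell command that prints URLs to stdout, which is what this test exercises via grep. A minimal sketch of the same source in a regular promnesia config (the notes path and the name argument are illustrative assumptions, not taken from this diff):

    # hypothetical config.py entry using the shellcmd source
    from promnesia.common import Source
    from promnesia.sources import shellcmd

    SOURCES = [
        Source(
            shellcmd.index,
            # any command that prints one URL per line works; grep is just one option
            r"""grep -Eo -r --no-filename (http|https)://\S+ /path/to/notes""",  # path is a placeholder
            name='notes-grep',  # hypothetical label for this source
        ),
    ]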
promnesia/tests/sources/test_takeout.py
@@ -0,0 +1,58 @@
+from datetime import datetime, timezone
+
+from ...common import Source
+from ...extract import extract_visits
+from ...sources import takeout
+
+import pytest
+
+from ..common import get_testdata, unwrap
+
+from my.core.cfg import tmp_config
+
+
+# TODO apply in conftest so it's used in all tests?
+@pytest.fixture
+def no_cachew():
+    from my.core.cachew import disabled_cachew
+
+    with disabled_cachew():
+        yield
+
+
+# todo testing this logic probably belongs to hpi or google_takeout_export, but whatever
+def test_takeout_directory(no_cachew) -> None:
+    class config:
+        class google:
+            takeout_path = get_testdata('takeout')
+
+    with tmp_config(modules='my.google.takeout.*', config=config):
+        visits = list(extract_visits(Source(takeout.index), src='takeout'))
+
+    assert len(visits) == 3
+    assert all(unwrap(v).dt.tzinfo is not None for v in visits)
+
+
+def test_takeout_zip(no_cachew) -> None:
+    class config:
+        class google:
+            takeout_path = get_testdata('takeout-20150518T000000Z.zip')
+
+    with tmp_config(modules='my.google.takeout.*', config=config):
+        visits = list(extract_visits(Source(takeout.index), src='takeout'))
+
+    assert len(visits) == 3
+    assert all(unwrap(v).dt.tzinfo is not None for v in visits)
+
+    [vis] = [v for v in visits if unwrap(v).norm_url == 'takeout.google.com/settings/takeout']
+
+    edt = datetime(
+        year=2018,
+        month=9,
+        day=18,
+        hour=5,
+        minute=48,
+        second=23,
+        tzinfo=timezone.utc,
+    )
+    assert unwrap(vis).dt == edt
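
Note that tmp_config comes from HPI (the my.core package) and temporarily points the my.google.takeout modules at the test data. Outside of tests the same settings would live in the permanent HPI user config; a rough sketch, with a placeholder path:

    # sketch of the HPI user config (my.config); the path is a placeholder
    class google:
        # a directory of extracted takeouts, or a single takeout *.zip, as in the tests above
        takeout_path = '/path/to/google-takeouts'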
promnesia/tests/test_cannon.py
@@ -0,0 +1,325 @@
+from typing import cast
+
+import pytest
+
+from ..cannon import canonify, CanonifyException
+
+# TODO should actually understand 'sequences'?
+# e.g.
+# https://www.scottaaronson.com/blog/?p=3167#comment-1731882 is kinda hierarchy of scottaaronson.com, post 3167 and comment 1731882
+# but when working with it from server, would be easier to just do multiple queries I guess..
+# https://www.scottaaronson.com/blog/?p=3167 is kinda hierarchy of scottaaronson.com ;
+
+
+param = pytest.mark.parametrize
+
+
+# mark stuff that is interesting as a testcase, but I'm not sure about yet
+TODO = cast(str, object())
+
+
+def check(url, expected):
+    if expected is TODO:
+        pytest.skip(f"'{url}' will be handled later")
+    assert canonify(url) == expected
+
+
+# TODO assume spaces are not meaningful??
+# then could align URLs etc?
+
+@param('url,expected', [(
+    'https://www.youtube.com/watch?t=491s&v=1NHbPN9pNPM&index=63&list=WL',
+    # NOTE: t= reordered, makes it more hierarchical
+    # list as well, I guess makes the most sense to keep it at the very end.. since lists are more like tags
+    'youtube.com/watch?v=1NHbPN9pNPM&t=491s&list=WL'
+), (
+    'youtube.com/watch?v=wHrCkyoe72U&feature=share&time_continue=6',
+    'youtube.com/watch?v=wHrCkyoe72U'
+), (
+    'youtube.com/embed/nyc6RJEEe0U?feature=oembed',
+    'youtube.com/watch?v=nyc6RJEEe0U'
+), (
+    'https://youtu.be/iCvmsMzlF7o?list=WL',
+    'youtube.com/watch?v=iCvmsMzlF7o&list=WL'
+),
+    # TODO can even be like that or contain timestamp (&t=)
+    # TODO warn if param already present? shouldn't happen..
+
+    # TODO could be interesting to do automatic rule extraction by querying one representative and then extracting canonical
+
+    # TODO national domains don't matter for youtube
+
+    # [*, 'youtube', ANY_DOMAIN] / 'embed' -> 'youtube.com/watch'
+    # TODO use regex backrefs?
+    #
+(
+    'm.youtube.com/watch?v=Zn6gV2sdl38',
+    'youtube.com/watch?v=Zn6gV2sdl38'
+),
+
+    # ( "https//youtube.com/playlist?list=PLeOfc0M-50LmJtZwyOfw6aVopmIbU1t7t"
+    # , "youtube.com/playlist?list=PLeOfc0M-50LmJtZwyOfw6aVopmIbU1t7t"
+    # ),
+    # TODO perhaps it should result in video link + sibling link?
+    # when exploring other people's playlists this could be quite useful?
+
+    # ( "https://www.youtube.com/watch?v=1NHbPN9pNPM&index=63&list=WL&t=491s"
+    # , "youtube.com/watch?v=1NHbPN9pNPM&list=WL" # TODO not so sure about &t, it's sort of useful
+    # ),
+    # TODO
+    # youtube.com/user/magauchsein/playlists?sort=dd&view=50&shelf_id=14
+    # youtube.com/user/TheChemlife/videos?view=0&sort=p&flow=grid
+])
+def test_youtube(url, expected):
+    assert canonify(url) == expected
+
+
+@param('url,expected', [(
+    'https://web.archive.org/web/20090902224414/http://reason.com/news/show/119237.html',
+    'reason.com/news/show/119237.html',
+)])
+def test_archiveorg(url, expected):
+    assert canonify(url) == expected
+
+
+# ugh. good example of motivation for cannon.py?
+@param('url,expected', [(
+    'https://news.ycombinator.com/from?site=jacopo.io',
+    'jacopo.io',
+), (
+    'https://news.ycombinator.com/item?id=25099862',
+    'news.ycombinator.com/item?id=25099862',
+), (
+    'https://news.ycombinator.com/reply?id=25100035&goto=item%3Fid%3D25099862%2325100035',
+    TODO,
+)])
+def test_hackernews(url, expected):
+    check(url, expected)
+
+
+@param('url, expected', [
+    ( 'https://www.reddit.com/r/firefox/comments/bbugc5/firefox_bans_free_speech_commenting_plugin/?ref=readnext'
+    , 'reddit.com/r/firefox/comments/bbugc5/firefox_bans_free_speech_commenting_plugin',
+    ),
+
+    ( 'https://www.reddit.com/r/selfhosted/comments/8j8mo3/what_are_you_self_hosting/dz19gh9/?utm_content=permalink&utm_medium=user&utm_source=reddit&utm_name=u_karlicoss'
+    , 'reddit.com/r/selfhosted/comments/8j8mo3/what_are_you_self_hosting/dz19gh9',
+    )
+    # TODO hmm. parent relationship can just rely on urls for reddit
+    # just need to support it in server I suppose
+
+    # TODO search queries?
+    # https://www.reddit.com/search?q=AutoValue
+
+    # TODO def need better markdown handling
+    # https://reddit.com/r/intj/comments/cmof04/me_irl/ew4a3dw/][ Me_irl]
+    # reddit.com/r/intj/comments/cmof04/me_irl/ew4a3dw/%5D%5BMe_irl%5D
+
+
+
+])
+def test_reddit(url, expected):
+    assert canonify(url) == expected
+
+# ugh. good example of motivation for cannon.py?
+@param('url,expected', [
+    ( 'https://app.getpocket.com/read/3479402594'
+    , 'app.getpocket.com/read/3479402594'
+    ),
+
+    ( 'https://getpocket.com/read/3479402594'
+    , 'app.getpocket.com/read/3479402594'
+    ),
+])
+def test_pocket(url, expected):
+    assert canonify(url) == expected
+
+@pytest.mark.parametrize("url,expected", [
+    # TODO ?? 'https://groups.google.com/a/list.hypothes.is/forum/#!topic/dev/kcmS7H8ssis',
+    #
+    # TODO FIXME fragment handling
+    # ( "https://www.scottaaronson.com/blog/?p=3167#comment-1731882"
+    # , "scottaaronson.com/blog/?p=3167#comment-1731882"
+    # ),
+
+
+    # TODO FIXME fragment handling
+    # ( "https://en.wikipedia.org/wiki/tendon#cite_note-14"
+    # , "en.wikipedia.org/wiki/tendon#cite_note-14"
+    # ),
+
+    # TODO FIXME fragment handling
+    # ( "https://physicstravelguide.com/experiments/aharonov-bohm#tab__concrete"
+    # , "physicstravelguide.com/experiments/aharonov-bohm#tab__concrete"
+    # ),
+
+    ( "https://github.com/search?o=asc&q=track&s=stars&type=Repositories"
+    , "github.com/search?q=track"
+    ),
+    ( "https://80000hours.org/career-decision/article/?utm_source=The+EA+Newsletter&utm_campaign=04ca3c2244-EMAIL_CAMPAIGN_2019_04_03_04_26&utm_medium=email&utm_term=0_51c1df13ac-04ca3c2244-318697649"
+    , "80000hours.org/career-decision/article"
+    ),
+    ( "https://www.facebook.com/photo.php?fbid=24147689823424326&set=pcb.2414778905423667&type=3&theater"
+    , "facebook.com/photo.php?fbid=24147689823424326"
+    ),
+    ( "https://play.google.com/store/apps/details?id=com.faultexception.reader&hl=en"
+    , "play.google.com/store/apps/details?id=com.faultexception.reader"
+    ),
+    # TODO it also got &p= parameter, which refers to page... not sure how to handle this
+    # news.ycombinator.com/item?id=15451442&p=2
+    ( "https://news.ycombinator.com/item?id=12172351"
+    , "news.ycombinator.com/item?id=12172351"
+    ),
+    ( "https://urbandictionary.com/define.php?term=Belgian%20Whistle"
+    , "urbandictionary.com/define.php?term=Belgian%20Whistle"
+    ),
+    ( "https://en.wikipedia.org/wiki/Dinic%27s_algorithm"
+    , "en.wikipedia.org/wiki/Dinic%27s_algorithm"
+    ),
+
+    ( "zoopla.co.uk/to-rent/details/42756337#D0zlBWeD4X85odsR.97"
+    , "zoopla.co.uk/to-rent/details/42756337"
+    ),
+
+    ( "withouthspec.co.uk/rooms/16867952?guests=2&adults=2&location=Berlin%2C+Germany&check_in=2017-08-16&check_out=2017-08-20"
+    , "withouthspec.co.uk/rooms/16867952"
+    ),
+
+    ( "amp.theguardian.com/technology/2017/oct/09/mark-zuckerberg-facebook-puerto-rico-virtual-reality"
+    , "theguardian.com/technology/2017/oct/09/mark-zuckerberg-facebook-puerto-rico-virtual-reality",
+    ),
+
+    ( "https://answers.yahoo.com/question/index?qid=20071101131442AAk9bGp"
+    , "answers.yahoo.com/question/index?qid=20071101131442AAk9bGp"
+    ),
+    ( "flowingdata.com/2010/12/14/10-best-data-visualization-projects-of-the-year-%e2%80%93-2010"
+    , "flowingdata.com/2010/12/14/10-best-data-visualization-projects-of-the-year-%E2%80%93-2010"
+    ),
+    ( "flowingdata.com/2010/12/14/10-best-data-visualization-projects-of-the-year-–-2010"
+    , "flowingdata.com/2010/12/14/10-best-data-visualization-projects-of-the-year-%E2%80%93-2010"
+    ),
+
+    ( "https://spoonuniversity.com/lifestyle/marmite-ways-to-eat-it&usg=AFQjCNH4s1SOEjlpENlfPV5nuvADZpSdow"
+    , "spoonuniversity.com/lifestyle/marmite-ways-to-eat-it"
+    ),
+
+    ( 'https://google.co.uk/amp/s/amp.reddit.com/r/androidapps/comments/757e2t/swiftkey_or_gboard'
+    , 'reddit.com/r/androidapps/comments/757e2t/swiftkey_or_gboard'
+    ),
+
+    # should sort query params
+    ( 'https://www.youtube.com/watch?v=hvoQiF0kBI8&list=WL&index=2'
+    , 'youtube.com/watch?v=hvoQiF0kBI8&list=WL',
+    ),
+    ( 'https://www.youtube.com/watch?list=WL&v=hvoQiF0kBI8&index=2'
+    , 'youtube.com/watch?v=hvoQiF0kBI8&list=WL',
+    ),
+
+    # TODO def need to allow the _user_ to define the rules.
+    # no way I can predict everything
+    # basically, allow *interactively* select
+    # also allow introspection, which rule matched?
+    ( 'https://bbs.archlinux.org/viewtopic.php?id=212740'
+    , 'bbs.archlinux.org/viewtopic.php?id=212740',
+    ),
+
+    ( 'https://ubuntuforums.org/showthread.php?t=1403470&s=0dd67bdb12559c22e73a220752db50c7&p=8806195#post8806195'
+    , 'ubuntuforums.org/showthread.php?t=1403470&p=8806195',
+    ),
+
+    ( 'https://arstechnica.com/?p=1371299',
+      'arstechnica.com/?p=1371299',
+      # eh. it's a redirect to https://arstechnica.com/information-technology/2018/09/dozens-of-ios-apps-surreptitiously-share-user-location-data-with-tracking-firms/
+      # however in the page body there is <link rel="shorturl" href="https://arstechnica.com/?p=1371299"> ...
+    ),
+
+    # ( "gwern.net/DNB+FAQ"
+    # , "TODO" # ???
+    # ),
+
+    # TODO shit. is that normal??? perhaps need to manually move fragment?
+    # SplitResult(scheme='https', netloc='unix.stackexchange.com', path='/questions/171603/convert-file-contents-to-lower-case/171708', query='', fragment='171708&usg=AFQjCNEFCGqCAa4P4Zlu2x11bThJispNxQ')
+    # ( "https://unix.stackexchange.com/questions/171603/convert-file-contents-to-lower-case/171708#171708&usg=AFQjCNEFCGqCAa4P4Zlu2x11bThJispNxQ"
+    # , "unix.stackexchange.com/questions/171603/convert-file-contents-to-lower-case/171708#171708"
+    # )
+])
+def test(url, expected):
+    assert canonify(url) == expected
+# TODO github queries
+# github.com/search?l=Python&q=reddit+backup
+# github.com/search?p=3&q=ipynb+language%3AHaskell
+# github.com/search?q=kobo+ExtraData
+# github.com/search?q=what-universal-human-experiences-are-you-missing-without-realizing-it
+
+# TODO git+https://github.com/expectocode/telegram-export@master
+# TODO again, for that actually sequence would be good...
+
+# TODO "https://twitter.com/search?q=pinboard search&src=typd"
+
+# TODO https://www.zalando-lounge.ch/#/
+# TODO m.facebook.com
+# TODO [R('^(youtube|urbandictionary|tesco|scottaaronson|answers.yahoo.com|code.google.com)') , None],
+
+
+
+# TODO
+# amazon.co.uk/gp/offer-listing/B00525XKL4/ref=dp_olp_new
+# amazon.co.uk/gp/offer-listing/B00525XKL4/ref=olp_twister_child
+
+# TODO
+# en.wikipedia.org/wiki/S&P_500_Index
+
+
+# TODO
+# google.co.uk/maps/place/Hackney+Bureau/@51.5293789,-0.0527919,16.88z/data=!bla-bla!-bla
+
+
+# TODO
+# perhaps, disable utf8 everywhere?
+# github.com/search?utf8=%E2%9C%93&q=%22My+Clippings.txt%22
+
+# TODO FIXME fragment handling
+# ( "https://www.scottaaronson.com/blog/?p=3167#comment-1731882"
+# , "scottaaronson.com/blog/?p=3167#comment-1731882"
+# ),
+
+@pytest.mark.parametrize("urls", [
+    {
+        "launchpad.net/ubuntu/%2Bsource/okular",
+        "launchpad.net/ubuntu/+source/okular",
+    },
+    {
+        "flowingdata.com/2010/12/14/10-best-data-visualization-projects-of-the-year-–-2010",
+        "flowingdata.com/2010/12/14/10-best-data-visualization-projects-of-the-year-%e2%80%93-2010",
+        "https://flowingdata.com/2010/12/14/10-best-data-visualization-projects-of-the-year-%e2%80%93-2010/&usg=AFQjCNEZsEGz9rqpWqlFXR5Tc7pkCKY5sQ",
+    },
+])
+def test_same_norm(urls):
+    urls = list(sorted(urls))
+    u0 = urls[0]
+    c0 = canonify(u0)
+    for u in urls[1:]:
+        c = canonify(u)
+        assert c0 == c, f'Expected {u0} and {u} to be same canonically; got {c0} and {c} instead'
+
+def test_error():
+    # canonify('  +74Zo535, fewfwf@gmail.com') # -- apparently was patched in some python3.7 versions
+    with pytest.raises(CanonifyException):
+        # borrowed from https://bugs.mageia.org/show_bug.cgi?id=24640#c7
+        canonify('https://example.com\uFF03@bing.com')
+
+@pytest.mark.parametrize("url,expected", [
+    ('https://news.ycombinator.com/item?id=', 'news.ycombinator.com/item?id='),
+    ('https://www.youtube.com/watch?v=hvoQiF0kBI8&list&index=2',
+     'youtube.com/watch?v=hvoQiF0kBI8&list='),
+])
+def test_empty_query_parameter(url, expected):
+    assert canonify(url) == expected
+
+@pytest.mark.parametrize("url,expected", [
+    ('http://www.isfdb.org/cgi-bin/title.cgi?2172', 'isfdb.org/cgi-bin/title.cgi?2172='),
+    ('http://www.isfdb.org/cgi-bin/title.cgi?2172+1', 'isfdb.org/cgi-bin/title.cgi?2172%201='),
+    ('http://www.isfdb.org/cgi-bin/title.cgi?2172&foo=bar&baz&quux', 'isfdb.org/cgi-bin/title.cgi?2172=&baz=&foo=bar&quux='),
+])
+def test_qkeep_true(url, expected):
+    assert canonify(url) == expected
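
Taken together, these cases pin down canonify's contract: strip scheme and www, drop tracking and ordering parameters, and normalize what remains into a stable, comparable form. Straight from the expectations above:

    >>> from promnesia.cannon import canonify
    >>> canonify('https://www.youtube.com/watch?t=491s&v=1NHbPN9pNPM&index=63&list=WL')
    'youtube.com/watch?v=1NHbPN9pNPM&t=491s&list=WL'
    >>> canonify('https://web.archive.org/web/20090902224414/http://reason.com/news/show/119237.html')
    'reason.com/news/show/119237.html'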
promnesia/tests/test_cli.py
@@ -0,0 +1,42 @@
+import os
+import time
+
+from ..common import _is_windows
+
+from .common import get_testdata, promnesia_bin, tmp_popen
+
+import pytest
+import requests
+
+
+ox_hugo_data = get_testdata('ox-hugo/test/site')
+
+
+def test_demo() -> None:
+    if _is_windows:
+        # for some reason fails to connect to server..
+        # not sure maybe something with port choice idk
+        pytest.skip("TODO broken on Windows")
+
+    with tmp_popen(promnesia_bin('demo', '--port', '16789', ox_hugo_data)):
+        # TODO why does it want post??
+        time.sleep(2)  # meh.. need a generic helper to wait till ready...
+        res = {}
+        for attempt in range(30):
+            time.sleep(1)
+            try:
+                res = requests.post(
+                    "http://localhost:16789/search",
+                    json=dict(url="https://github.com/kaushalmodi/ox-hugo/issues"),
+                ).json()
+                break
+            except:
+                continue
+        else:
+            raise RuntimeError("Couldn't connect to the server")
+        vis = res['visits']
+        assert len(vis) > 50, vis
+        mds = [x for x in vis if x['locator']['title'] == 'content/posts/citations-example-toml.md'.replace('/', os.sep)]
+        orgs = [x for x in vis if x['locator']['title'].startswith('content-org/single-posts/empty_tag.org'.replace('/', os.sep))]
+        assert len(mds) == 1
+        assert len(orgs) == 1
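
The retry loop above is the "generic helper to wait till ready" that the comment asks for, just inlined. A sketch of what such a helper could look like (wait_for_server is a hypothetical name, not part of promnesia):

    import time
    import requests

    def wait_for_server(url: str, timeout: float = 30.0) -> None:
        # poll until the server accepts connections or the deadline passes
        deadline = time.time() + timeout
        while time.time() < deadline:
            try:
                requests.get(url, timeout=1)
                return
            except requests.ConnectionError:
                time.sleep(0.5)
        raise RuntimeError(f"server at {url} didn't come up within {timeout}s")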
promnesia/tests/test_compare.py
@@ -0,0 +1,30 @@
+from pathlib import Path
+import shutil
+
+from ..compare import compare_files
+from .utils import index_urls
+
+
+def test_compare(tmp_path: Path) -> None:
+    idx = index_urls({
+        'https://example.com': None,
+        'https://en.wikipedia.org/wiki/Saturn_V': None,
+        'https://plato.stanford.edu/entries/qualia': None,
+    })
+    idx(tmp_path)
+    db = tmp_path / 'promnesia.sqlite'
+    old_db = tmp_path / 'promnesia-old.sqlite'
+    shutil.move(str(db), str(old_db))
+
+    idx2 = index_urls({
+        'https://example.com': None,
+        'https://www.reddit.com/r/explainlikeimfive/comments/1ev6e0/eli5entropy': None,
+        'https://en.wikipedia.org/wiki/Saturn_V': None,
+        'https://plato.stanford.edu/entries/qualia': None,
+    })
+    idx2(tmp_path)
+
+    # should not crash, as there are more links in the new database
+    assert len(list(compare_files(old_db, db))) == 0
+
+    assert len(list(compare_files(db, old_db))) == 1
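
The two assertions fix the direction of compare_files: it yields the visits present in the first database but missing from the second, so the old-to-new comparison is empty while the reverse reports the one dropped URL. A usage sketch under that reading (file names are placeholders):

    from pathlib import Path
    from promnesia.compare import compare_files

    # report visits lost after reindexing
    lost = list(compare_files(Path('promnesia-old.sqlite'), Path('promnesia.sqlite')))
    for vis in lost:
        print('missing from the new index:', vis)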