promnesia 1.3.20241021__py3-none-any.whl → 1.4.20250909__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promnesia/__init__.py +4 -1
- promnesia/__main__.py +72 -59
- promnesia/cannon.py +90 -89
- promnesia/common.py +74 -62
- promnesia/compare.py +15 -10
- promnesia/config.py +22 -17
- promnesia/database/dump.py +1 -2
- promnesia/extract.py +6 -6
- promnesia/logging.py +27 -15
- promnesia/misc/install_server.py +25 -19
- promnesia/server.py +69 -53
- promnesia/sources/auto.py +65 -51
- promnesia/sources/browser.py +7 -2
- promnesia/sources/browser_legacy.py +51 -40
- promnesia/sources/demo.py +0 -1
- promnesia/sources/fbmessenger.py +0 -1
- promnesia/sources/filetypes.py +15 -11
- promnesia/sources/github.py +4 -1
- promnesia/sources/guess.py +4 -1
- promnesia/sources/hackernews.py +5 -7
- promnesia/sources/hpi.py +3 -1
- promnesia/sources/html.py +4 -2
- promnesia/sources/instapaper.py +1 -0
- promnesia/sources/markdown.py +4 -4
- promnesia/sources/org.py +17 -8
- promnesia/sources/plaintext.py +14 -11
- promnesia/sources/pocket.py +2 -1
- promnesia/sources/reddit.py +5 -8
- promnesia/sources/roamresearch.py +3 -1
- promnesia/sources/rss.py +4 -5
- promnesia/sources/shellcmd.py +3 -6
- promnesia/sources/signal.py +14 -14
- promnesia/sources/smscalls.py +0 -1
- promnesia/sources/stackexchange.py +2 -2
- promnesia/sources/takeout.py +14 -21
- promnesia/sources/takeout_legacy.py +16 -10
- promnesia/sources/telegram.py +7 -3
- promnesia/sources/telegram_legacy.py +5 -5
- promnesia/sources/twitter.py +1 -1
- promnesia/sources/vcs.py +6 -3
- promnesia/sources/viber.py +2 -2
- promnesia/sources/website.py +4 -3
- promnesia/sqlite.py +10 -7
- promnesia/tests/common.py +2 -0
- promnesia/tests/server_helper.py +2 -2
- promnesia/tests/sources/test_filetypes.py +9 -7
- promnesia/tests/sources/test_hypothesis.py +7 -3
- promnesia/tests/sources/test_org.py +7 -2
- promnesia/tests/sources/test_plaintext.py +9 -7
- promnesia/tests/sources/test_shellcmd.py +10 -9
- promnesia/tests/test_cannon.py +254 -237
- promnesia/tests/test_cli.py +8 -2
- promnesia/tests/test_compare.py +16 -12
- promnesia/tests/test_db_dump.py +4 -3
- promnesia/tests/test_extract.py +7 -4
- promnesia/tests/test_indexer.py +10 -10
- promnesia/tests/test_server.py +10 -10
- promnesia/tests/utils.py +1 -5
- promnesia-1.4.20250909.dist-info/METADATA +66 -0
- promnesia-1.4.20250909.dist-info/RECORD +80 -0
- {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info}/WHEEL +1 -2
- promnesia/kjson.py +0 -122
- promnesia/sources/__init__.pyi +0 -0
- promnesia-1.3.20241021.dist-info/METADATA +0 -55
- promnesia-1.3.20241021.dist-info/RECORD +0 -83
- promnesia-1.3.20241021.dist-info/top_level.txt +0 -1
- {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info}/entry_points.txt +0 -0
- {promnesia-1.3.20241021.dist-info → promnesia-1.4.20250909.dist-info/licenses}/LICENSE +0 -0
promnesia/tests/test_cannon.py
CHANGED
@@ -27,273 +27,279 @@ def check(url, expected):
|
|
27
27
|
# TODO assume spaces are not meaningful??
|
28
28
|
# then could align URLs etc?
|
29
29
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
),
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
# ( "https://www.youtube.com/watch?v=1NHbPN9pNPM&index=63&list=WL&t=491s"
|
67
|
-
# , "youtube.com/watch?v=1NHbPN9pNPM&list=WL" # TODO not so sure about &t, it's sort of useful
|
68
|
-
# ),
|
69
|
-
# TODO
|
70
|
-
# youtube.com/user/magauchsein/playlists?sort=dd&view=50&shelf_id=14
|
71
|
-
# youtube.com/user/TheChemlife/videos?view=0&sort=p&flow=grid
|
72
|
-
])
|
30
|
+
|
31
|
+
@param(
|
32
|
+
'url,expected',
|
33
|
+
[
|
34
|
+
(
|
35
|
+
'https://www.youtube.com/watch?t=491s&v=1NHbPN9pNPM&index=63&list=WL',
|
36
|
+
# NOTE: t= reordered, makes it more hierarchical
|
37
|
+
# list as well, I guess makes the most sense to keep it at the very end.. since lists are more like tags
|
38
|
+
'youtube.com/watch?v=1NHbPN9pNPM&t=491s&list=WL',
|
39
|
+
),
|
40
|
+
('youtube.com/watch?v=wHrCkyoe72U&feature=share&time_continue=6', 'youtube.com/watch?v=wHrCkyoe72U'),
|
41
|
+
('youtube.com/embed/nyc6RJEEe0U?feature=oembed', 'youtube.com/watch?v=nyc6RJEEe0U'),
|
42
|
+
('https://youtu.be/iCvmsMzlF7o?list=WL', 'youtube.com/watch?v=iCvmsMzlF7o&list=WL'),
|
43
|
+
# TODO can even be like that or contain timestamp (&t=)
|
44
|
+
# TODO warn if param already present? shouldn't happen..
|
45
|
+
# TODO could be interesting to do automatic rule extraction by querying one representative and then extracting canonical
|
46
|
+
# TODO national domains don't matter for youtube
|
47
|
+
#
|
48
|
+
# [*, 'youtube', ANY_DOMAIN] / 'embed' -> 'youtube.com/watch'
|
49
|
+
# TODO use regex backrefs?
|
50
|
+
#
|
51
|
+
('m.youtube.com/watch?v=Zn6gV2sdl38', 'youtube.com/watch?v=Zn6gV2sdl38'),
|
52
|
+
# ( "https//youtube.com/playlist?list=PLeOfc0M-50LmJtZwyOfw6aVopmIbU1t7t"
|
53
|
+
# , "youtube.com/playlist?list=PLeOfc0M-50LmJtZwyOfw6aVopmIbU1t7t"
|
54
|
+
# ),
|
55
|
+
# TODO perhaps it should result in video link + sibling link?
|
56
|
+
# when exploring other people's playlists this could be quite useful?
|
57
|
+
#
|
58
|
+
# ( "https://www.youtube.com/watch?v=1NHbPN9pNPM&index=63&list=WL&t=491s"
|
59
|
+
# , "youtube.com/watch?v=1NHbPN9pNPM&list=WL" # TODO not so sure about &t, it's sort of useful
|
60
|
+
# ),
|
61
|
+
# TODO
|
62
|
+
# youtube.com/user/magauchsein/playlists?sort=dd&view=50&shelf_id=14
|
63
|
+
# youtube.com/user/TheChemlife/videos?view=0&sort=p&flow=grid
|
64
|
+
],
|
65
|
+
)
|
73
66
|
def test_youtube(url, expected):
|
74
67
|
assert canonify(url) == expected
|
75
68
|
|
76
69
|
|
77
|
-
@param(
|
78
|
-
'
|
79
|
-
|
80
|
-
|
70
|
+
@param(
|
71
|
+
'url,expected',
|
72
|
+
[
|
73
|
+
(
|
74
|
+
'https://web.archive.org/web/20090902224414/http://reason.com/news/show/119237.html',
|
75
|
+
'reason.com/news/show/119237.html',
|
76
|
+
)
|
77
|
+
],
|
78
|
+
)
|
81
79
|
def test_archiveorg(url, expected):
|
82
80
|
assert canonify(url) == expected
|
83
81
|
|
84
82
|
|
85
83
|
# ugh. good example of motivation for cannon.py?
|
86
|
-
@param(
|
87
|
-
'
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
),
|
93
|
-
|
94
|
-
|
95
|
-
|
84
|
+
@param(
|
85
|
+
'url,expected',
|
86
|
+
[
|
87
|
+
(
|
88
|
+
'https://news.ycombinator.com/from?site=jacopo.io',
|
89
|
+
'jacopo.io',
|
90
|
+
),
|
91
|
+
(
|
92
|
+
'https://news.ycombinator.com/item?id=25099862',
|
93
|
+
'news.ycombinator.com/item?id=25099862',
|
94
|
+
),
|
95
|
+
(
|
96
|
+
'https://news.ycombinator.com/reply?id=25100035&goto=item%3Fid%3D25099862%2325100035',
|
97
|
+
TODO,
|
98
|
+
),
|
99
|
+
],
|
100
|
+
)
|
96
101
|
def test_hackernews(url, expected):
|
97
102
|
check(url, expected)
|
98
103
|
|
99
104
|
|
100
|
-
@param(
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
]
|
105
|
+
@param(
|
106
|
+
'url, expected',
|
107
|
+
[
|
108
|
+
(
|
109
|
+
'https://www.reddit.com/r/firefox/comments/bbugc5/firefox_bans_free_speech_commenting_plugin/?ref=readnext',
|
110
|
+
'reddit.com/r/firefox/comments/bbugc5/firefox_bans_free_speech_commenting_plugin',
|
111
|
+
),
|
112
|
+
(
|
113
|
+
'https://www.reddit.com/r/selfhosted/comments/8j8mo3/what_are_you_self_hosting/dz19gh9/?utm_content=permalink&utm_medium=user&utm_source=reddit&utm_name=u_karlicoss',
|
114
|
+
'reddit.com/r/selfhosted/comments/8j8mo3/what_are_you_self_hosting/dz19gh9',
|
115
|
+
),
|
116
|
+
# TODO hmm. parent relationship can just rely on urls for reddit
|
117
|
+
# just need to support it in server I suppose
|
118
|
+
#
|
119
|
+
# TODO search queries?
|
120
|
+
# https://www.reddit.com/search?q=AutoValue
|
121
|
+
#
|
122
|
+
# TODO def need better markdown handling
|
123
|
+
# https://reddit.com/r/intj/comments/cmof04/me_irl/ew4a3dw/][ Me_irl]
|
124
|
+
# reddit.com/r/intj/comments/cmof04/me_irl/ew4a3dw/%5D%5BMe_irl%5D
|
125
|
+
],
|
126
|
+
)
|
121
127
|
def test_reddit(url, expected):
|
122
128
|
assert canonify(url) == expected
|
123
129
|
|
130
|
+
|
124
131
|
# ugh. good example of motivation for cannon.py?
|
125
|
-
@param(
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
),
|
133
|
-
])
|
132
|
+
@param(
|
133
|
+
'url,expected',
|
134
|
+
[
|
135
|
+
('https://app.getpocket.com/read/3479402594', 'app.getpocket.com/read/3479402594'),
|
136
|
+
('https://getpocket.com/read/3479402594', 'app.getpocket.com/read/3479402594'),
|
137
|
+
],
|
138
|
+
)
|
134
139
|
def test_pocket(url, expected):
|
135
140
|
assert canonify(url) == expected
|
136
141
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
# ( "https://unix.stackexchange.com/questions/171603/convert-file-contents-to-lower-case/171708#171708&usg=AFQjCNEFCGqCAa4P4Zlu2x11bThJispNxQ"
|
243
|
-
# , "unix.stackexchange.com/questions/171603/convert-file-contents-to-lower-case/171708#171708"
|
244
|
-
# )
|
245
|
-
])
|
142
|
+
|
143
|
+
@pytest.mark.parametrize(
|
144
|
+
("url", "expected"),
|
145
|
+
[
|
146
|
+
# TODO ?? 'https://groups.google.com/a/list.hypothes.is/forum/#!topic/dev/kcmS7H8ssis',
|
147
|
+
#
|
148
|
+
# TODO FIXME fragment handling
|
149
|
+
# ( "https://www.scottaaronson.com/blog/?p=3167#comment-1731882"
|
150
|
+
# , "scottaaronson.com/blog/?p=3167#comment-1731882"
|
151
|
+
# ),
|
152
|
+
# TODO FIXME fragment handling
|
153
|
+
# ( "https://en.wikipedia.org/wiki/tendon#cite_note-14"
|
154
|
+
# , "en.wikipedia.org/wiki/tendon#cite_note-14"
|
155
|
+
# ),
|
156
|
+
# TODO FIXME fragment handling
|
157
|
+
# ( "https://physicstravelguide.com/experiments/aharonov-bohm#tab__concrete"
|
158
|
+
# , "physicstravelguide.com/experiments/aharonov-bohm#tab__concrete"
|
159
|
+
# ),
|
160
|
+
("https://github.com/search?o=asc&q=track&s=stars&type=Repositories", "github.com/search?q=track"),
|
161
|
+
(
|
162
|
+
"https://80000hours.org/career-decision/article/?utm_source=The+EA+Newsletter&utm_campaign=04ca3c2244-EMAIL_CAMPAIGN_2019_04_03_04_26&utm_medium=email&utm_term=0_51c1df13ac-04ca3c2244-318697649",
|
163
|
+
"80000hours.org/career-decision/article",
|
164
|
+
),
|
165
|
+
(
|
166
|
+
"https://www.facebook.com/photo.php?fbid=24147689823424326&set=pcb.2414778905423667&type=3&theater",
|
167
|
+
"facebook.com/photo.php?fbid=24147689823424326",
|
168
|
+
),
|
169
|
+
(
|
170
|
+
"https://play.google.com/store/apps/details?id=com.faultexception.reader&hl=en",
|
171
|
+
"play.google.com/store/apps/details?id=com.faultexception.reader",
|
172
|
+
),
|
173
|
+
# TODO it also got &p= parameter, which refers to page... not sure how to handle this
|
174
|
+
# news.ycombinator.com/item?id=15451442&p=2
|
175
|
+
("https://news.ycombinator.com/item?id=12172351", "news.ycombinator.com/item?id=12172351"),
|
176
|
+
(
|
177
|
+
"https://urbandictionary.com/define.php?term=Belgian%20Whistle",
|
178
|
+
"urbandictionary.com/define.php?term=Belgian%20Whistle",
|
179
|
+
),
|
180
|
+
("https://en.wikipedia.org/wiki/Dinic%27s_algorithm", "en.wikipedia.org/wiki/Dinic%27s_algorithm"),
|
181
|
+
("zoopla.co.uk/to-rent/details/42756337#D0zlBWeD4X85odsR.97", "zoopla.co.uk/to-rent/details/42756337"),
|
182
|
+
(
|
183
|
+
"withouthspec.co.uk/rooms/16867952?guests=2&adults=2&location=Berlin%2C+Germany&check_in=2017-08-16&check_out=2017-08-20",
|
184
|
+
"withouthspec.co.uk/rooms/16867952",
|
185
|
+
),
|
186
|
+
(
|
187
|
+
"amp.theguardian.com/technology/2017/oct/09/mark-zuckerberg-facebook-puerto-rico-virtual-reality",
|
188
|
+
"theguardian.com/technology/2017/oct/09/mark-zuckerberg-facebook-puerto-rico-virtual-reality",
|
189
|
+
),
|
190
|
+
(
|
191
|
+
"https://answers.yahoo.com/question/index?qid=20071101131442AAk9bGp",
|
192
|
+
"answers.yahoo.com/question/index?qid=20071101131442AAk9bGp",
|
193
|
+
),
|
194
|
+
(
|
195
|
+
"flowingdata.com/2010/12/14/10-best-data-visualization-projects-of-the-year-%e2%80%93-2010",
|
196
|
+
"flowingdata.com/2010/12/14/10-best-data-visualization-projects-of-the-year-%E2%80%93-2010",
|
197
|
+
),
|
198
|
+
(
|
199
|
+
"flowingdata.com/2010/12/14/10-best-data-visualization-projects-of-the-year-–-2010",
|
200
|
+
"flowingdata.com/2010/12/14/10-best-data-visualization-projects-of-the-year-%E2%80%93-2010",
|
201
|
+
),
|
202
|
+
(
|
203
|
+
"https://spoonuniversity.com/lifestyle/marmite-ways-to-eat-it&usg=AFQjCNH4s1SOEjlpENlfPV5nuvADZpSdow",
|
204
|
+
"spoonuniversity.com/lifestyle/marmite-ways-to-eat-it",
|
205
|
+
),
|
206
|
+
(
|
207
|
+
'https://google.co.uk/amp/s/amp.reddit.com/r/androidapps/comments/757e2t/swiftkey_or_gboard',
|
208
|
+
'reddit.com/r/androidapps/comments/757e2t/swiftkey_or_gboard',
|
209
|
+
),
|
210
|
+
# should sort query params
|
211
|
+
(
|
212
|
+
'https://www.youtube.com/watch?v=hvoQiF0kBI8&list=WL&index=2',
|
213
|
+
'youtube.com/watch?v=hvoQiF0kBI8&list=WL',
|
214
|
+
),
|
215
|
+
(
|
216
|
+
'https://www.youtube.com/watch?list=WL&v=hvoQiF0kBI8&index=2',
|
217
|
+
'youtube.com/watch?v=hvoQiF0kBI8&list=WL',
|
218
|
+
),
|
219
|
+
# TODO def need to allow the _user_ to define the rules.
|
220
|
+
# no way I can predict everything
|
221
|
+
# basically, allow *interactively* select
|
222
|
+
# also allow introspection, which rule matched?
|
223
|
+
(
|
224
|
+
'https://bbs.archlinux.org/viewtopic.php?id=212740',
|
225
|
+
'bbs.archlinux.org/viewtopic.php?id=212740',
|
226
|
+
),
|
227
|
+
(
|
228
|
+
'https://ubuntuforums.org/showthread.php?t=1403470&s=0dd67bdb12559c22e73a220752db50c7&p=8806195#post8806195',
|
229
|
+
'ubuntuforums.org/showthread.php?t=1403470&p=8806195',
|
230
|
+
),
|
231
|
+
(
|
232
|
+
'https://arstechnica.com/?p=1371299',
|
233
|
+
'arstechnica.com/?p=1371299',
|
234
|
+
# eh. it's a redirect to https://arstechnica.com/information-technology/2018/09/dozens-of-ios-apps-surreptitiously-share-user-location-data-with-tracking-firms/
|
235
|
+
# however in the page body there is <link rel="shorturl" href="https://arstechnica.com/?p=1371299"> ...
|
236
|
+
),
|
237
|
+
# ( "gwern.net/DNB+FAQ"
|
238
|
+
# , "TODO" # ???
|
239
|
+
# ),
|
240
|
+
# TODO shit. is that normal??? perhaps need to manually move fragment?
|
241
|
+
# SplitResult(scheme='https', netloc='unix.stackexchange.com', path='/questions/171603/convert-file-contents-to-lower-case/171708', query='', fragment='171708&usg=AFQjCNEFCGqCAa4P4Zlu2x11bThJispNxQ')
|
242
|
+
# ( "https://unix.stackexchange.com/questions/171603/convert-file-contents-to-lower-case/171708#171708&usg=AFQjCNEFCGqCAa4P4Zlu2x11bThJispNxQ"
|
243
|
+
# , "unix.stackexchange.com/questions/171603/convert-file-contents-to-lower-case/171708#171708"
|
244
|
+
# )
|
245
|
+
],
|
246
|
+
)
|
246
247
|
def test(url, expected):
|
247
248
|
assert canonify(url) == expected
|
248
249
|
# TODO github queries
|
250
|
+
|
251
|
+
|
249
252
|
# github.com/search?l=Python&q=reddit+backup
|
250
253
|
# github.com/search?p=3&q=ipynb+language%3AHaskell
|
251
254
|
# github.com/search?q=kobo+ExtraData
|
252
255
|
# github.com/search?q=what-universal-human-experiences-are-you-missing-without-realizing-it
|
253
256
|
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
# TODO "https://twitter.com/search?q=pinboard search&src=typd"
|
257
|
+
# TODO git+https://github.com/expectocode/telegram-export@master
|
258
|
+
# TODO again, for that actually sequence would be good...
|
258
259
|
|
259
|
-
|
260
|
-
# TODO m.facebook.com
|
261
|
-
# TODO [R('^(youtube|urbandictionary|tesco|scottaaronson|answers.yahoo.com|code.google.com)') , None],
|
260
|
+
# TODO "https://twitter.com/search?q=pinboard search&src=typd"
|
262
261
|
|
262
|
+
# TODO https://www.zalando-lounge.ch/#/
|
263
|
+
# TODO m.facebook.com
|
264
|
+
# TODO [R('^(youtube|urbandictionary|tesco|scottaaronson|answers.yahoo.com|code.google.com)') , None],
|
263
265
|
|
264
266
|
|
265
|
-
|
267
|
+
# TODO
|
266
268
|
# amazon.co.uk/gp/offer-listing/B00525XKL4/ref=dp_olp_new
|
267
269
|
# amazon.co.uk/gp/offer-listing/B00525XKL4/ref=olp_twister_child
|
268
270
|
|
269
|
-
|
270
|
-
|
271
|
+
# TODO
|
272
|
+
# en.wikipedia.org/wiki/S&P_500_Index
|
271
273
|
|
272
274
|
|
273
|
-
|
274
|
-
|
275
|
+
# TODO
|
276
|
+
# google.co.uk/maps/place/Hackney+Bureau/@51.5293789,-0.0527919,16.88z/data=!bla-bla!-bla
|
275
277
|
|
276
278
|
|
277
|
-
|
278
|
-
|
279
|
-
|
279
|
+
# TODO
|
280
|
+
# perhaps, disable utf8 everywhere?
|
281
|
+
# github.com/search?utf8=%E2%9C%93&q=%22My+Clippings.txt%22
|
280
282
|
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
283
|
+
# TODO FIXME fragment handling
|
284
|
+
# ( "https://www.scottaaronson.com/blog/?p=3167#comment-1731882"
|
285
|
+
# , "scottaaronson.com/blog/?p=3167#comment-1731882"
|
286
|
+
# ),
|
285
287
|
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
288
|
+
|
289
|
+
@pytest.mark.parametrize(
|
290
|
+
"urls",
|
291
|
+
[
|
292
|
+
{
|
293
|
+
"launchpad.net/ubuntu/%2Bsource/okular",
|
294
|
+
"launchpad.net/ubuntu/+source/okular",
|
295
|
+
},
|
296
|
+
{
|
297
|
+
"flowingdata.com/2010/12/14/10-best-data-visualization-projects-of-the-year-–-2010",
|
298
|
+
"flowingdata.com/2010/12/14/10-best-data-visualization-projects-of-the-year-%e2%80%93-2010",
|
299
|
+
"https://flowingdata.com/2010/12/14/10-best-data-visualization-projects-of-the-year-%e2%80%93-2010/&usg=AFQjCNEZsEGz9rqpWqlFXR5Tc7pkCKY5sQ",
|
300
|
+
},
|
301
|
+
],
|
302
|
+
)
|
297
303
|
def test_same_norm(urls):
|
298
304
|
urls = sorted(urls)
|
299
305
|
u0 = urls[0]
|
@@ -302,24 +308,35 @@ def test_same_norm(urls):
|
|
302
308
|
c = canonify(u)
|
303
309
|
assert c0 == c, f'Expected {u0} and {u} to be same canonically; got {c0} and {c} instead'
|
304
310
|
|
311
|
+
|
305
312
|
def test_error():
|
306
313
|
# canonify(' +74Zo535, fewfwf@gmail.com') # -- apparently was patched in some python3.7 versions
|
307
314
|
with pytest.raises(CanonifyException):
|
308
315
|
# borrowed from https://bugs.mageia.org/show_bug.cgi?id=24640#c7
|
309
|
-
canonify('https://example.com\
|
316
|
+
canonify('https://example.com\uff03@bing.com')
|
310
317
|
|
311
|
-
|
312
|
-
|
313
|
-
(
|
314
|
-
|
315
|
-
|
318
|
+
|
319
|
+
@pytest.mark.parametrize(
|
320
|
+
("url", "expected"),
|
321
|
+
[
|
322
|
+
('https://news.ycombinator.com/item?id=', 'news.ycombinator.com/item?id='),
|
323
|
+
('https://www.youtube.com/watch?v=hvoQiF0kBI8&list&index=2', 'youtube.com/watch?v=hvoQiF0kBI8&list='),
|
324
|
+
],
|
325
|
+
)
|
316
326
|
def test_empty_query_parameter(url, expected):
|
317
327
|
assert canonify(url) == expected
|
318
328
|
|
319
|
-
|
320
|
-
|
321
|
-
(
|
322
|
-
|
323
|
-
|
329
|
+
|
330
|
+
@pytest.mark.parametrize(
|
331
|
+
("url", "expected"),
|
332
|
+
[
|
333
|
+
('http://www.isfdb.org/cgi-bin/title.cgi?2172', 'isfdb.org/cgi-bin/title.cgi?2172='),
|
334
|
+
('http://www.isfdb.org/cgi-bin/title.cgi?2172+1', 'isfdb.org/cgi-bin/title.cgi?2172%201='),
|
335
|
+
(
|
336
|
+
'http://www.isfdb.org/cgi-bin/title.cgi?2172&foo=bar&baz&quux',
|
337
|
+
'isfdb.org/cgi-bin/title.cgi?2172=&baz=&foo=bar&quux=',
|
338
|
+
),
|
339
|
+
],
|
340
|
+
)
|
324
341
|
def test_qkeep_true(url, expected):
|
325
342
|
assert canonify(url) == expected
|
promnesia/tests/test_cli.py
CHANGED
@@ -34,7 +34,13 @@ def test_demo() -> None:
|
|
34
34
|
raise RuntimeError("Couldn't connect to the server")
|
35
35
|
vis = res['visits']
|
36
36
|
assert len(vis) > 50, vis
|
37
|
-
mds = [
|
38
|
-
|
37
|
+
mds = [
|
38
|
+
x for x in vis if x['locator']['title'] == 'content/posts/citations-example-toml.md'.replace('/', os.sep)
|
39
|
+
]
|
40
|
+
orgs = [
|
41
|
+
x
|
42
|
+
for x in vis
|
43
|
+
if x['locator']['title'].startswith('content-org/single-posts/empty_tag.org'.replace('/', os.sep))
|
44
|
+
]
|
39
45
|
assert len(mds) == 1
|
40
46
|
assert len(orgs) == 1
|
promnesia/tests/test_compare.py
CHANGED
@@ -6,22 +6,26 @@ from .utils import index_urls
|
|
6
6
|
|
7
7
|
|
8
8
|
def test_compare(tmp_path: Path) -> None:
|
9
|
-
idx = index_urls(
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
9
|
+
idx = index_urls(
|
10
|
+
{
|
11
|
+
'https://example.com': None,
|
12
|
+
'https://en.wikipedia.org/wiki/Saturn_V': None,
|
13
|
+
'https://plato.stanford.edu/entries/qualia': None,
|
14
|
+
}
|
15
|
+
)
|
14
16
|
idx(tmp_path)
|
15
|
-
db
|
17
|
+
db = tmp_path / 'promnesia.sqlite'
|
16
18
|
old_db = tmp_path / 'promnesia-old.sqlite'
|
17
19
|
shutil.move(str(db), str(old_db))
|
18
20
|
|
19
|
-
idx2 = index_urls(
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
21
|
+
idx2 = index_urls(
|
22
|
+
{
|
23
|
+
'https://example.com': None,
|
24
|
+
'https://www.reddit.com/r/explainlikeimfive/comments/1ev6e0/eli5entropy': None,
|
25
|
+
'https://en.wikipedia.org/wiki/Saturn_V': None,
|
26
|
+
'https://plato.stanford.edu/entries/qualia': None,
|
27
|
+
}
|
28
|
+
)
|
25
29
|
idx2(tmp_path)
|
26
30
|
|
27
31
|
# should not crash, as there are more links in the new database
|