promnesia 1.2.20240810__tar.gz → 1.3.20241021__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/.github/workflows/main.yml +7 -4
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/PKG-INFO +3 -2
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/mypy.ini +2 -3
- promnesia-1.3.20241021/ruff.toml +147 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/setup.py +2 -1
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/__init__.py +14 -3
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/__main__.py +38 -25
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/cannon.py +23 -23
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/common.py +49 -42
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/compare.py +18 -20
- promnesia-1.3.20241021/src/promnesia/compat.py +12 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/config.py +20 -22
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/database/common.py +4 -3
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/database/dump.py +14 -13
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/database/load.py +7 -7
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/extract.py +13 -11
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/kjson.py +11 -10
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/logging.py +1 -1
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/misc/install_server.py +7 -8
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/server.py +42 -31
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/auto.py +43 -30
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/auto_logseq.py +6 -5
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/auto_obsidian.py +2 -2
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/browser.py +14 -9
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/browser_legacy.py +17 -13
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/demo.py +7 -7
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/fbmessenger.py +3 -2
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/filetypes.py +9 -7
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/github.py +5 -7
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/guess.py +2 -1
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/hackernews.py +2 -2
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/hpi.py +2 -2
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/html.py +7 -5
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/hypothesis.py +3 -2
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/instapaper.py +2 -2
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/markdown.py +17 -7
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/org.py +20 -10
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/plaintext.py +30 -31
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/pocket.py +3 -2
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/reddit.py +19 -18
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/roamresearch.py +2 -1
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/rss.py +3 -4
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/shellcmd.py +19 -6
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/signal.py +14 -13
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/smscalls.py +2 -2
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/stackexchange.py +3 -2
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/takeout.py +23 -13
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/takeout_legacy.py +15 -11
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/telegram.py +13 -11
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/telegram_legacy.py +18 -7
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/twitter.py +6 -5
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/vcs.py +5 -3
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/viber.py +10 -9
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/website.py +4 -4
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/zulip.py +3 -2
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sqlite.py +7 -4
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/common.py +8 -5
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/server_helper.py +11 -8
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/sources/test_auto.py +2 -3
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/sources/test_filetypes.py +2 -1
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/sources/test_hypothesis.py +3 -3
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/sources/test_org.py +2 -3
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/sources/test_plaintext.py +0 -1
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/sources/test_shellcmd.py +3 -4
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/sources/test_takeout.py +3 -5
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/test_cannon.py +5 -5
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/test_cli.py +4 -6
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/test_config.py +7 -8
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/test_db_dump.py +11 -12
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/test_extract.py +10 -6
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/test_indexer.py +14 -8
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/test_server.py +2 -3
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/test_traverse.py +0 -2
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/utils.py +4 -4
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia.egg-info/PKG-INFO +3 -2
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia.egg-info/SOURCES.txt +1 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia.egg-info/requires.txt +1 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/common.py +18 -8
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/end2end_test.py +19 -10
- promnesia-1.3.20241021/tests/testdata/test_multiple_page_updates/index.html +32 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tox.ini +26 -22
- promnesia-1.2.20240810/ruff.toml +0 -25
- promnesia-1.2.20240810/src/promnesia/compat.py +0 -12
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/.ci/end2end/.dockerignore +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/.ci/end2end/Dockerfile +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/.ci/end2end/build_and_run.sh +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/.ci/end2end/scripts/build_and_run_tests.sh +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/.ci/end2end/scripts/setup_chrome.sh +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/.ci/end2end/scripts/setup_firefox.sh +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/.ci/end2end/scripts/setup_node.sh +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/.ci/fake-systemd/systemctl +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/.ci/github-ci-compat +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/.ci/release +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/.ci/run +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/.dockerignore +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/.gitignore +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/.gitmodules +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/CHANGELOG.org +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/LICENSE +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/README.org +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/benchmarks/20231115.org +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/ci/run-github-locally +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/doc/DEVELOPMENT.org +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/doc/GUIDE.org +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/doc/PRIVACY.org +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/doc/SOURCES.org +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/doc/TROUBLESHOOTING.org +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/doc/addons-mozilla-org.org +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/doc/config.py +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/docker/.gitignore +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/docker/docker_files/Dockerfile +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/docker/docker_files/Dockerfile-indexer +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/docker/docker_files/docker-compose.yaml +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/docker/docker_files/indexer-config.py.example +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/docker/docker_files/indexer-entrypoint.sh +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/docker/get-some-data.sh +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/docker/init.sh +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/docker/start.sh +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/.ci/build +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/.editorconfig +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/MANUAL-TESTS.org +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/TODO.org +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/__mocks__/browser.js +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/__mocks__/dom-form-serializer.js +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/amo-metadata.json +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/babel.config.cjs +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/build +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/eslint.config.js +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/generate_manifest.js +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/jest.config.cjs +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/old/flow-typed/webextension-polyfill.js +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/old/patcher.js +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/old/webpack.config.js +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/package-lock.json +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/package.json +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/rollup.config.js +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/api.ts +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/background.ts +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/background_chrome_mv2.js +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/common.ts +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/compat.ts +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/display.ts +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/filterlist.ts +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/images/generate +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/images/ic_blacklisted_48.png +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/images/ic_blue_48.png +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/images/ic_boring_48.png +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/images/ic_error.png +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/images/ic_not_visited_48.png +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/images/ic_relatives_48.png +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/images/ic_visited_48.png +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/images/source_48.svg +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/normalise.ts +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/notifications.ts +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/options.ts +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/options_page.css +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/options_page.html +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/options_page.ts +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/search.html +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/search.ts +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/selenium_bridge.js +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/showvisited.css +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/showvisited.js +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/sidebar-outer.css +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/sidebar.css +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/sidebar.ts +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/sources.ts +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/toastify.css +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/src/toastify.js +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/tests/anchorme.test.js +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/tests/common.test.js +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/tests/defensify.test.js +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/tests/integration.test.js +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/tests/test.html +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/extension/tsconfig.json +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/pytest.ini +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/scripts/backup-phone-history.sh +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/scripts/browser_history.py +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/scripts/promnesia +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/setup.cfg +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/misc/__init__.pyi +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/misc/config_example.py +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/py.typed +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/sources/__init__.pyi +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/__init__.py +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/sources/__init__.py +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/test_compare.py +1 -1
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia/tests/test_extract_urls.py +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia.egg-info/dependency_links.txt +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia.egg-info/entry_points.txt +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia.egg-info/not-zip-safe +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/src/promnesia.egg-info/top_level.txt +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/addon.py +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/addon_helper.py +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/convert_screencast.py +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/demos.py +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/install_and_run +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/record.py +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/auto/orgs/file.org +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/auto/orgs/file2.org +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/auto/orgs/file3.org +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/auto/orgs/file4.org +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/auto/orgs/file5.org +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/auto/pocket.json +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/custom/file1.txt +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/custom/file2.txt +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/logseq-graph/logseq/config.edn +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/logseq-graph/pages/Note.md +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/normalise/ff.txt +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/obsidian-vault/.obsidian/app.json +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/obsidian-vault/Note.md +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/takeout/Takeout/My Activity/Chrome/MyActivity.html +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/takeout-20150518T000000Z.zip +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/test_config.py +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/traverse/ignoreme.txt +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/traverse/ignoreme2/notrealignored.txt +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/traverse/imhere.txt +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/traverse/imhere2/real.txt +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/testdata/weird.txt +0 -0
- {promnesia-1.2.20240810 → promnesia-1.3.20241021}/tests/webdriver_utils.py +0 -0
@@ -24,22 +24,23 @@ jobs:
|
|
24
24
|
fail-fast: false
|
25
25
|
matrix:
|
26
26
|
platform: [ubuntu-latest, macos-latest, windows-latest]
|
27
|
-
python-version: ['3.
|
27
|
+
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
|
28
28
|
exclude: [
|
29
29
|
# windows runners are pretty scarce, so let's only run lowest and highest python version
|
30
|
-
{platform: windows-latest, python-version: '3.9' },
|
31
30
|
{platform: windows-latest, python-version: '3.10'},
|
32
31
|
{platform: windows-latest, python-version: '3.11'},
|
32
|
+
{platform: windows-latest, python-version: '3.12'},
|
33
33
|
|
34
34
|
# same, macos is a bit too slow and ubuntu covers python quirks well
|
35
|
-
{platform: macos-latest , python-version: '3.9' },
|
36
35
|
{platform: macos-latest , python-version: '3.10'},
|
37
36
|
{platform: macos-latest , python-version: '3.11'},
|
37
|
+
{platform: macos-latest , python-version: '3.12'},
|
38
38
|
]
|
39
39
|
|
40
40
|
runs-on: ${{ matrix.platform }}
|
41
41
|
|
42
|
-
#
|
42
|
+
# useful for 'optional' pipelines
|
43
|
+
# continue-on-error: ${{ matrix.platform == 'windows-latest' }}
|
43
44
|
|
44
45
|
steps:
|
45
46
|
# ugh https://github.com/actions/toolkit/blob/main/docs/commands.md#path-manipulation
|
@@ -63,11 +64,13 @@ jobs:
|
|
63
64
|
- if: matrix.platform == 'ubuntu-latest' # no need to compute coverage for other platforms
|
64
65
|
uses: actions/upload-artifact@v4
|
65
66
|
with:
|
67
|
+
include-hidden-files: true
|
66
68
|
name: .coverage.mypy-core_${{ matrix.platform }}_${{ matrix.python-version }}
|
67
69
|
path: .coverage.mypy-core/
|
68
70
|
- if: matrix.platform == 'ubuntu-latest' # no need to compute coverage for other platforms
|
69
71
|
uses: actions/upload-artifact@v4
|
70
72
|
with:
|
73
|
+
include-hidden-files: true
|
71
74
|
name: .coverage.mypy-misc_${{ matrix.platform }}_${{ matrix.python-version }}
|
72
75
|
path: .coverage.mypy-misc/
|
73
76
|
|
@@ -1,15 +1,16 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: promnesia
|
3
|
-
Version: 1.2.20240810
|
3
|
+
Version: 1.3.20241021
|
4
4
|
Summary: Enhancement of your browsing history
|
5
5
|
Home-page: https://github.com/karlicoss/promnesia
|
6
6
|
Author: Dmitrii Gerasimov
|
7
7
|
Author-email: karlicoss@gmail.com
|
8
|
-
Requires-Python: >=3.
|
8
|
+
Requires-Python: >=3.9
|
9
9
|
License-File: LICENSE
|
10
10
|
Requires-Dist: appdirs
|
11
11
|
Requires-Dist: tzlocal
|
12
12
|
Requires-Dist: more_itertools
|
13
|
+
Requires-Dist: typing-extensions
|
13
14
|
Requires-Dist: pytz
|
14
15
|
Requires-Dist: sqlalchemy>=2.0
|
15
16
|
Requires-Dist: urlextract
|
@@ -1,14 +1,13 @@
|
|
1
1
|
[mypy]
|
2
|
-
namespace_packages = True
|
3
2
|
pretty = True
|
4
3
|
show_error_context = True
|
5
|
-
show_error_codes = True
|
6
4
|
show_column_numbers = True
|
7
5
|
show_error_end = True
|
6
|
+
warn_redundant_casts = True
|
8
7
|
warn_unused_ignores = True
|
9
8
|
check_untyped_defs = True
|
10
|
-
enable_error_code = possibly-undefined
|
11
9
|
strict_equality = True
|
10
|
+
enable_error_code = possibly-undefined
|
12
11
|
|
13
12
|
# not sure why mypy started discovering it (since 0.800??)
|
14
13
|
[mypy-hypothesis]
|
@@ -0,0 +1,147 @@
|
|
1
|
+
target-version = "py39" # NOTE: inferred from pyproject.toml if present
|
2
|
+
|
3
|
+
lint.extend-select = [
|
4
|
+
"F", # flakes rules -- default, but extend just in case
|
5
|
+
"E", # pycodestyle -- default, but extend just in case
|
6
|
+
"W", # various warnings
|
7
|
+
|
8
|
+
"B", # 'bugbear' set -- various possible bugs
|
9
|
+
"C4", # flake8-comprehensions -- unnecessary list/map/dict calls
|
10
|
+
"COM", # trailing commas
|
11
|
+
"EXE", # various checks wrt executable files
|
12
|
+
"I", # sort imports
|
13
|
+
"ICN", # various import conventions
|
14
|
+
"FBT", # detect use of boolean arguments
|
15
|
+
"FURB", # various rules
|
16
|
+
"PERF", # various potential performance speedups
|
17
|
+
"PD", # pandas rules
|
18
|
+
"PIE", # 'misc' lints
|
19
|
+
"PLC", # pylint convention rules
|
20
|
+
"PLR", # pylint refactor rules
|
21
|
+
"PLW", # pylint warnings
|
22
|
+
"PT", # pytest stuff
|
23
|
+
"PYI", # various type hinting rules
|
24
|
+
"RET", # early returns
|
25
|
+
"RUF", # various ruff-specific rules
|
26
|
+
"TID", # various imports suggestions
|
27
|
+
"TRY", # various exception handling rules
|
28
|
+
"UP", # detect deprecated python stdlib stuff
|
29
|
+
"FA", # suggest using from __future__ import annotations
|
30
|
+
"PTH", # pathlib migration
|
31
|
+
"ARG", # unused argument checks
|
32
|
+
"A", # builtin shadowing
|
33
|
+
"G", # logging stuff
|
34
|
+
# "EM", # TODO hmm could be helpful to prevent duplicate err msg in traceback.. but kinda annoying
|
35
|
+
|
36
|
+
# "ALL", # uncomment this to check for new rules!
|
37
|
+
]
|
38
|
+
|
39
|
+
lint.ignore = [
|
40
|
+
"D", # annoying nags about docstrings
|
41
|
+
"N", # pep naming
|
42
|
+
"TCH", # type checking rules, mostly just suggests moving imports under TYPE_CHECKING
|
43
|
+
"S", # bandit (security checks) -- tends to be not very useful, lots of nitpicks
|
44
|
+
"DTZ", # datetimes checks -- complaining about missing tz and mostly false positives
|
45
|
+
"FIX", # complains about fixmes/todos -- annoying
|
46
|
+
"TD", # complains about todo formatting -- too annoying
|
47
|
+
"ANN", # missing type annotations? seems way to strict though
|
48
|
+
|
49
|
+
### too opinionated style checks
|
50
|
+
"E501", # too long lines
|
51
|
+
"E702", # Multiple statements on one line (semicolon)
|
52
|
+
"E731", # assigning lambda instead of using def
|
53
|
+
"E741", # Ambiguous variable name: `l`
|
54
|
+
"E742", # Ambiguous class name: `O
|
55
|
+
"E401", # Multiple imports on one line
|
56
|
+
"F403", # import *` used; unable to detect undefined names
|
57
|
+
###
|
58
|
+
|
59
|
+
###
|
60
|
+
"E722", # Do not use bare `except` ## Sometimes it's useful for defensive imports and that sort of thing..
|
61
|
+
"F811", # Redefinition of unused # this gets in the way of pytest fixtures (e.g. in cachew)
|
62
|
+
|
63
|
+
## might be nice .. but later and I don't wanna make it strict
|
64
|
+
"E402", # Module level import not at top of file
|
65
|
+
|
66
|
+
### maybe consider these soon
|
67
|
+
# sometimes it's useful to give a variable a name even if we don't use it as a documentation
|
68
|
+
# on the other hand, often is a sign of error
|
69
|
+
"F841", # Local variable `count` is assigned to but never used
|
70
|
+
###
|
71
|
+
|
72
|
+
"RUF100", # unused noqa -- handle later
|
73
|
+
"RUF012", # mutable class attrs should be annotated with ClassVar... ugh pretty annoying for user configs
|
74
|
+
|
75
|
+
### these are just nitpicky, we usually know better
|
76
|
+
"PLR0911", # too many return statements
|
77
|
+
"PLR0912", # too many branches
|
78
|
+
"PLR0913", # too many function arguments
|
79
|
+
"PLR0915", # too many statements
|
80
|
+
"PLR1714", # consider merging multiple comparisons
|
81
|
+
"PLR2044", # line with empty comment
|
82
|
+
"PLR5501", # use elif instead of else if
|
83
|
+
"PLR2004", # magic value in comparison -- super annoying in tests
|
84
|
+
###
|
85
|
+
"PLR0402", # import X.Y as Y -- TODO maybe consider enabling it, but double check
|
86
|
+
|
87
|
+
"B009", # calling gettattr with constant attribute -- this is useful to convince mypy
|
88
|
+
"B010", # same as above, but setattr
|
89
|
+
"B011", # complains about assert False
|
90
|
+
"B017", # pytest.raises(Exception)
|
91
|
+
"B023", # seems to result in false positives?
|
92
|
+
"B028", # suggest using explicit stacklevel? TODO double check later, but not sure it's useful
|
93
|
+
|
94
|
+
# complains about useless pass, but has sort of a false positive if the function has a docstring?
|
95
|
+
# this is common for click entrypoints (e.g. in __main__), so disable
|
96
|
+
"PIE790",
|
97
|
+
|
98
|
+
# a bit too annoying, offers to convert for loops to list comprehension
|
99
|
+
# , which may heart readability
|
100
|
+
"PERF401",
|
101
|
+
|
102
|
+
# suggests no using exception in for loops
|
103
|
+
# we do use this technique a lot, plus in 3.11 happy path exception handling is "zero-cost"
|
104
|
+
"PERF203",
|
105
|
+
|
106
|
+
"RET504", # unnecessary assignment before returning -- that can be useful for readability
|
107
|
+
"RET505", # unnecessary else after return -- can hurt readability
|
108
|
+
|
109
|
+
"PLW0603", # global variable update.. we usually know why we are doing this
|
110
|
+
"PLW2901", # for loop variable overwritten, usually this is intentional
|
111
|
+
|
112
|
+
"PT004", # deprecated rule, will be removed later
|
113
|
+
"PT011", # pytest raises should is too broad
|
114
|
+
"PT012", # pytest raises should contain a single statement
|
115
|
+
|
116
|
+
"COM812", # trailing comma missing -- mostly just being annoying with long multiline strings
|
117
|
+
|
118
|
+
"PD901", # generic variable name df
|
119
|
+
|
120
|
+
"TRY003", # suggests defining exception messages in exception class -- kinda annoying
|
121
|
+
"TRY004", # prefer TypeError -- don't see the point
|
122
|
+
"TRY201", # raise without specifying exception name -- sometimes hurts readability
|
123
|
+
"TRY400", # TODO double check this, might be useful
|
124
|
+
"TRY401", # redundant exception in logging.exception call? TODO double check, might result in excessive logging
|
125
|
+
|
126
|
+
"PGH", # TODO force error code in mypy instead
|
127
|
+
|
128
|
+
"TID252", # Prefer absolute imports over relative imports from parent modules
|
129
|
+
|
130
|
+
"UP038", # suggests using | (union) in isisntance checks.. but it results in slower code
|
131
|
+
|
132
|
+
## too annoying
|
133
|
+
"T20", # just complains about prints and pprints
|
134
|
+
"Q", # flake quotes, too annoying
|
135
|
+
"C90", # some complexity checking
|
136
|
+
"G004", # logging statement uses f string
|
137
|
+
"ERA001", # commented out code
|
138
|
+
"SLF001", # private member accessed
|
139
|
+
"BLE001", # do not catch 'blind' Exception
|
140
|
+
"INP001", # complains about implicit namespace packages
|
141
|
+
"SIM", # some if statements crap
|
142
|
+
"RSE102", # complains about missing parens in exceptions
|
143
|
+
##
|
144
|
+
|
145
|
+
"ARG001", # ugh, kinda annoying when using pytest fixtures
|
146
|
+
"RUF001", "RUF002", "RUF003", # spams about non-latin characters that we do use for testing
|
147
|
+
]
|
@@ -32,11 +32,12 @@ def main() -> None:
|
|
32
32
|
author_email='karlicoss@gmail.com',
|
33
33
|
description='Enhancement of your browsing history',
|
34
34
|
|
35
|
-
python_requires='>=3.
|
35
|
+
python_requires='>=3.9',
|
36
36
|
install_requires=[
|
37
37
|
'appdirs', # for portable user directories detection
|
38
38
|
'tzlocal',
|
39
39
|
'more_itertools',
|
40
|
+
'typing-extensions',
|
40
41
|
'pytz',
|
41
42
|
'sqlalchemy>=2.0', # DB api
|
42
43
|
|
@@ -1,6 +1,17 @@
|
|
1
|
-
from pathlib import Path
|
2
|
-
from .common import PathIsh, Visit, Source, last, Loc, Results, DbVisit, Context, Res
|
3
|
-
|
4
1
|
# add deprecation warning so eventually this may converted to a namespace package?
|
5
2
|
import warnings
|
3
|
+
|
4
|
+
from .common import ( # noqa: F401
|
5
|
+
Context,
|
6
|
+
DbVisit,
|
7
|
+
Loc,
|
8
|
+
PathIsh,
|
9
|
+
Res,
|
10
|
+
Results,
|
11
|
+
Source,
|
12
|
+
Visit,
|
13
|
+
last,
|
14
|
+
)
|
15
|
+
|
16
|
+
# TODO think again about it -- what are the pros and cons?
|
6
17
|
warnings.warn("DEPRECATED! Please import directly from 'promnesia.common', e.g. 'from promnesia.common import Visit, Source, Results'", DeprecationWarning)
|
@@ -5,24 +5,34 @@ import ast
|
|
5
5
|
import importlib
|
6
6
|
import inspect
|
7
7
|
import os
|
8
|
-
|
8
|
+
import shlex
|
9
9
|
import shutil
|
10
|
-
from subprocess import run, check_call, Popen
|
11
10
|
import sys
|
11
|
+
from collections.abc import Iterable, Iterator, Sequence
|
12
|
+
from pathlib import Path
|
13
|
+
from subprocess import Popen, check_call, run
|
12
14
|
from tempfile import TemporaryDirectory, gettempdir
|
13
|
-
from typing import Callable
|
14
|
-
|
15
|
-
|
16
|
-
from . import
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
15
|
+
from typing import Callable
|
16
|
+
|
17
|
+
from . import config, server
|
18
|
+
from .common import (
|
19
|
+
DbVisit,
|
20
|
+
Extractor,
|
21
|
+
PathIsh,
|
22
|
+
Res,
|
23
|
+
Source,
|
24
|
+
default_config_path,
|
25
|
+
get_system_tz,
|
26
|
+
get_tmpdir,
|
27
|
+
logger,
|
28
|
+
user_config_file,
|
29
|
+
)
|
21
30
|
from .database.dump import visits_to_sqlite
|
22
31
|
from .extract import extract_visits
|
32
|
+
from .misc import install_server
|
23
33
|
|
24
34
|
|
25
|
-
def iter_all_visits(sources_subset: Iterable[
|
35
|
+
def iter_all_visits(sources_subset: Iterable[str | int] = ()) -> Iterator[Res[DbVisit]]:
|
26
36
|
cfg = config.get()
|
27
37
|
output_dir = cfg.output_dir
|
28
38
|
# not sure if belongs here??
|
@@ -74,7 +84,7 @@ def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Re
|
|
74
84
|
logger.warning("unknown --sources: %s", ", ".join(repr(i) for i in sources_subset))
|
75
85
|
|
76
86
|
|
77
|
-
def _do_index(dry: bool=False, sources_subset: Iterable[
|
87
|
+
def _do_index(*, dry: bool = False, sources_subset: Iterable[str | int] = (), overwrite_db: bool = False) -> Iterable[Exception]:
|
78
88
|
# also keep & return errors for further display
|
79
89
|
errors: list[Exception] = []
|
80
90
|
def it() -> Iterable[Res[DbVisit]]:
|
@@ -98,9 +108,10 @@ def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=(), ove
|
|
98
108
|
|
99
109
|
def do_index(
|
100
110
|
config_file: Path,
|
101
|
-
|
102
|
-
|
103
|
-
|
111
|
+
*,
|
112
|
+
dry: bool = False,
|
113
|
+
sources_subset: Iterable[str | int] = (),
|
114
|
+
overwrite_db: bool = False,
|
104
115
|
) -> Sequence[Exception]:
|
105
116
|
config.load_from(config_file) # meh.. should be cleaner
|
106
117
|
try:
|
@@ -120,7 +131,8 @@ def demo_sources() -> dict[str, Callable[[], Extractor]]:
|
|
120
131
|
def lazy(name: str) -> Callable[[], Extractor]:
|
121
132
|
# helper to avoid failed imports etc, since people might be lacking necessary dependencies
|
122
133
|
def inner() -> Extractor:
|
123
|
-
|
134
|
+
# TODO why this import??
|
135
|
+
from . import sources # noqa: F401
|
124
136
|
module = importlib.import_module(f'promnesia.sources.{name}')
|
125
137
|
return getattr(module, 'index')
|
126
138
|
return inner
|
@@ -145,7 +157,7 @@ def do_demo(
|
|
145
157
|
config_file: Path | None,
|
146
158
|
dry: bool=False,
|
147
159
|
name: str='demo',
|
148
|
-
sources_subset: Iterable[
|
160
|
+
sources_subset: Iterable[str | int]=(),
|
149
161
|
overwrite_db: bool=False,
|
150
162
|
) -> None:
|
151
163
|
with TemporaryDirectory() as tdir:
|
@@ -219,9 +231,10 @@ def _config_check(cfg: Path) -> Iterable[Exception]:
|
|
219
231
|
logger.info('config: %s', cfg)
|
220
232
|
|
221
233
|
def check(cmd: list[str | Path], **kwargs) -> Iterable[Exception]:
|
222
|
-
logger.debug(
|
223
|
-
res = run(cmd, **kwargs)
|
234
|
+
logger.debug(shlex.join(map(str, cmd)))
|
235
|
+
res = run(cmd, **kwargs) # noqa: PLW1510
|
224
236
|
if res.returncode > 0:
|
237
|
+
# TODO what's up with empty exception??
|
225
238
|
yield Exception()
|
226
239
|
|
227
240
|
logger.info('Checking syntax...')
|
@@ -239,7 +252,7 @@ def _config_check(cfg: Path) -> Iterable[Exception]:
|
|
239
252
|
# todo not sure if should be more defensive than check_call here
|
240
253
|
logger.info('Checking type safety...')
|
241
254
|
try:
|
242
|
-
import mypy
|
255
|
+
import mypy # noqa: F401
|
243
256
|
except ImportError:
|
244
257
|
logger.warning("mypy not found, can't use it to check config!")
|
245
258
|
else:
|
@@ -291,7 +304,7 @@ def cli_doctor_server(args: argparse.Namespace) -> None:
|
|
291
304
|
logger.info('You should see the database path and version above!')
|
292
305
|
|
293
306
|
|
294
|
-
def _ordinal_or_name(s: str) ->
|
307
|
+
def _ordinal_or_name(s: str) -> str | int:
|
295
308
|
try:
|
296
309
|
s = int(s) # type: ignore
|
297
310
|
except ValueError:
|
@@ -328,7 +341,7 @@ def main() -> None:
|
|
328
341
|
|
329
342
|
F = lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, width=120)
|
330
343
|
p = argparse.ArgumentParser(formatter_class=F)
|
331
|
-
subp = p.add_subparsers(dest='mode'
|
344
|
+
subp = p.add_subparsers(dest='mode' )
|
332
345
|
ep = subp.add_parser('index', help='Create/update the link database', formatter_class=F)
|
333
346
|
add_index_args(ep, default_config_path())
|
334
347
|
# TODO use some way to override or provide config only via cmdline?
|
@@ -348,7 +361,7 @@ def main() -> None:
|
|
348
361
|
ap.add_argument('--no-serve', action='store_const', const=None, dest='port', help='Pass to only index without running server')
|
349
362
|
ap.add_argument(
|
350
363
|
'--as',
|
351
|
-
choices=
|
364
|
+
choices=sorted(demo_sources().keys()),
|
352
365
|
default='guess',
|
353
366
|
help='Promnesia source to index as (see https://github.com/karlicoss/promnesia/tree/master/src/promnesia/sources for the full list)',
|
354
367
|
)
|
@@ -359,7 +372,7 @@ def main() -> None:
|
|
359
372
|
install_server.setup_parser(isp)
|
360
373
|
|
361
374
|
cp = subp.add_parser('config', help='Config management')
|
362
|
-
cp.set_defaults(func=lambda *
|
375
|
+
cp.set_defaults(func=lambda *_args: cp.print_help())
|
363
376
|
scp = cp.add_subparsers()
|
364
377
|
ccp = scp.add_parser('check', help='Check config')
|
365
378
|
ccp.set_defaults(func=config_check)
|
@@ -373,7 +386,7 @@ def main() -> None:
|
|
373
386
|
|
374
387
|
dp = subp.add_parser('doctor', help='Troubleshooting assistant')
|
375
388
|
dp.add_argument('--config', type=Path, default=default_config_path(), help='Config path')
|
376
|
-
dp.set_defaults(func=lambda *
|
389
|
+
dp.set_defaults(func=lambda *_args: dp.print_help())
|
377
390
|
sdp = dp.add_subparsers()
|
378
391
|
sdp.add_parser('config' , help='Check config' ).set_defaults(func=config_check )
|
379
392
|
sdp.add_parser('database', help='Inspect database').set_defaults(func=cli_doctor_db)
|
@@ -9,16 +9,17 @@ are same content, but you can't tell that by URL equality. Even canonical urls a
|
|
9
9
|
|
10
10
|
Also some experiments to establish 'URL hierarchy'.
|
11
11
|
"""
|
12
|
-
|
12
|
+
from __future__ import annotations
|
13
13
|
|
14
|
-
from itertools import chain
|
15
14
|
import re
|
16
15
|
import typing
|
17
|
-
from typing import Iterable, NamedTuple, Set, Optional, List, Sequence, Union, Tuple, Dict, Any, Collection
|
18
|
-
|
19
16
|
import urllib.parse
|
20
|
-
from
|
17
|
+
from collections.abc import Collection, Iterable, Sequence
|
21
18
|
|
19
|
+
# TODO eh?? they fixed mobile.twitter.com?
|
20
|
+
from itertools import chain
|
21
|
+
from typing import Any, NamedTuple, Union
|
22
|
+
from urllib.parse import SplitResult, parse_qsl, urlencode, urlsplit, urlunsplit
|
22
23
|
|
23
24
|
# this has some benchmark, but quite a few libraries seem unmaintained, sadly
|
24
25
|
# I guess i'll stick to default for now, until it's a critical bottleneck
|
@@ -108,11 +109,11 @@ default_qkeep = [
|
|
108
109
|
|
109
110
|
# TODO perhaps, decide if fragment is meaningful (e.g. wiki) or random sequence of letters?
|
110
111
|
class Spec(NamedTuple):
|
111
|
-
qkeep :
|
112
|
-
qremove:
|
112
|
+
qkeep : Collection[str] | bool | None = None
|
113
|
+
qremove: set[str] | None = None
|
113
114
|
fkeep : bool = False
|
114
115
|
|
115
|
-
def keep_query(self, q: str) ->
|
116
|
+
def keep_query(self, q: str) -> int | None: # returns order
|
116
117
|
if self.qkeep is True:
|
117
118
|
return 1
|
118
119
|
qkeep = {
|
@@ -134,13 +135,13 @@ class Spec(NamedTuple):
|
|
134
135
|
return None
|
135
136
|
|
136
137
|
@classmethod
|
137
|
-
def make(cls, **kwargs) ->
|
138
|
+
def make(cls, **kwargs) -> Spec:
|
138
139
|
return cls(**kwargs)
|
139
140
|
|
140
141
|
S = Spec
|
141
142
|
|
142
143
|
# TODO perhaps these can be machine learnt from large set of urls?
|
143
|
-
specs:
|
144
|
+
specs: dict[str, Spec] = {
|
144
145
|
'youtube.com': S(
|
145
146
|
# TODO search_query?
|
146
147
|
qkeep=[ # note: experimental.. order matters here
|
@@ -178,7 +179,6 @@ specs: Dict[str, Spec] = {
|
|
178
179
|
|
179
180
|
'source', 'tsid', 'refsrc', 'pnref', 'rc', '_rdr', 'src', 'hc_location', 'section', 'permPage', 'soft', 'pn_ref', 'action',
|
180
181
|
'ti', 'aref', 'event_time_id', 'action_history', 'filter', 'ref_notif_type', 'has_source', 'source_newsfeed_story_type',
|
181
|
-
'ref_notif_type',
|
182
182
|
},
|
183
183
|
),
|
184
184
|
'physicstravelguide.com': S(fkeep=True), # TODO instead, pass fkeep marker object for shorter spec?
|
@@ -218,10 +218,10 @@ Spec2 = Any # TODO
|
|
218
218
|
|
219
219
|
# TODO this should be a map
|
220
220
|
Frag = Any
|
221
|
-
Parts = Sequence[
|
221
|
+
Parts = Sequence[tuple[str, str]]
|
222
222
|
|
223
223
|
|
224
|
-
def _yc(domain: str, path: str, qq: Parts, frag: Frag) ->
|
224
|
+
def _yc(domain: str, path: str, qq: Parts, frag: Frag) -> tuple[Any, Any, Parts, Frag]:
|
225
225
|
if path[:5] == '/from':
|
226
226
|
site = dict(qq).get('site')
|
227
227
|
if site is not None:
|
@@ -232,7 +232,7 @@ def _yc(domain: str, path: str, qq: Parts, frag: Frag) -> Tuple[Any, Any, Parts,
|
|
232
232
|
# TODO this should be in-place? for brevity?
|
233
233
|
return (domain, path, qq, frag)
|
234
234
|
|
235
|
-
def get_spec2(dom: str) ->
|
235
|
+
def get_spec2(dom: str) -> Spec2 | None:
|
236
236
|
return {
|
237
237
|
'news.ycombinator.com': _yc,
|
238
238
|
}.get(dom)
|
@@ -285,10 +285,10 @@ def transform_split(split: SplitResult):
|
|
285
285
|
REST = r'(?P<rest>.*)'
|
286
286
|
|
287
287
|
Left = Union[str, Sequence[str]]
|
288
|
-
Right =
|
288
|
+
Right = tuple[str, str, str]
|
289
289
|
# the idea is that we can unify certain URLs here and map them to the 'canonical' one
|
290
290
|
# this is a dict only for grouping but should be a list really.. todo
|
291
|
-
rules:
|
291
|
+
rules: dict[Left, Right] = {
|
292
292
|
# TODO m. handling might be quite common
|
293
293
|
# f'm.youtube.com/{REST}': ('youtube.com', '{rest}'),
|
294
294
|
(
|
@@ -322,9 +322,9 @@ def transform_split(split: SplitResult):
|
|
322
322
|
continue
|
323
323
|
gd = m.groupdict()
|
324
324
|
if len(to) == 2:
|
325
|
-
to = to
|
325
|
+
to = (*to, '')
|
326
326
|
|
327
|
-
(netloc, path, qq) =
|
327
|
+
(netloc, path, qq) = (t.format(**gd) for t in to)
|
328
328
|
qparts.extend(parse_qsl(qq, keep_blank_values=True)) # TODO hacky..
|
329
329
|
# TODO eh, qparts should really be a map or something...
|
330
330
|
break
|
@@ -361,7 +361,7 @@ def myunsplit(domain: str, path: str, query: str, fragment: str) -> str:
|
|
361
361
|
# ]
|
362
362
|
# for re in regexes:
|
363
363
|
|
364
|
-
def handle_archive_org(url: str) ->
|
364
|
+
def handle_archive_org(url: str) -> str | None:
|
365
365
|
are = r'web.archive.org/web/(?P<timestamp>\d+)/(?P<rest>.*)'
|
366
366
|
m = re.fullmatch(are, url)
|
367
367
|
if m is None:
|
@@ -697,8 +697,8 @@ def groups(it, args): # pragma: no cover
|
|
697
697
|
all_pats = get_patterns()
|
698
698
|
|
699
699
|
from collections import Counter
|
700
|
-
c: typing.Counter[
|
701
|
-
unmatched:
|
700
|
+
c: typing.Counter[str | None] = Counter()
|
701
|
+
unmatched: list[str] = []
|
702
702
|
|
703
703
|
def dump():
|
704
704
|
print(c)
|
@@ -756,10 +756,10 @@ def groups(it, args): # pragma: no cover
|
|
756
756
|
def display(it, args) -> None: # pragma: no cover
|
757
757
|
# TODO better name?
|
758
758
|
import difflib
|
759
|
-
# pylint: disable=import-error
|
760
|
-
from termcolor import colored as C # type: ignore
|
761
759
|
from sys import stdout
|
762
760
|
|
761
|
+
from termcolor import colored as C # type: ignore
|
762
|
+
|
763
763
|
for line in it:
|
764
764
|
line = line.strip()
|
765
765
|
if args.human:
|