quick-sentiments 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- demo/sentiment_prediction.py +67 -0
- quick_sentiments/__init__.py +7 -0
- quick_sentiments/ml_algo/XGB.py +91 -0
- quick_sentiments/ml_algo/__init__.py +0 -0
- quick_sentiments/ml_algo/logit.py +60 -0
- quick_sentiments/ml_algo/rf.py +67 -0
- quick_sentiments/pipeline.py +128 -0
- quick_sentiments/predict.py +56 -0
- quick_sentiments/preprocess.py +195 -0
- quick_sentiments/vect/BOW.py +28 -0
- quick_sentiments/vect/__init__.py +0 -0
- quick_sentiments/vect/tf.py +35 -0
- quick_sentiments/vect/tfidf.py +27 -0
- quick_sentiments/vect/wv.py +42 -0
- quick_sentiments/virtual_environment_setup.py +102 -0
- quick_sentiments-0.1.9.dist-info/METADATA +133 -0
- quick_sentiments-0.1.9.dist-info/RECORD +784 -0
- quick_sentiments-0.1.9.dist-info/WHEEL +5 -0
- quick_sentiments-0.1.9.dist-info/top_level.txt +4 -0
- quicktest/Lib/site-packages/_distutils_hack/__init__.py +222 -0
- quicktest/Lib/site-packages/_distutils_hack/override.py +1 -0
- quicktest/Lib/site-packages/build/__init__.py +39 -0
- quicktest/Lib/site-packages/build/__main__.py +455 -0
- quicktest/Lib/site-packages/build/_builder.py +355 -0
- quicktest/Lib/site-packages/build/_compat/__init__.py +0 -0
- quicktest/Lib/site-packages/build/_compat/importlib.py +22 -0
- quicktest/Lib/site-packages/build/_compat/tarfile.py +32 -0
- quicktest/Lib/site-packages/build/_compat/tomllib.py +16 -0
- quicktest/Lib/site-packages/build/_ctx.py +98 -0
- quicktest/Lib/site-packages/build/_exceptions.py +65 -0
- quicktest/Lib/site-packages/build/_types.py +23 -0
- quicktest/Lib/site-packages/build/_util.py +63 -0
- quicktest/Lib/site-packages/build/env.py +372 -0
- quicktest/Lib/site-packages/build/py.typed +0 -0
- quicktest/Lib/site-packages/build/util.py +61 -0
- quicktest/Lib/site-packages/colorama/__init__.py +7 -0
- quicktest/Lib/site-packages/colorama/ansi.py +102 -0
- quicktest/Lib/site-packages/colorama/ansitowin32.py +277 -0
- quicktest/Lib/site-packages/colorama/initialise.py +121 -0
- quicktest/Lib/site-packages/colorama/tests/__init__.py +1 -0
- quicktest/Lib/site-packages/colorama/tests/ansi_test.py +76 -0
- quicktest/Lib/site-packages/colorama/tests/ansitowin32_test.py +294 -0
- quicktest/Lib/site-packages/colorama/tests/initialise_test.py +189 -0
- quicktest/Lib/site-packages/colorama/tests/isatty_test.py +57 -0
- quicktest/Lib/site-packages/colorama/tests/utils.py +49 -0
- quicktest/Lib/site-packages/colorama/tests/winterm_test.py +131 -0
- quicktest/Lib/site-packages/colorama/win32.py +180 -0
- quicktest/Lib/site-packages/colorama/winterm.py +195 -0
- quicktest/Lib/site-packages/packaging/__init__.py +15 -0
- quicktest/Lib/site-packages/packaging/_elffile.py +109 -0
- quicktest/Lib/site-packages/packaging/_manylinux.py +262 -0
- quicktest/Lib/site-packages/packaging/_musllinux.py +85 -0
- quicktest/Lib/site-packages/packaging/_parser.py +353 -0
- quicktest/Lib/site-packages/packaging/_structures.py +61 -0
- quicktest/Lib/site-packages/packaging/_tokenizer.py +195 -0
- quicktest/Lib/site-packages/packaging/licenses/__init__.py +145 -0
- quicktest/Lib/site-packages/packaging/licenses/_spdx.py +759 -0
- quicktest/Lib/site-packages/packaging/markers.py +362 -0
- quicktest/Lib/site-packages/packaging/metadata.py +862 -0
- quicktest/Lib/site-packages/packaging/py.typed +0 -0
- quicktest/Lib/site-packages/packaging/requirements.py +91 -0
- quicktest/Lib/site-packages/packaging/specifiers.py +1019 -0
- quicktest/Lib/site-packages/packaging/tags.py +656 -0
- quicktest/Lib/site-packages/packaging/utils.py +163 -0
- quicktest/Lib/site-packages/packaging/version.py +582 -0
- quicktest/Lib/site-packages/pip/__init__.py +13 -0
- quicktest/Lib/site-packages/pip/__main__.py +24 -0
- quicktest/Lib/site-packages/pip/__pip-runner__.py +50 -0
- quicktest/Lib/site-packages/pip/_internal/__init__.py +19 -0
- quicktest/Lib/site-packages/pip/_internal/build_env.py +311 -0
- quicktest/Lib/site-packages/pip/_internal/cache.py +292 -0
- quicktest/Lib/site-packages/pip/_internal/cli/__init__.py +4 -0
- quicktest/Lib/site-packages/pip/_internal/cli/autocompletion.py +171 -0
- quicktest/Lib/site-packages/pip/_internal/cli/base_command.py +236 -0
- quicktest/Lib/site-packages/pip/_internal/cli/cmdoptions.py +1074 -0
- quicktest/Lib/site-packages/pip/_internal/cli/command_context.py +27 -0
- quicktest/Lib/site-packages/pip/_internal/cli/main.py +79 -0
- quicktest/Lib/site-packages/pip/_internal/cli/main_parser.py +134 -0
- quicktest/Lib/site-packages/pip/_internal/cli/parser.py +294 -0
- quicktest/Lib/site-packages/pip/_internal/cli/progress_bars.py +68 -0
- quicktest/Lib/site-packages/pip/_internal/cli/req_command.py +508 -0
- quicktest/Lib/site-packages/pip/_internal/cli/spinners.py +159 -0
- quicktest/Lib/site-packages/pip/_internal/cli/status_codes.py +6 -0
- quicktest/Lib/site-packages/pip/_internal/commands/__init__.py +132 -0
- quicktest/Lib/site-packages/pip/_internal/commands/cache.py +222 -0
- quicktest/Lib/site-packages/pip/_internal/commands/check.py +54 -0
- quicktest/Lib/site-packages/pip/_internal/commands/completion.py +121 -0
- quicktest/Lib/site-packages/pip/_internal/commands/configuration.py +282 -0
- quicktest/Lib/site-packages/pip/_internal/commands/debug.py +199 -0
- quicktest/Lib/site-packages/pip/_internal/commands/download.py +147 -0
- quicktest/Lib/site-packages/pip/_internal/commands/freeze.py +108 -0
- quicktest/Lib/site-packages/pip/_internal/commands/hash.py +59 -0
- quicktest/Lib/site-packages/pip/_internal/commands/help.py +41 -0
- quicktest/Lib/site-packages/pip/_internal/commands/index.py +139 -0
- quicktest/Lib/site-packages/pip/_internal/commands/inspect.py +92 -0
- quicktest/Lib/site-packages/pip/_internal/commands/install.py +778 -0
- quicktest/Lib/site-packages/pip/_internal/commands/list.py +368 -0
- quicktest/Lib/site-packages/pip/_internal/commands/search.py +174 -0
- quicktest/Lib/site-packages/pip/_internal/commands/show.py +189 -0
- quicktest/Lib/site-packages/pip/_internal/commands/uninstall.py +113 -0
- quicktest/Lib/site-packages/pip/_internal/commands/wheel.py +183 -0
- quicktest/Lib/site-packages/pip/_internal/configuration.py +381 -0
- quicktest/Lib/site-packages/pip/_internal/distributions/__init__.py +21 -0
- quicktest/Lib/site-packages/pip/_internal/distributions/base.py +39 -0
- quicktest/Lib/site-packages/pip/_internal/distributions/installed.py +23 -0
- quicktest/Lib/site-packages/pip/_internal/distributions/sdist.py +150 -0
- quicktest/Lib/site-packages/pip/_internal/distributions/wheel.py +34 -0
- quicktest/Lib/site-packages/pip/_internal/exceptions.py +733 -0
- quicktest/Lib/site-packages/pip/_internal/index/__init__.py +2 -0
- quicktest/Lib/site-packages/pip/_internal/index/collector.py +505 -0
- quicktest/Lib/site-packages/pip/_internal/index/package_finder.py +1029 -0
- quicktest/Lib/site-packages/pip/_internal/index/sources.py +223 -0
- quicktest/Lib/site-packages/pip/_internal/locations/__init__.py +467 -0
- quicktest/Lib/site-packages/pip/_internal/locations/_distutils.py +173 -0
- quicktest/Lib/site-packages/pip/_internal/locations/_sysconfig.py +213 -0
- quicktest/Lib/site-packages/pip/_internal/locations/base.py +81 -0
- quicktest/Lib/site-packages/pip/_internal/main.py +12 -0
- quicktest/Lib/site-packages/pip/_internal/metadata/__init__.py +127 -0
- quicktest/Lib/site-packages/pip/_internal/metadata/_json.py +84 -0
- quicktest/Lib/site-packages/pip/_internal/metadata/base.py +688 -0
- quicktest/Lib/site-packages/pip/_internal/metadata/importlib/__init__.py +4 -0
- quicktest/Lib/site-packages/pip/_internal/metadata/importlib/_compat.py +55 -0
- quicktest/Lib/site-packages/pip/_internal/metadata/importlib/_dists.py +224 -0
- quicktest/Lib/site-packages/pip/_internal/metadata/importlib/_envs.py +188 -0
- quicktest/Lib/site-packages/pip/_internal/metadata/pkg_resources.py +270 -0
- quicktest/Lib/site-packages/pip/_internal/models/__init__.py +2 -0
- quicktest/Lib/site-packages/pip/_internal/models/candidate.py +34 -0
- quicktest/Lib/site-packages/pip/_internal/models/direct_url.py +237 -0
- quicktest/Lib/site-packages/pip/_internal/models/format_control.py +80 -0
- quicktest/Lib/site-packages/pip/_internal/models/index.py +28 -0
- quicktest/Lib/site-packages/pip/_internal/models/installation_report.py +53 -0
- quicktest/Lib/site-packages/pip/_internal/models/link.py +581 -0
- quicktest/Lib/site-packages/pip/_internal/models/scheme.py +31 -0
- quicktest/Lib/site-packages/pip/_internal/models/search_scope.py +132 -0
- quicktest/Lib/site-packages/pip/_internal/models/selection_prefs.py +51 -0
- quicktest/Lib/site-packages/pip/_internal/models/target_python.py +110 -0
- quicktest/Lib/site-packages/pip/_internal/models/wheel.py +92 -0
- quicktest/Lib/site-packages/pip/_internal/network/__init__.py +2 -0
- quicktest/Lib/site-packages/pip/_internal/network/auth.py +561 -0
- quicktest/Lib/site-packages/pip/_internal/network/cache.py +69 -0
- quicktest/Lib/site-packages/pip/_internal/network/download.py +186 -0
- quicktest/Lib/site-packages/pip/_internal/network/lazy_wheel.py +210 -0
- quicktest/Lib/site-packages/pip/_internal/network/session.py +519 -0
- quicktest/Lib/site-packages/pip/_internal/network/utils.py +96 -0
- quicktest/Lib/site-packages/pip/_internal/network/xmlrpc.py +60 -0
- quicktest/Lib/site-packages/pip/_internal/operations/__init__.py +0 -0
- quicktest/Lib/site-packages/pip/_internal/operations/build/__init__.py +0 -0
- quicktest/Lib/site-packages/pip/_internal/operations/build/build_tracker.py +124 -0
- quicktest/Lib/site-packages/pip/_internal/operations/build/metadata.py +39 -0
- quicktest/Lib/site-packages/pip/_internal/operations/build/metadata_editable.py +41 -0
- quicktest/Lib/site-packages/pip/_internal/operations/build/metadata_legacy.py +74 -0
- quicktest/Lib/site-packages/pip/_internal/operations/build/wheel.py +37 -0
- quicktest/Lib/site-packages/pip/_internal/operations/build/wheel_editable.py +46 -0
- quicktest/Lib/site-packages/pip/_internal/operations/build/wheel_legacy.py +102 -0
- quicktest/Lib/site-packages/pip/_internal/operations/check.py +187 -0
- quicktest/Lib/site-packages/pip/_internal/operations/freeze.py +255 -0
- quicktest/Lib/site-packages/pip/_internal/operations/install/__init__.py +2 -0
- quicktest/Lib/site-packages/pip/_internal/operations/install/editable_legacy.py +46 -0
- quicktest/Lib/site-packages/pip/_internal/operations/install/wheel.py +740 -0
- quicktest/Lib/site-packages/pip/_internal/operations/prepare.py +743 -0
- quicktest/Lib/site-packages/pip/_internal/pyproject.py +179 -0
- quicktest/Lib/site-packages/pip/_internal/req/__init__.py +92 -0
- quicktest/Lib/site-packages/pip/_internal/req/constructors.py +506 -0
- quicktest/Lib/site-packages/pip/_internal/req/req_file.py +552 -0
- quicktest/Lib/site-packages/pip/_internal/req/req_install.py +874 -0
- quicktest/Lib/site-packages/pip/_internal/req/req_set.py +119 -0
- quicktest/Lib/site-packages/pip/_internal/req/req_uninstall.py +650 -0
- quicktest/Lib/site-packages/pip/_internal/resolution/__init__.py +0 -0
- quicktest/Lib/site-packages/pip/_internal/resolution/base.py +20 -0
- quicktest/Lib/site-packages/pip/_internal/resolution/legacy/__init__.py +0 -0
- quicktest/Lib/site-packages/pip/_internal/resolution/legacy/resolver.py +600 -0
- quicktest/Lib/site-packages/pip/_internal/resolution/resolvelib/__init__.py +0 -0
- quicktest/Lib/site-packages/pip/_internal/resolution/resolvelib/base.py +141 -0
- quicktest/Lib/site-packages/pip/_internal/resolution/resolvelib/candidates.py +555 -0
- quicktest/Lib/site-packages/pip/_internal/resolution/resolvelib/factory.py +730 -0
- quicktest/Lib/site-packages/pip/_internal/resolution/resolvelib/found_candidates.py +155 -0
- quicktest/Lib/site-packages/pip/_internal/resolution/resolvelib/provider.py +255 -0
- quicktest/Lib/site-packages/pip/_internal/resolution/resolvelib/reporter.py +80 -0
- quicktest/Lib/site-packages/pip/_internal/resolution/resolvelib/requirements.py +165 -0
- quicktest/Lib/site-packages/pip/_internal/resolution/resolvelib/resolver.py +299 -0
- quicktest/Lib/site-packages/pip/_internal/self_outdated_check.py +242 -0
- quicktest/Lib/site-packages/pip/_internal/utils/__init__.py +0 -0
- quicktest/Lib/site-packages/pip/_internal/utils/_jaraco_text.py +109 -0
- quicktest/Lib/site-packages/pip/_internal/utils/_log.py +38 -0
- quicktest/Lib/site-packages/pip/_internal/utils/appdirs.py +52 -0
- quicktest/Lib/site-packages/pip/_internal/utils/compat.py +63 -0
- quicktest/Lib/site-packages/pip/_internal/utils/compatibility_tags.py +165 -0
- quicktest/Lib/site-packages/pip/_internal/utils/datetime.py +11 -0
- quicktest/Lib/site-packages/pip/_internal/utils/deprecation.py +120 -0
- quicktest/Lib/site-packages/pip/_internal/utils/direct_url_helpers.py +87 -0
- quicktest/Lib/site-packages/pip/_internal/utils/egg_link.py +72 -0
- quicktest/Lib/site-packages/pip/_internal/utils/encoding.py +36 -0
- quicktest/Lib/site-packages/pip/_internal/utils/entrypoints.py +84 -0
- quicktest/Lib/site-packages/pip/_internal/utils/filesystem.py +153 -0
- quicktest/Lib/site-packages/pip/_internal/utils/filetypes.py +27 -0
- quicktest/Lib/site-packages/pip/_internal/utils/glibc.py +88 -0
- quicktest/Lib/site-packages/pip/_internal/utils/hashes.py +151 -0
- quicktest/Lib/site-packages/pip/_internal/utils/inject_securetransport.py +35 -0
- quicktest/Lib/site-packages/pip/_internal/utils/logging.py +348 -0
- quicktest/Lib/site-packages/pip/_internal/utils/misc.py +735 -0
- quicktest/Lib/site-packages/pip/_internal/utils/models.py +39 -0
- quicktest/Lib/site-packages/pip/_internal/utils/packaging.py +57 -0
- quicktest/Lib/site-packages/pip/_internal/utils/setuptools_build.py +146 -0
- quicktest/Lib/site-packages/pip/_internal/utils/subprocess.py +260 -0
- quicktest/Lib/site-packages/pip/_internal/utils/temp_dir.py +246 -0
- quicktest/Lib/site-packages/pip/_internal/utils/unpacking.py +257 -0
- quicktest/Lib/site-packages/pip/_internal/utils/urls.py +62 -0
- quicktest/Lib/site-packages/pip/_internal/utils/virtualenv.py +104 -0
- quicktest/Lib/site-packages/pip/_internal/utils/wheel.py +136 -0
- quicktest/Lib/site-packages/pip/_internal/vcs/__init__.py +15 -0
- quicktest/Lib/site-packages/pip/_internal/vcs/bazaar.py +112 -0
- quicktest/Lib/site-packages/pip/_internal/vcs/git.py +526 -0
- quicktest/Lib/site-packages/pip/_internal/vcs/mercurial.py +163 -0
- quicktest/Lib/site-packages/pip/_internal/vcs/subversion.py +324 -0
- quicktest/Lib/site-packages/pip/_internal/vcs/versioncontrol.py +705 -0
- quicktest/Lib/site-packages/pip/_internal/wheel_builder.py +355 -0
- quicktest/Lib/site-packages/pip/_vendor/__init__.py +120 -0
- quicktest/Lib/site-packages/pip/_vendor/cachecontrol/__init__.py +18 -0
- quicktest/Lib/site-packages/pip/_vendor/cachecontrol/_cmd.py +61 -0
- quicktest/Lib/site-packages/pip/_vendor/cachecontrol/adapter.py +137 -0
- quicktest/Lib/site-packages/pip/_vendor/cachecontrol/cache.py +65 -0
- quicktest/Lib/site-packages/pip/_vendor/cachecontrol/caches/__init__.py +9 -0
- quicktest/Lib/site-packages/pip/_vendor/cachecontrol/caches/file_cache.py +188 -0
- quicktest/Lib/site-packages/pip/_vendor/cachecontrol/caches/redis_cache.py +39 -0
- quicktest/Lib/site-packages/pip/_vendor/cachecontrol/compat.py +32 -0
- quicktest/Lib/site-packages/pip/_vendor/cachecontrol/controller.py +439 -0
- quicktest/Lib/site-packages/pip/_vendor/cachecontrol/filewrapper.py +111 -0
- quicktest/Lib/site-packages/pip/_vendor/cachecontrol/heuristics.py +139 -0
- quicktest/Lib/site-packages/pip/_vendor/cachecontrol/serialize.py +190 -0
- quicktest/Lib/site-packages/pip/_vendor/cachecontrol/wrapper.py +33 -0
- quicktest/Lib/site-packages/pip/_vendor/certifi/__init__.py +4 -0
- quicktest/Lib/site-packages/pip/_vendor/certifi/__main__.py +12 -0
- quicktest/Lib/site-packages/pip/_vendor/certifi/core.py +108 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/__init__.py +115 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/big5freq.py +386 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/big5prober.py +47 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/chardistribution.py +261 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/charsetgroupprober.py +106 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/charsetprober.py +147 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/cli/__init__.py +0 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/cli/chardetect.py +112 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/codingstatemachine.py +90 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/codingstatemachinedict.py +19 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/cp949prober.py +49 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/enums.py +85 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/escprober.py +102 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/escsm.py +261 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/eucjpprober.py +102 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/euckrfreq.py +196 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/euckrprober.py +47 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/euctwfreq.py +388 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/euctwprober.py +47 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/gb2312freq.py +284 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/gb2312prober.py +47 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/hebrewprober.py +316 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/jisfreq.py +325 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/johabfreq.py +2382 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/johabprober.py +47 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/jpcntx.py +238 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/langbulgarianmodel.py +4649 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/langgreekmodel.py +4397 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/langhebrewmodel.py +4380 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/langhungarianmodel.py +4649 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/langrussianmodel.py +5725 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/langthaimodel.py +4380 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/langturkishmodel.py +4380 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/latin1prober.py +147 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/macromanprober.py +162 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/mbcharsetprober.py +95 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/mbcsgroupprober.py +57 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/mbcssm.py +661 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/metadata/__init__.py +0 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/metadata/languages.py +352 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/resultdict.py +16 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/sbcharsetprober.py +162 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/sbcsgroupprober.py +88 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/sjisprober.py +105 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/universaldetector.py +362 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/utf1632prober.py +225 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/utf8prober.py +82 -0
- quicktest/Lib/site-packages/pip/_vendor/chardet/version.py +9 -0
- quicktest/Lib/site-packages/pip/_vendor/colorama/__init__.py +7 -0
- quicktest/Lib/site-packages/pip/_vendor/colorama/ansi.py +102 -0
- quicktest/Lib/site-packages/pip/_vendor/colorama/ansitowin32.py +277 -0
- quicktest/Lib/site-packages/pip/_vendor/colorama/initialise.py +121 -0
- quicktest/Lib/site-packages/pip/_vendor/colorama/tests/__init__.py +1 -0
- quicktest/Lib/site-packages/pip/_vendor/colorama/tests/ansi_test.py +76 -0
- quicktest/Lib/site-packages/pip/_vendor/colorama/tests/ansitowin32_test.py +294 -0
- quicktest/Lib/site-packages/pip/_vendor/colorama/tests/initialise_test.py +189 -0
- quicktest/Lib/site-packages/pip/_vendor/colorama/tests/isatty_test.py +57 -0
- quicktest/Lib/site-packages/pip/_vendor/colorama/tests/utils.py +49 -0
- quicktest/Lib/site-packages/pip/_vendor/colorama/tests/winterm_test.py +131 -0
- quicktest/Lib/site-packages/pip/_vendor/colorama/win32.py +180 -0
- quicktest/Lib/site-packages/pip/_vendor/colorama/winterm.py +195 -0
- quicktest/Lib/site-packages/pip/_vendor/distlib/__init__.py +23 -0
- quicktest/Lib/site-packages/pip/_vendor/distlib/compat.py +1116 -0
- quicktest/Lib/site-packages/pip/_vendor/distlib/database.py +1350 -0
- quicktest/Lib/site-packages/pip/_vendor/distlib/index.py +508 -0
- quicktest/Lib/site-packages/pip/_vendor/distlib/locators.py +1300 -0
- quicktest/Lib/site-packages/pip/_vendor/distlib/manifest.py +393 -0
- quicktest/Lib/site-packages/pip/_vendor/distlib/markers.py +152 -0
- quicktest/Lib/site-packages/pip/_vendor/distlib/metadata.py +1076 -0
- quicktest/Lib/site-packages/pip/_vendor/distlib/resources.py +358 -0
- quicktest/Lib/site-packages/pip/_vendor/distlib/scripts.py +437 -0
- quicktest/Lib/site-packages/pip/_vendor/distlib/util.py +1932 -0
- quicktest/Lib/site-packages/pip/_vendor/distlib/version.py +739 -0
- quicktest/Lib/site-packages/pip/_vendor/distlib/wheel.py +1082 -0
- quicktest/Lib/site-packages/pip/_vendor/distro/__init__.py +54 -0
- quicktest/Lib/site-packages/pip/_vendor/distro/__main__.py +4 -0
- quicktest/Lib/site-packages/pip/_vendor/distro/distro.py +1399 -0
- quicktest/Lib/site-packages/pip/_vendor/idna/__init__.py +44 -0
- quicktest/Lib/site-packages/pip/_vendor/idna/codec.py +112 -0
- quicktest/Lib/site-packages/pip/_vendor/idna/compat.py +13 -0
- quicktest/Lib/site-packages/pip/_vendor/idna/core.py +400 -0
- quicktest/Lib/site-packages/pip/_vendor/idna/idnadata.py +2151 -0
- quicktest/Lib/site-packages/pip/_vendor/idna/intranges.py +54 -0
- quicktest/Lib/site-packages/pip/_vendor/idna/package_data.py +2 -0
- quicktest/Lib/site-packages/pip/_vendor/idna/uts46data.py +8600 -0
- quicktest/Lib/site-packages/pip/_vendor/msgpack/__init__.py +57 -0
- quicktest/Lib/site-packages/pip/_vendor/msgpack/exceptions.py +48 -0
- quicktest/Lib/site-packages/pip/_vendor/msgpack/ext.py +193 -0
- quicktest/Lib/site-packages/pip/_vendor/msgpack/fallback.py +1010 -0
- quicktest/Lib/site-packages/pip/_vendor/packaging/__about__.py +26 -0
- quicktest/Lib/site-packages/pip/_vendor/packaging/__init__.py +25 -0
- quicktest/Lib/site-packages/pip/_vendor/packaging/_manylinux.py +301 -0
- quicktest/Lib/site-packages/pip/_vendor/packaging/_musllinux.py +136 -0
- quicktest/Lib/site-packages/pip/_vendor/packaging/_structures.py +61 -0
- quicktest/Lib/site-packages/pip/_vendor/packaging/markers.py +304 -0
- quicktest/Lib/site-packages/pip/_vendor/packaging/requirements.py +146 -0
- quicktest/Lib/site-packages/pip/_vendor/packaging/specifiers.py +802 -0
- quicktest/Lib/site-packages/pip/_vendor/packaging/tags.py +487 -0
- quicktest/Lib/site-packages/pip/_vendor/packaging/utils.py +136 -0
- quicktest/Lib/site-packages/pip/_vendor/packaging/version.py +504 -0
- quicktest/Lib/site-packages/pip/_vendor/pkg_resources/__init__.py +3361 -0
- quicktest/Lib/site-packages/pip/_vendor/platformdirs/__init__.py +566 -0
- quicktest/Lib/site-packages/pip/_vendor/platformdirs/__main__.py +53 -0
- quicktest/Lib/site-packages/pip/_vendor/platformdirs/android.py +210 -0
- quicktest/Lib/site-packages/pip/_vendor/platformdirs/api.py +223 -0
- quicktest/Lib/site-packages/pip/_vendor/platformdirs/macos.py +91 -0
- quicktest/Lib/site-packages/pip/_vendor/platformdirs/unix.py +223 -0
- quicktest/Lib/site-packages/pip/_vendor/platformdirs/version.py +4 -0
- quicktest/Lib/site-packages/pip/_vendor/platformdirs/windows.py +255 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/__init__.py +82 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/__main__.py +17 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/cmdline.py +668 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/console.py +70 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/filter.py +71 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/filters/__init__.py +940 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/formatter.py +124 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/formatters/__init__.py +158 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/formatters/_mapping.py +23 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/formatters/bbcode.py +108 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/formatters/groff.py +170 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/formatters/html.py +989 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/formatters/img.py +645 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/formatters/irc.py +154 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/formatters/latex.py +521 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/formatters/other.py +161 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/formatters/pangomarkup.py +83 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/formatters/rtf.py +146 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/formatters/svg.py +188 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/formatters/terminal.py +127 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/formatters/terminal256.py +338 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/lexer.py +943 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/lexers/__init__.py +362 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/lexers/_mapping.py +559 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/lexers/python.py +1198 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/modeline.py +43 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/plugin.py +88 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/regexopt.py +91 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/scanner.py +104 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/sphinxext.py +217 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/style.py +197 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/styles/__init__.py +103 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/token.py +213 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/unistring.py +153 -0
- quicktest/Lib/site-packages/pip/_vendor/pygments/util.py +330 -0
- quicktest/Lib/site-packages/pip/_vendor/pyparsing/__init__.py +322 -0
- quicktest/Lib/site-packages/pip/_vendor/pyparsing/actions.py +217 -0
- quicktest/Lib/site-packages/pip/_vendor/pyparsing/common.py +432 -0
- quicktest/Lib/site-packages/pip/_vendor/pyparsing/core.py +6115 -0
- quicktest/Lib/site-packages/pip/_vendor/pyparsing/diagram/__init__.py +656 -0
- quicktest/Lib/site-packages/pip/_vendor/pyparsing/exceptions.py +299 -0
- quicktest/Lib/site-packages/pip/_vendor/pyparsing/helpers.py +1100 -0
- quicktest/Lib/site-packages/pip/_vendor/pyparsing/results.py +796 -0
- quicktest/Lib/site-packages/pip/_vendor/pyparsing/testing.py +331 -0
- quicktest/Lib/site-packages/pip/_vendor/pyparsing/unicode.py +361 -0
- quicktest/Lib/site-packages/pip/_vendor/pyparsing/util.py +284 -0
- quicktest/Lib/site-packages/pip/_vendor/pyproject_hooks/__init__.py +23 -0
- quicktest/Lib/site-packages/pip/_vendor/pyproject_hooks/_compat.py +8 -0
- quicktest/Lib/site-packages/pip/_vendor/pyproject_hooks/_impl.py +330 -0
- quicktest/Lib/site-packages/pip/_vendor/pyproject_hooks/_in_process/__init__.py +18 -0
- quicktest/Lib/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py +353 -0
- quicktest/Lib/site-packages/pip/_vendor/requests/__init__.py +182 -0
- quicktest/Lib/site-packages/pip/_vendor/requests/__version__.py +14 -0
- quicktest/Lib/site-packages/pip/_vendor/requests/_internal_utils.py +50 -0
- quicktest/Lib/site-packages/pip/_vendor/requests/adapters.py +538 -0
- quicktest/Lib/site-packages/pip/_vendor/requests/api.py +157 -0
- quicktest/Lib/site-packages/pip/_vendor/requests/auth.py +315 -0
- quicktest/Lib/site-packages/pip/_vendor/requests/certs.py +24 -0
- quicktest/Lib/site-packages/pip/_vendor/requests/compat.py +67 -0
- quicktest/Lib/site-packages/pip/_vendor/requests/cookies.py +561 -0
- quicktest/Lib/site-packages/pip/_vendor/requests/exceptions.py +141 -0
- quicktest/Lib/site-packages/pip/_vendor/requests/help.py +131 -0
- quicktest/Lib/site-packages/pip/_vendor/requests/hooks.py +33 -0
- quicktest/Lib/site-packages/pip/_vendor/requests/models.py +1034 -0
- quicktest/Lib/site-packages/pip/_vendor/requests/packages.py +16 -0
- quicktest/Lib/site-packages/pip/_vendor/requests/sessions.py +833 -0
- quicktest/Lib/site-packages/pip/_vendor/requests/status_codes.py +128 -0
- quicktest/Lib/site-packages/pip/_vendor/requests/structures.py +99 -0
- quicktest/Lib/site-packages/pip/_vendor/requests/utils.py +1094 -0
- quicktest/Lib/site-packages/pip/_vendor/resolvelib/__init__.py +26 -0
- quicktest/Lib/site-packages/pip/_vendor/resolvelib/compat/__init__.py +0 -0
- quicktest/Lib/site-packages/pip/_vendor/resolvelib/compat/collections_abc.py +6 -0
- quicktest/Lib/site-packages/pip/_vendor/resolvelib/providers.py +133 -0
- quicktest/Lib/site-packages/pip/_vendor/resolvelib/reporters.py +43 -0
- quicktest/Lib/site-packages/pip/_vendor/resolvelib/resolvers.py +547 -0
- quicktest/Lib/site-packages/pip/_vendor/resolvelib/structs.py +170 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/__init__.py +177 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/__main__.py +274 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_cell_widths.py +451 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_emoji_codes.py +3610 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_emoji_replace.py +32 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_export_format.py +76 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_extension.py +10 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_fileno.py +24 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_inspect.py +270 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_log_render.py +94 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_loop.py +43 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_null_file.py +69 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_palettes.py +309 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_pick.py +17 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_ratio.py +160 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_spinners.py +482 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_stack.py +16 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_timer.py +19 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_win32_console.py +662 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_windows.py +72 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_windows_renderer.py +56 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/_wrap.py +56 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/abc.py +33 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/align.py +311 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/ansi.py +240 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/bar.py +94 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/box.py +517 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/cells.py +154 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/color.py +622 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/color_triplet.py +38 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/columns.py +187 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/console.py +2633 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/constrain.py +37 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/containers.py +167 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/control.py +225 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/default_styles.py +190 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/diagnose.py +37 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/emoji.py +96 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/errors.py +34 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/file_proxy.py +57 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/filesize.py +89 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/highlighter.py +232 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/json.py +140 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/jupyter.py +101 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/layout.py +443 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/live.py +375 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/live_render.py +113 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/logging.py +289 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/markup.py +246 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/measure.py +151 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/padding.py +141 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/pager.py +34 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/palette.py +100 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/panel.py +308 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/pretty.py +994 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/progress.py +1702 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/progress_bar.py +224 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/prompt.py +376 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/protocol.py +42 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/region.py +10 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/repr.py +149 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/rule.py +130 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/scope.py +86 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/screen.py +54 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/segment.py +739 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/spinner.py +137 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/status.py +132 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/style.py +796 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/styled.py +42 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/syntax.py +948 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/table.py +1002 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/terminal_theme.py +153 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/text.py +1307 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/theme.py +115 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/themes.py +5 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/traceback.py +756 -0
- quicktest/Lib/site-packages/pip/_vendor/rich/tree.py +251 -0
- quicktest/Lib/site-packages/pip/_vendor/six.py +998 -0
- quicktest/Lib/site-packages/pip/_vendor/tenacity/__init__.py +608 -0
- quicktest/Lib/site-packages/pip/_vendor/tenacity/_asyncio.py +94 -0
- quicktest/Lib/site-packages/pip/_vendor/tenacity/_utils.py +76 -0
- quicktest/Lib/site-packages/pip/_vendor/tenacity/after.py +51 -0
- quicktest/Lib/site-packages/pip/_vendor/tenacity/before.py +46 -0
- quicktest/Lib/site-packages/pip/_vendor/tenacity/before_sleep.py +71 -0
- quicktest/Lib/site-packages/pip/_vendor/tenacity/nap.py +43 -0
- quicktest/Lib/site-packages/pip/_vendor/tenacity/retry.py +272 -0
- quicktest/Lib/site-packages/pip/_vendor/tenacity/stop.py +103 -0
- quicktest/Lib/site-packages/pip/_vendor/tenacity/tornadoweb.py +59 -0
- quicktest/Lib/site-packages/pip/_vendor/tenacity/wait.py +228 -0
- quicktest/Lib/site-packages/pip/_vendor/tomli/__init__.py +11 -0
- quicktest/Lib/site-packages/pip/_vendor/tomli/_parser.py +691 -0
- quicktest/Lib/site-packages/pip/_vendor/tomli/_re.py +107 -0
- quicktest/Lib/site-packages/pip/_vendor/tomli/_types.py +10 -0
- quicktest/Lib/site-packages/pip/_vendor/typing_extensions.py +3072 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/__init__.py +102 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/_collections.py +337 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/_version.py +2 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/connection.py +572 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/connectionpool.py +1132 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/contrib/__init__.py +0 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/contrib/_appengine_environ.py +36 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/contrib/_securetransport/__init__.py +0 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/contrib/_securetransport/bindings.py +519 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/contrib/_securetransport/low_level.py +397 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/contrib/appengine.py +314 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/contrib/ntlmpool.py +130 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/contrib/pyopenssl.py +518 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/contrib/securetransport.py +921 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/contrib/socks.py +216 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/exceptions.py +323 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/fields.py +274 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/filepost.py +98 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/packages/__init__.py +0 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/packages/backports/__init__.py +0 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/packages/backports/makefile.py +51 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/packages/backports/weakref_finalize.py +155 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/packages/six.py +1076 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/poolmanager.py +537 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/request.py +170 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/response.py +879 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/util/__init__.py +49 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/util/connection.py +149 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/util/proxy.py +57 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/util/queue.py +22 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/util/request.py +137 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/util/response.py +107 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/util/retry.py +620 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/util/ssl_.py +495 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/util/ssl_match_hostname.py +159 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/util/ssltransport.py +221 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/util/timeout.py +271 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/util/url.py +435 -0
- quicktest/Lib/site-packages/pip/_vendor/urllib3/util/wait.py +152 -0
- quicktest/Lib/site-packages/pip/_vendor/webencodings/__init__.py +342 -0
- quicktest/Lib/site-packages/pip/_vendor/webencodings/labels.py +231 -0
- quicktest/Lib/site-packages/pip/_vendor/webencodings/mklabels.py +59 -0
- quicktest/Lib/site-packages/pip/_vendor/webencodings/tests.py +153 -0
- quicktest/Lib/site-packages/pip/_vendor/webencodings/x_user_defined.py +325 -0
- quicktest/Lib/site-packages/pip/py.typed +4 -0
- quicktest/Lib/site-packages/pkg_resources/__init__.py +3296 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/__init__.py +0 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/appdirs.py +608 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/importlib_resources/__init__.py +36 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/importlib_resources/_adapters.py +170 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/importlib_resources/_common.py +104 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/importlib_resources/_compat.py +98 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/importlib_resources/_itertools.py +35 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/importlib_resources/_legacy.py +121 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/importlib_resources/abc.py +137 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/importlib_resources/readers.py +122 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/importlib_resources/simple.py +116 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/jaraco/__init__.py +0 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/jaraco/context.py +213 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/jaraco/functools.py +525 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/jaraco/text/__init__.py +599 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/more_itertools/__init__.py +4 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/more_itertools/more.py +4316 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/more_itertools/recipes.py +698 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/packaging/__about__.py +26 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/packaging/__init__.py +25 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/packaging/_manylinux.py +301 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/packaging/_musllinux.py +136 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/packaging/_structures.py +61 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/packaging/markers.py +304 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/packaging/requirements.py +146 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/packaging/specifiers.py +802 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/packaging/tags.py +487 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/packaging/utils.py +136 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/packaging/version.py +504 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/pyparsing/__init__.py +331 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/pyparsing/actions.py +207 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/pyparsing/common.py +424 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/pyparsing/core.py +5814 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/pyparsing/diagram/__init__.py +642 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/pyparsing/exceptions.py +267 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/pyparsing/helpers.py +1088 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/pyparsing/results.py +760 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/pyparsing/testing.py +331 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/pyparsing/unicode.py +352 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/pyparsing/util.py +235 -0
- quicktest/Lib/site-packages/pkg_resources/_vendor/zipp.py +329 -0
- quicktest/Lib/site-packages/pkg_resources/extern/__init__.py +76 -0
- quicktest/Lib/site-packages/pyproject_hooks/__init__.py +31 -0
- quicktest/Lib/site-packages/pyproject_hooks/_impl.py +410 -0
- quicktest/Lib/site-packages/pyproject_hooks/_in_process/__init__.py +21 -0
- quicktest/Lib/site-packages/pyproject_hooks/_in_process/_in_process.py +389 -0
- quicktest/Lib/site-packages/pyproject_hooks/py.typed +0 -0
- quicktest/Lib/site-packages/setuptools/__init__.py +247 -0
- quicktest/Lib/site-packages/setuptools/_deprecation_warning.py +7 -0
- quicktest/Lib/site-packages/setuptools/_distutils/__init__.py +24 -0
- quicktest/Lib/site-packages/setuptools/_distutils/_collections.py +56 -0
- quicktest/Lib/site-packages/setuptools/_distutils/_functools.py +20 -0
- quicktest/Lib/site-packages/setuptools/_distutils/_macos_compat.py +12 -0
- quicktest/Lib/site-packages/setuptools/_distutils/_msvccompiler.py +572 -0
- quicktest/Lib/site-packages/setuptools/_distutils/archive_util.py +280 -0
- quicktest/Lib/site-packages/setuptools/_distutils/bcppcompiler.py +408 -0
- quicktest/Lib/site-packages/setuptools/_distutils/ccompiler.py +1220 -0
- quicktest/Lib/site-packages/setuptools/_distutils/cmd.py +436 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/__init__.py +25 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/_framework_compat.py +55 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/bdist.py +157 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/bdist_dumb.py +144 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/bdist_rpm.py +615 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/build.py +153 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/build_clib.py +208 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/build_ext.py +787 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/build_py.py +407 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/build_scripts.py +173 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/check.py +151 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/clean.py +76 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/config.py +377 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/install.py +814 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/install_data.py +84 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/install_egg_info.py +91 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/install_headers.py +45 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/install_lib.py +238 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/install_scripts.py +61 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/py37compat.py +31 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/register.py +319 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/sdist.py +531 -0
- quicktest/Lib/site-packages/setuptools/_distutils/command/upload.py +205 -0
- quicktest/Lib/site-packages/setuptools/_distutils/config.py +139 -0
- quicktest/Lib/site-packages/setuptools/_distutils/core.py +291 -0
- quicktest/Lib/site-packages/setuptools/_distutils/cygwinccompiler.py +364 -0
- quicktest/Lib/site-packages/setuptools/_distutils/debug.py +5 -0
- quicktest/Lib/site-packages/setuptools/_distutils/dep_util.py +96 -0
- quicktest/Lib/site-packages/setuptools/_distutils/dir_util.py +243 -0
- quicktest/Lib/site-packages/setuptools/_distutils/dist.py +1286 -0
- quicktest/Lib/site-packages/setuptools/_distutils/errors.py +127 -0
- quicktest/Lib/site-packages/setuptools/_distutils/extension.py +248 -0
- quicktest/Lib/site-packages/setuptools/_distutils/fancy_getopt.py +470 -0
- quicktest/Lib/site-packages/setuptools/_distutils/file_util.py +249 -0
- quicktest/Lib/site-packages/setuptools/_distutils/filelist.py +371 -0
- quicktest/Lib/site-packages/setuptools/_distutils/log.py +80 -0
- quicktest/Lib/site-packages/setuptools/_distutils/msvc9compiler.py +832 -0
- quicktest/Lib/site-packages/setuptools/_distutils/msvccompiler.py +695 -0
- quicktest/Lib/site-packages/setuptools/_distutils/py38compat.py +8 -0
- quicktest/Lib/site-packages/setuptools/_distutils/py39compat.py +22 -0
- quicktest/Lib/site-packages/setuptools/_distutils/spawn.py +109 -0
- quicktest/Lib/site-packages/setuptools/_distutils/sysconfig.py +558 -0
- quicktest/Lib/site-packages/setuptools/_distutils/text_file.py +287 -0
- quicktest/Lib/site-packages/setuptools/_distutils/unixccompiler.py +401 -0
- quicktest/Lib/site-packages/setuptools/_distutils/util.py +513 -0
- quicktest/Lib/site-packages/setuptools/_distutils/version.py +358 -0
- quicktest/Lib/site-packages/setuptools/_distutils/versionpredicate.py +175 -0
- quicktest/Lib/site-packages/setuptools/_entry_points.py +86 -0
- quicktest/Lib/site-packages/setuptools/_imp.py +82 -0
- quicktest/Lib/site-packages/setuptools/_importlib.py +47 -0
- quicktest/Lib/site-packages/setuptools/_itertools.py +23 -0
- quicktest/Lib/site-packages/setuptools/_path.py +29 -0
- quicktest/Lib/site-packages/setuptools/_reqs.py +19 -0
- quicktest/Lib/site-packages/setuptools/_vendor/__init__.py +0 -0
- quicktest/Lib/site-packages/setuptools/_vendor/importlib_metadata/__init__.py +1047 -0
- quicktest/Lib/site-packages/setuptools/_vendor/importlib_metadata/_adapters.py +68 -0
- quicktest/Lib/site-packages/setuptools/_vendor/importlib_metadata/_collections.py +30 -0
- quicktest/Lib/site-packages/setuptools/_vendor/importlib_metadata/_compat.py +71 -0
- quicktest/Lib/site-packages/setuptools/_vendor/importlib_metadata/_functools.py +104 -0
- quicktest/Lib/site-packages/setuptools/_vendor/importlib_metadata/_itertools.py +73 -0
- quicktest/Lib/site-packages/setuptools/_vendor/importlib_metadata/_meta.py +48 -0
- quicktest/Lib/site-packages/setuptools/_vendor/importlib_metadata/_text.py +99 -0
- quicktest/Lib/site-packages/setuptools/_vendor/importlib_resources/__init__.py +36 -0
- quicktest/Lib/site-packages/setuptools/_vendor/importlib_resources/_adapters.py +170 -0
- quicktest/Lib/site-packages/setuptools/_vendor/importlib_resources/_common.py +104 -0
- quicktest/Lib/site-packages/setuptools/_vendor/importlib_resources/_compat.py +98 -0
- quicktest/Lib/site-packages/setuptools/_vendor/importlib_resources/_itertools.py +35 -0
- quicktest/Lib/site-packages/setuptools/_vendor/importlib_resources/_legacy.py +121 -0
- quicktest/Lib/site-packages/setuptools/_vendor/importlib_resources/abc.py +137 -0
- quicktest/Lib/site-packages/setuptools/_vendor/importlib_resources/readers.py +122 -0
- quicktest/Lib/site-packages/setuptools/_vendor/importlib_resources/simple.py +116 -0
- quicktest/Lib/site-packages/setuptools/_vendor/jaraco/__init__.py +0 -0
- quicktest/Lib/site-packages/setuptools/_vendor/jaraco/context.py +213 -0
- quicktest/Lib/site-packages/setuptools/_vendor/jaraco/functools.py +525 -0
- quicktest/Lib/site-packages/setuptools/_vendor/jaraco/text/__init__.py +599 -0
- quicktest/Lib/site-packages/setuptools/_vendor/more_itertools/__init__.py +4 -0
- quicktest/Lib/site-packages/setuptools/_vendor/more_itertools/more.py +3824 -0
- quicktest/Lib/site-packages/setuptools/_vendor/more_itertools/recipes.py +620 -0
- quicktest/Lib/site-packages/setuptools/_vendor/ordered_set.py +488 -0
- quicktest/Lib/site-packages/setuptools/_vendor/packaging/__about__.py +26 -0
- quicktest/Lib/site-packages/setuptools/_vendor/packaging/__init__.py +25 -0
- quicktest/Lib/site-packages/setuptools/_vendor/packaging/_manylinux.py +301 -0
- quicktest/Lib/site-packages/setuptools/_vendor/packaging/_musllinux.py +136 -0
- quicktest/Lib/site-packages/setuptools/_vendor/packaging/_structures.py +61 -0
- quicktest/Lib/site-packages/setuptools/_vendor/packaging/markers.py +304 -0
- quicktest/Lib/site-packages/setuptools/_vendor/packaging/requirements.py +146 -0
- quicktest/Lib/site-packages/setuptools/_vendor/packaging/specifiers.py +802 -0
- quicktest/Lib/site-packages/setuptools/_vendor/packaging/tags.py +487 -0
- quicktest/Lib/site-packages/setuptools/_vendor/packaging/utils.py +136 -0
- quicktest/Lib/site-packages/setuptools/_vendor/packaging/version.py +504 -0
- quicktest/Lib/site-packages/setuptools/_vendor/pyparsing/__init__.py +331 -0
- quicktest/Lib/site-packages/setuptools/_vendor/pyparsing/actions.py +207 -0
- quicktest/Lib/site-packages/setuptools/_vendor/pyparsing/common.py +424 -0
- quicktest/Lib/site-packages/setuptools/_vendor/pyparsing/core.py +5814 -0
- quicktest/Lib/site-packages/setuptools/_vendor/pyparsing/diagram/__init__.py +642 -0
- quicktest/Lib/site-packages/setuptools/_vendor/pyparsing/exceptions.py +267 -0
- quicktest/Lib/site-packages/setuptools/_vendor/pyparsing/helpers.py +1088 -0
- quicktest/Lib/site-packages/setuptools/_vendor/pyparsing/results.py +760 -0
- quicktest/Lib/site-packages/setuptools/_vendor/pyparsing/testing.py +331 -0
- quicktest/Lib/site-packages/setuptools/_vendor/pyparsing/unicode.py +352 -0
- quicktest/Lib/site-packages/setuptools/_vendor/pyparsing/util.py +235 -0
- quicktest/Lib/site-packages/setuptools/_vendor/tomli/__init__.py +11 -0
- quicktest/Lib/site-packages/setuptools/_vendor/tomli/_parser.py +691 -0
- quicktest/Lib/site-packages/setuptools/_vendor/tomli/_re.py +107 -0
- quicktest/Lib/site-packages/setuptools/_vendor/tomli/_types.py +10 -0
- quicktest/Lib/site-packages/setuptools/_vendor/typing_extensions.py +2296 -0
- quicktest/Lib/site-packages/setuptools/_vendor/zipp.py +329 -0
- quicktest/Lib/site-packages/setuptools/archive_util.py +213 -0
- quicktest/Lib/site-packages/setuptools/build_meta.py +511 -0
- quicktest/Lib/site-packages/setuptools/command/__init__.py +12 -0
- quicktest/Lib/site-packages/setuptools/command/alias.py +78 -0
- quicktest/Lib/site-packages/setuptools/command/bdist_egg.py +457 -0
- quicktest/Lib/site-packages/setuptools/command/bdist_rpm.py +40 -0
- quicktest/Lib/site-packages/setuptools/command/build.py +146 -0
- quicktest/Lib/site-packages/setuptools/command/build_clib.py +101 -0
- quicktest/Lib/site-packages/setuptools/command/build_ext.py +383 -0
- quicktest/Lib/site-packages/setuptools/command/build_py.py +368 -0
- quicktest/Lib/site-packages/setuptools/command/develop.py +193 -0
- quicktest/Lib/site-packages/setuptools/command/dist_info.py +142 -0
- quicktest/Lib/site-packages/setuptools/command/easy_install.py +2312 -0
- quicktest/Lib/site-packages/setuptools/command/editable_wheel.py +844 -0
- quicktest/Lib/site-packages/setuptools/command/egg_info.py +763 -0
- quicktest/Lib/site-packages/setuptools/command/install.py +139 -0
- quicktest/Lib/site-packages/setuptools/command/install_egg_info.py +63 -0
- quicktest/Lib/site-packages/setuptools/command/install_lib.py +122 -0
- quicktest/Lib/site-packages/setuptools/command/install_scripts.py +70 -0
- quicktest/Lib/site-packages/setuptools/command/py36compat.py +134 -0
- quicktest/Lib/site-packages/setuptools/command/register.py +18 -0
- quicktest/Lib/site-packages/setuptools/command/rotate.py +64 -0
- quicktest/Lib/site-packages/setuptools/command/saveopts.py +22 -0
- quicktest/Lib/site-packages/setuptools/command/sdist.py +210 -0
- quicktest/Lib/site-packages/setuptools/command/setopt.py +149 -0
- quicktest/Lib/site-packages/setuptools/command/test.py +251 -0
- quicktest/Lib/site-packages/setuptools/command/upload.py +17 -0
- quicktest/Lib/site-packages/setuptools/command/upload_docs.py +213 -0
- quicktest/Lib/site-packages/setuptools/config/__init__.py +35 -0
- quicktest/Lib/site-packages/setuptools/config/_apply_pyprojecttoml.py +377 -0
- quicktest/Lib/site-packages/setuptools/config/_validate_pyproject/__init__.py +34 -0
- quicktest/Lib/site-packages/setuptools/config/_validate_pyproject/error_reporting.py +318 -0
- quicktest/Lib/site-packages/setuptools/config/_validate_pyproject/extra_validations.py +36 -0
- quicktest/Lib/site-packages/setuptools/config/_validate_pyproject/fastjsonschema_exceptions.py +51 -0
- quicktest/Lib/site-packages/setuptools/config/_validate_pyproject/fastjsonschema_validations.py +1035 -0
- quicktest/Lib/site-packages/setuptools/config/_validate_pyproject/formats.py +259 -0
- quicktest/Lib/site-packages/setuptools/config/expand.py +462 -0
- quicktest/Lib/site-packages/setuptools/config/pyprojecttoml.py +493 -0
- quicktest/Lib/site-packages/setuptools/config/setupcfg.py +762 -0
- quicktest/Lib/site-packages/setuptools/dep_util.py +25 -0
- quicktest/Lib/site-packages/setuptools/depends.py +176 -0
- quicktest/Lib/site-packages/setuptools/discovery.py +600 -0
- quicktest/Lib/site-packages/setuptools/dist.py +1222 -0
- quicktest/Lib/site-packages/setuptools/errors.py +58 -0
- quicktest/Lib/site-packages/setuptools/extension.py +148 -0
- quicktest/Lib/site-packages/setuptools/extern/__init__.py +76 -0
- quicktest/Lib/site-packages/setuptools/glob.py +167 -0
- quicktest/Lib/site-packages/setuptools/installer.py +104 -0
- quicktest/Lib/site-packages/setuptools/launch.py +36 -0
- quicktest/Lib/site-packages/setuptools/logging.py +36 -0
- quicktest/Lib/site-packages/setuptools/monkey.py +165 -0
- quicktest/Lib/site-packages/setuptools/msvc.py +1703 -0
- quicktest/Lib/site-packages/setuptools/namespaces.py +107 -0
- quicktest/Lib/site-packages/setuptools/package_index.py +1126 -0
- quicktest/Lib/site-packages/setuptools/py34compat.py +13 -0
- quicktest/Lib/site-packages/setuptools/sandbox.py +530 -0
- quicktest/Lib/site-packages/setuptools/unicode_utils.py +42 -0
- quicktest/Lib/site-packages/setuptools/version.py +6 -0
- quicktest/Lib/site-packages/setuptools/wheel.py +222 -0
- quicktest/Lib/site-packages/setuptools/windows_support.py +29 -0
- tests/test_preprocess.py +14 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Demo: train a sentiment model on labelled reviews and score new data.

Steps: load the training and new-data CSV files, clean the text column with
``pre_process``, fit a vectorizer + classifier with ``run_pipeline``, label
the new data with ``make_predictions`` and write the result to a CSV file.

Install the package before running this script. The ``!`` shell escape only
works inside IPython/Jupyter and is a SyntaxError in a plain ``.py`` file, so
the command is kept here as a comment (run it in a shell, or prefix it with
``!`` in a notebook cell):

    pip install .\\dist\\quick_sentiments-<version>-py3-none-any.whl
"""
# Load the packages.
import os
import sys

import polars as pl

# Put the current working directory first on the import path so that a local
# checkout of quick_sentiments is picked up when the script is run from the
# project root.
project_root = os.getcwd()
sys.path.insert(0, project_root)

# pre_process cleans the text data; the available options are described in
# quick_sentiments/preprocess.py.
from quick_sentiments import pre_process
# run_pipeline runs the sentiment analysis on the training data. It takes the
# training data plus the vectorizer and machine-learning method names and
# returns the results (see quick_sentiments/pipeline.py).
from quick_sentiments import run_pipeline
# make_predictions applies the trained objects to new data and returns the
# predictions.
from quick_sentiments import make_predictions

# ENTER YOUR PATHS HERE FOR THE TRAINING DATA SET
path1 = "training_data/train.csv"  # path to the training data
df_train = pl.read_csv(path1, has_header=True, encoding="utf8")

# ENTER YOUR PATHS HERE FOR THE TESTING DATA SET
path2 = "new_data/test.csv"  # path to the new (unlabelled) data
df_test = pl.read_csv(path2, has_header=True, encoding="utf8")

# ENTER THE COLUMN NAMES HERE
response_column = "reviewText"  # change to the name of your text column
sentiment_column = "sentiment"  # change to the name of your label column

# Clean the training text; pass extra pre_process options inside the lambda.
df_train = df_train.with_columns(
    pl.col(response_column).map_elements(lambda x: pre_process(x)).alias("processed")
)

dt = run_pipeline(
    vectorizer_name="BOW",  # BOW, tf, tfidf, wv
    model_name="logit",  # logit, rf, XGB (XGB is slow; not recommended for normal use)
    df=df_train,
    text_column_name="processed",  # column holding the cleaned text
    sentiment_column_name=sentiment_column,
    # Set to True to run hyperparameter tuning; it takes longer and may run
    # out of memory on large datasets.
    perform_tuning=False,
)

# Clean the new text the same way as the training text. The alias must be
# applied to the polars expression, not to the value returned by pre_process.
new_data = df_test.with_columns(
    pl.col(response_column).map_elements(lambda x: pre_process(x)).alias("processed")
)

# MAKE PREDICTIONS ON THE NEW DATA
sentiments_prediction = make_predictions(
    new_data=new_data,
    text_column_name="processed",
    vectorizer=dt["vectorizer_object"],
    best_model=dt["model_object"],
    label_encoder=dt["label_encoder"],
    prediction_column_name="sentiment_predictions",  # optional custom name
)

# SAVE THE PREDICTIONS TO A CSV FILE
sentiments_prediction.write_csv("new_data/sentiments_prediction.csv")

# THE END
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
# quick_sentiments/__init__.py
"""Public entry points of the quick_sentiments package."""
from .pipeline import run_pipeline  # Expose pipeline function
from .predict import make_predictions  # Expose prediction function
from .preprocess import pre_process  # Expose preprocessing function

# Names exported by `from quick_sentiments import *`. pre_process is listed
# too, so every function imported above is part of the star-import API.
__all__ = ['run_pipeline', 'make_predictions', 'pre_process']
|
|
7
|
+
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
from xgboost import XGBClassifier # Import XGBClassifier
|
|
2
|
+
from sklearn.model_selection import GridSearchCV # Import GridSearchCV
|
|
3
|
+
from sklearn.metrics import classification_report # For evaluation metrics
|
|
4
|
+
import numpy as np # For type hinting
|
|
5
|
+
|
|
6
|
+
def train_and_predict(X_train, y_train, X_test, perform_tuning: bool = True):
    """
    Fit an XGBClassifier on the training data and predict labels for the test data.

    The objective and evaluation metric are picked from the number of distinct
    values in ``y_train``: exactly two classes use the binary logistic
    objective, anything else uses the multi-class softmax objective.

    Args:
        X_train: training features (e.g., NumPy array or sparse matrix).
        y_train: training labels (numerical, e.g., 0, 1, 2...).
        X_test: test features (e.g., NumPy array or sparse matrix).
        perform_tuning (bool): If True, searches a fixed parameter grid with
            5-fold GridSearchCV scored by weighted F1. If False, fits the base
            model as configured below. Defaults to True.

    Returns:
        y_pred: predicted labels for the test set.
        best_model: the fitted XGBClassifier (the grid search's best estimator
            when tuning, otherwise the base model).
    """
    print(" - Starting XGBoost training...")

    # The class count decides which objective / eval metric XGBoost is given.
    num_classes = len(np.unique(y_train))
    xgb_objective, xgb_eval_metric = (
        ('binary:logistic', 'logloss')
        if num_classes == 2
        else ('multi:softmax', 'mlogloss')
    )
    # Weighted F1 is the grid-search score for both binary and multi-class.
    scoring_metric = 'f1_weighted'

    # num_class is only passed explicitly when there are more than two classes.
    declared_classes = num_classes if num_classes > 2 else None

    # Base estimator shared by the tuned and untuned paths; verbosity=0 keeps
    # XGBoost's own output quiet.
    xgb_model = XGBClassifier(
        objective=xgb_objective,
        eval_metric=xgb_eval_metric,
        use_label_encoder=False,
        random_state=42,
        num_class=declared_classes,
        verbosity=0,
    )

    if not perform_tuning:
        # Untuned path: fit the base estimator directly and return it.
        print(" - Training XGBoost with default parameters (no hyperparameter tuning)...")
        xgb_model.fit(X_train, y_train)
        print(" - Model trained with default parameters.")
        return xgb_model.predict(X_test), xgb_model

    print(" - Performing GridSearchCV for hyperparameter tuning...")

    param_grid = {
        'n_estimators': [100, 200],
        'learning_rate': [0.05, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'subsample': [0.8, 1.0],
        'colsample_bytree': [0.8, 1.0],
    }
    print(" - Using default parameter grid for tuning:", param_grid)

    # verbose=1 makes GridSearchCV print its progress messages.
    grid_search = GridSearchCV(
        estimator=xgb_model,
        param_grid=param_grid,
        cv=5,
        scoring=scoring_metric,
        n_jobs=-1,
        verbose=1,
    )
    grid_search.fit(X_train, y_train)

    tuned_model = grid_search.best_estimator_

    print("\n - Best Hyperparameters found:")
    print(grid_search.best_params_)
    print(f" - Best Cross-Validation Score ({scoring_metric}): {grid_search.best_score_:.4f}")

    # Predict with the best estimator and hand back both results.
    predictions = tuned_model.predict(X_test)
    return predictions, tuned_model
|
|
File without changes
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# MLAlgo/logistic_regression_model.py
|
|
2
|
+
|
|
3
|
+
from sklearn.linear_model import LogisticRegression
|
|
4
|
+
from sklearn.model_selection import GridSearchCV # Import GridSearchCV
|
|
5
|
+
from sklearn.metrics import classification_report # For evaluation metrics
|
|
6
|
+
import numpy as np # For type hinting
|
|
7
|
+
|
|
8
|
+
def train_and_predict(X_train, y_train, X_test, perform_tuning=False):
    """
    Fit a logistic-regression classifier and label the held-out features.

    Args:
        X_train: training features (e.g., NumPy array or sparse matrix).
        y_train: training labels (list or NumPy array).
        X_test: features to predict on, in the same feature space as X_train.
        perform_tuning (bool): when True, hyperparameters are chosen with a
            5-fold GridSearchCV scored by weighted F1. When False (the
            default), the base estimator is fitted unchanged.

    Returns:
        y_pred: predicted labels for X_test.
        best_model: the fitted LogisticRegression (the search's
            best_estimator_ when tuning was requested).
    """
    base_estimator = LogisticRegression(random_state=42)

    if not perform_tuning:
        # Plain fit of the un-tuned estimator.
        print(" - Training Logistic Regression with default parameters (no hyperparameter tuning)...")
        best_model = base_estimator
        best_model.fit(X_train, y_train)
        print(" - Model trained with default parameters.")
    else:
        print(" - Starting Logistic Regression training with GridSearchCV for hyperparameter tuning...")

        # Fixed search space; callers cannot currently supply their own grid.
        search_space = {
            'solver': ['liblinear', 'lbfgs'],
            'C': [0.1, 1.0, 10.0],
            'class_weight': [None, 'balanced'],
            'max_iter': [500, 1000]
        }
        print(" - Using default parameter grid for tuning:", search_space)

        search = GridSearchCV(
            estimator=base_estimator,
            param_grid=search_space,
            cv=5,
            scoring='f1_weighted',
            n_jobs=-1,
            verbose=1
        )
        search.fit(X_train, y_train)

        best_model = search.best_estimator_
        print("\n - Best Hyperparameters found:")
        print(search.best_params_)
        print(f" - Best Cross-Validation Score (F1-weighted): {search.best_score_:.4f}")

    # Same prediction/reporting path for both the tuned and the default model.
    y_pred = best_model.predict(X_test)
    print("Best model parameters:", best_model.get_params())

    return y_pred, best_model
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# MLAlgo/random_forest_model.py
|
|
2
|
+
|
|
3
|
+
from sklearn.ensemble import RandomForestClassifier
|
|
4
|
+
from sklearn.model_selection import GridSearchCV # Import GridSearchCV
|
|
5
|
+
from sklearn.metrics import classification_report # For evaluation metrics
|
|
6
|
+
import numpy as np # For type hinting
|
|
7
|
+
|
|
8
|
+
def train_and_predict(X_train, y_train, X_test, perform_tuning=False):
    """
    Fit a random-forest classifier and label the held-out features.

    Args:
        X_train: training features (e.g., NumPy array or sparse matrix).
        y_train: training labels (list or NumPy array).
        X_test: features to predict on, in the same feature space as X_train.
        perform_tuning (bool): when True, hyperparameters are chosen with a
            5-fold GridSearchCV scored by weighted F1. When False (the
            default), the base estimator is fitted unchanged.

    Returns:
        y_pred: predicted labels for X_test.
        best_model: the fitted RandomForestClassifier (the search's
            best_estimator_ when tuning was requested).
    """
    base_estimator = RandomForestClassifier(random_state=42)

    if not perform_tuning:
        # Plain fit of the un-tuned estimator.
        print(" - Training Random Forest with default parameters (no hyperparameter tuning)...")
        best_model = base_estimator
        best_model.fit(X_train, y_train)
        print(" - Model trained with default parameters.")
    else:
        print(" - Starting Random Forest training with GridSearchCV for hyperparameter tuning...")

        # Fixed search space; callers cannot currently supply their own grid.
        search_space = {
            'n_estimators': [100, 200, 300],
            'max_depth': [10, 20, None],
            'min_samples_split': [2, 5],
            'min_samples_leaf': [1, 2],
            'class_weight': [None, 'balanced']
        }
        print(" - Using default parameter grid for tuning:", search_space)

        search = GridSearchCV(
            estimator=base_estimator,
            param_grid=search_space,
            cv=5,
            scoring='f1_weighted',
            n_jobs=-1,
            verbose=1
        )
        search.fit(X_train, y_train)

        best_model = search.best_estimator_

        print("\n - Best Hyperparameters found:")
        print(search.best_params_)
        print(f" - Best Cross-Validation Score (F1-weighted): {search.best_score_:.4f}")

    # Predict with whichever model was produced above (tuned or default).
    y_pred = best_model.predict(X_test)

    return y_pred, best_model
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# POSITIVELY DO NOT CHANGE (This comment is for the user, I am making necessary fixes based on our conversation)
|
|
2
|
+
|
|
3
|
+
from sklearn.model_selection import train_test_split
|
|
4
|
+
from sklearn.metrics import classification_report, accuracy_score
|
|
5
|
+
from collections import Counter
|
|
6
|
+
from sklearn.preprocessing import LabelEncoder
|
|
7
|
+
import polars as pl
|
|
8
|
+
import importlib
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
from typing import Union
|
|
12
|
+
|
|
13
|
+
def run_pipeline(
    vectorizer_name: str,
    model_name: str,
    df: Union[pl.DataFrame, pd.DataFrame],
    text_column_name: str,
    sentiment_column_name: str,
    perform_tuning: bool = False
):
    """
    Runs the full pipeline:
    - vectorize
    - train model
    - evaluate

    Args:
        vectorizer_name (str): Module name under ``quick_sentiments.vect`` that
            exposes a ``vectorize`` function.
        model_name (str): Module name under ``quick_sentiments.ml_algo`` that
            exposes a ``train_and_predict`` function.
        df (pl.DataFrame | pd.DataFrame): Data holding the text and sentiment
            columns. A pandas frame is converted to Polars.
        text_column_name (str): The name of the column in `df` that contains the processed text.
        sentiment_column_name (str): The name of the column in `df` that contains the sentiment labels.
        perform_tuning (bool): Forwarded to the model's ``train_and_predict``.

    Returns:
        dict | None: A dictionary containing the trained model, vectorizer name,
        vectorizer object, label encoder, ``y_test``, ``y_pred``, accuracy and
        the classification report as a dict. ``None`` is returned (after
        printing a message) when the vectorizer/model module cannot be loaded
        or when every row has a missing text or label.

    Raises:
        TypeError: If `df` is neither a Polars nor a pandas DataFrame.
    """
    print(f"--- Running Pipeline for {vectorizer_name.replace('_', ' ').title()} + {model_name.replace('_', ' ').title()} ---")

    # Import vectorizer from vect folder
    try:
        vec_module = importlib.import_module(f"quick_sentiments.vect.{vectorizer_name}")
        vectorize_function = getattr(vec_module, "vectorize")
    except (ImportError, AttributeError) as e:
        print(f"Error loading vectorizer module/function: {e}")
        return None

    # Import ML model from ml_algo folder
    try:
        model_module = importlib.import_module(f"quick_sentiments.ml_algo.{model_name}")
        train_and_predict_function = getattr(model_module, "train_and_predict")
    except (ImportError, AttributeError) as e:
        print(f"Error loading ML model module/function: {e}")
        return None

    # Accept both Polars and pandas input; everything below works on Polars.
    if isinstance(df, pd.DataFrame):
        df = pl.from_pandas(df)
    elif not isinstance(df, pl.DataFrame):
        raise TypeError(f"Expected Polars or pandas DataFrame, got {type(df)}")

    X_text = df[text_column_name].to_list()
    y_raw = df[sentiment_column_name].to_list()

    # Keep only rows where both the text and the label are present.
    initial_data_len = len(X_text)
    filtered_data = [(x, y_val) for x, y_val in zip(X_text, y_raw) if x is not None and y_val is not None]

    if not filtered_data:
        print("WARNING: All data rows contained missing values after initial extraction. Cannot proceed with training.")
        return None

    X_text = [x for x, _ in filtered_data]
    y_raw = [y_val for _, y_val in filtered_data]

    dropped_rows_count = initial_data_len - len(X_text)
    if dropped_rows_count > 0:
        print(f"WARNING: Dropped {dropped_rows_count} rows due to missing values (None) in '{text_column_name}' or '{sentiment_column_name}' columns. Original rows: {initial_data_len}, Rows after dropping: {len(X_text)}")
    else:
        print("No missing values (None) found in text or sentiment columns. Proceeding with all rows.")

    # Label Encoding for y_raw
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y_raw)
    print(f"Labels encoded: Original -> {label_encoder.classes_}, Encoded -> {np.unique(y)}")

    # classification_report measures len() of every target name, so non-string
    # labels (e.g. 0/1) must be stringified. Passing the encoded label ids
    # explicitly keeps names and labels aligned even if a rare class is absent
    # from both y_test and y_pred.
    class_names = [str(c) for c in label_encoder.classes_]
    class_ids = np.arange(len(class_names))

    # Vectorize the entire dataset (X)
    # NOTE(review): vectorization happens before the split, so the returned
    # vectorizer object has presumably seen the test rows too - confirm this
    # is acceptable for the reported metrics.
    print("1. Vectorizing entire dataset (X)...")
    X_vectorized, fitted_vectorizer_object = vectorize_function(X_text)

    # Split data AFTER vectorization
    print("2. Splitting data into train/test...")
    X_train, X_test, y_train, y_test = train_test_split(
        X_vectorized, y, test_size=0.2, random_state=42, stratify=y
    )

    # Train + predict
    print("3. Training and predicting...")
    y_pred, trained_model_object = train_and_predict_function(X_train, y_train, X_test, perform_tuning=perform_tuning)

    # Evaluate
    print("4. Evaluating model...")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, labels=class_ids, target_names=class_names))
    print("True labels distribution:", Counter(y_test))
    print("Predicted labels distribution:", Counter(y_pred))

    # Return results including all necessary objects for future predictions
    return {
        "model_object": trained_model_object,
        "vectorizer_name": vectorizer_name,
        "vectorizer_object": fitted_vectorizer_object,
        "label_encoder": label_encoder,
        "y_test": y_test,
        "y_pred": y_pred,
        "accuracy": accuracy_score(y_test, y_pred),
        "report": classification_report(
            y_test, y_pred, output_dict=True, labels=class_ids, target_names=class_names
        )
    }
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
import polars as pl
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
def make_predictions(
    new_data: Union[pl.DataFrame, pd.DataFrame],
    text_column_name: str,
    vectorizer,
    best_model,
    label_encoder,
    prediction_column_name: str = "predictions") -> pl.DataFrame:
    """
    Predict labels for new text and append them as a column.

    Args:
        new_data: Input DataFrame (Polars or pandas); pandas is converted to Polars.
        text_column_name: Column holding the text to predict on.
        vectorizer: Either an object with a ``transform`` method, or a
            word-vector lookup supporting ``word in vectorizer``,
            ``vectorizer[word]`` and ``vectorizer.vector_size``.
        best_model: Trained model exposing ``predict``.
        label_encoder: Fitted encoder whose ``inverse_transform`` maps the
            numeric predictions back to the original labels.
        prediction_column_name: Name of the added prediction column.

    Returns:
        Polars DataFrame (rows with a null text dropped) with the decoded
        predictions added.

    Raises:
        TypeError: If `new_data` is neither a Polars nor a pandas DataFrame.
    """
    # Validate the container type, then normalise to Polars.
    if not isinstance(new_data, (pd.DataFrame, pl.DataFrame)):
        raise TypeError(f"Expected Polars or pandas DataFrame, got {type(new_data)}")
    if isinstance(new_data, pd.DataFrame):
        new_data = pl.from_pandas(new_data)

    # Rows without text cannot be vectorised, so they are removed up front.
    new_data = new_data.drop_nulls(subset=[text_column_name])
    sentences = new_data[text_column_name].to_list()

    if not hasattr(vectorizer, 'transform'):
        # Word-embedding path: average the vectors of the known words and
        # fall back to a zero vector when no word is known.
        rows = []
        for sentence in sentences:
            known = [vectorizer[tok] for tok in sentence.split() if tok in vectorizer]
            if known:
                rows.append(np.mean(known, axis=0))
            else:
                rows.append(np.zeros(vectorizer.vector_size))
        new_features = np.array(rows)
    else:
        new_features = vectorizer.transform(sentences)

    # Numeric class ids -> original label values.
    encoded = best_model.predict(new_features)
    decoded = label_encoder.inverse_transform(encoded)

    prediction_series = pl.Series(prediction_column_name, decoded)
    return new_data.with_columns(prediction_series)
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
import string
|
|
2
|
+
import re
|
|
3
|
+
import nltk
|
|
4
|
+
from nltk.tokenize import word_tokenize
|
|
5
|
+
from nltk.corpus import stopwords
|
|
6
|
+
from nltk.stem import WordNetLemmatizer
|
|
7
|
+
import unicodedata
|
|
8
|
+
|
|
9
|
+
# This block ensures NLTK data is downloaded if not found.
|
|
10
|
+
# It's good practice to have this check in a module that relies on NLTK data.
|
|
11
|
+
# --- NLTK Downloads (Run these once, preferably outside the script or with a check) ---
|
|
12
|
+
# This block ensures NLTK data is downloaded if not found.
|
|
13
|
+
# It's good practice to have this check in a module that relies on NLTK data.
|
|
14
|
+
def _download_nltk_data():
    """Download the NLTK resources this module needs if they are not installed.

    Each resource is looked up with ``nltk.data.find``; a ``LookupError``
    (resource missing) triggers ``nltk.download`` for that package. Progress
    is reported with ``print``.
    """
    # Package id -> NLTK data sub-directory it is looked up in.
    # 'punkt' / 'punkt_tab' back word_tokenize: recent NLTK releases load the
    # 'punkt_tab' tables, older ones the 'punkt' models, so both are ensured.
    # 'stopwords' backs the stop-word list; 'wordnet' and 'omw-1.4' back
    # WordNetLemmatizer.
    resource_dirs = {
        'punkt': 'tokenizers',
        'punkt_tab': 'tokenizers',
        'stopwords': 'corpora',
        'wordnet': 'corpora',
        'omw-1.4': 'corpora',
    }

    for dataset, category in resource_dirs.items():
        try:
            nltk.data.find(f'{category}/{dataset}')
        except LookupError:
            print(f"Downloading NLTK data: {dataset}...")
            # nltk.download reports failure through its return value.
            if nltk.download(dataset):
                print(f"NLTK data '{dataset}' downloaded.")
            else:
                print(f"WARNING: NLTK data '{dataset}' could not be downloaded.")
        else:
            print(f"NLTK data '{dataset}' already present.")
|
|
38
|
+
|
|
39
|
+
_download_nltk_data()  # Ensure the NLTK resources exist before they are loaded below

# --- Initialize NLTK components once ---
# These are global to this module and shared by every pre_process call.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# --- Helper Cleaning Functions ---
|
|
54
|
+
def remove_square_brackets(text):
    """Delete every ``[...]`` span (shortest match) and trim the ends."""
    bracketed = re.compile(r'\[.*?\]')
    without_brackets = bracketed.sub('', text)
    return without_brackets.strip()
|
|
58
|
+
|
|
59
|
+
def remove_urls_emails(text):
    """Removes URLs and email addresses from text.

    A URL is a run of non-whitespace that starts, at a word boundary, with
    ``http://``, ``https://`` or ``www.`` (case-insensitive). Anchoring on the
    boundary and the scheme/dot keeps ordinary words such as "awww" or
    "httpd" intact. Any whitespace-delimited token containing ``@`` is
    removed together with one following whitespace character.
    """
    text = re.sub(r'\b(?:https?://|www\.)\S+', '', text, flags=re.IGNORECASE)
    text = re.sub(r'\S*@\S*\s?', '', text)
    return text
|
|
64
|
+
|
|
65
|
+
def remove_html_tags(text):
    """Strip every ``<...>`` span (shortest match) from text."""
    tag_pattern = re.compile('<.*?>')
    return tag_pattern.sub('', text)
|
|
69
|
+
|
|
70
|
+
def remove_numbers(text):
    """Delete every run of digits from text."""
    digit_runs = re.compile(r'\d+')
    return digit_runs.sub('', text)
|
|
73
|
+
|
|
74
|
+
def remove_extra_spaces(text):
    """Collapse each whitespace run to one space and trim both ends."""
    collapsed = re.sub(r'\s+', ' ', text)
    return collapsed.strip()
|
|
77
|
+
|
|
78
|
+
def remove_punctuation_from_token(token):
    """Return token with every ``string.punctuation`` character dropped."""
    kept_chars = [ch for ch in token if ch not in string.punctuation]
    return ''.join(kept_chars)
|
|
82
|
+
|
|
83
|
+
def remove_emojis(text):
    """Removes common emojis from text using a regex pattern.

    Only emoji/pictograph code points are matched. The ranges are kept
    narrow on purpose: a single wide range such as U+24C2..U+1F251 would
    also swallow CJK ideographs, kana and Hangul, i.e. ordinary text.
    """
    emoji_pattern = re.compile(
        "["
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F300-\U0001F5FF"  # symbols & pictographs
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F000-\U0001F2FF"  # tiles/cards, enclosed characters, flags
        "\U0001F900-\U0001F9FF"  # supplemental symbols & pictographs
        "\U0001FA70-\U0001FAFF"  # symbols & pictographs extended-A
        "\U00002600-\U000027BF"  # miscellaneous symbols and dingbats
        "\U00002B00-\U00002BFF"  # stars, squares and arrows used as emoji
        "\U000024C2"             # circled latin capital M
        "\U00003030\U0000303D\U00003297\U00003299"  # emoji located in CJK symbol blocks
        "\U0000FE0F"             # variation selector-16 (emoji presentation)
        "]+", flags=re.UNICODE)
    return emoji_pattern.sub(r'', text)
|
|
95
|
+
|
|
96
|
+
def normalize_unicode_characters(text):
    """
    Normalizes unicode characters (e.g., smart quotes, accented chars)
    to their closest ASCII equivalents and removes remaining non-ASCII.

    Typographic quotes and dashes have no NFKD decomposition to ASCII, so
    they are translated explicitly first; otherwise the ASCII encode step
    would silently drop them ("don’t" -> "dont", "a—b" -> "ab").
    """
    typographic_map = str.maketrans({
        '\u2018': "'", '\u2019': "'", '\u201A': "'", '\u201B': "'",
        '\u201C': '"', '\u201D': '"', '\u201E': '"', '\u201F': '"',
        '\u2010': '-', '\u2011': '-', '\u2012': '-', '\u2013': '-',
        '\u2014': '-', '\u2015': '-', '\u2212': '-',
    })
    text = text.translate(typographic_map)
    # NFKD splits characters such as é into "e" + combining accent; encoding
    # to ASCII with errors ignored then drops whatever is still non-ASCII.
    return unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8')
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# --- Main Preprocessing Function with Options ---
|
|
107
|
+
def pre_process(doc,
                remove_brackets=True,
                remove_urls=True,
                remove_html=True,
                remove_nums=False,
                remove_emojis_flag=False,
                normalize_unicode=True,
                to_lowercase=True,
                tokenize=True,
                remove_punct_tokens=True,
                remove_stop_words=True,
                lemmatize=True,
                remove_extra_space=True,
                return_string=True):
    """
    Preprocesses a text document with configurable cleaning steps.

    Text-level cleaning runs first, in the order of the flags below, followed
    by tokenization and per-token cleaning.

    Args:
        doc (str): The input text document.
        remove_brackets (bool): If True, remove text in square brackets.
        remove_urls (bool): If True, remove URLs and email addresses.
        remove_html (bool): If True, remove HTML tags.
        remove_nums (bool): If True, remove all numeric digits.
        remove_emojis_flag (bool): If True, remove common emojis.
        normalize_unicode (bool): If True, normalize unicode characters to ASCII.
        to_lowercase (bool): If True, convert text to lowercase.
        tokenize (bool): If True, tokenize the text using NLTK's word_tokenize.
            If False, the cleaned text is returned right after the text-level
            steps and none of the token-level options are applied.
        remove_punct_tokens (bool): If True, remove punctuation from individual tokens.
        remove_stop_words (bool): If True, remove common English stop words
            (matched case-insensitively).
        lemmatize (bool): If True, perform lemmatization on tokens.
        remove_extra_space (bool): If True, replace multiple spaces with single spaces.
        return_string (bool): If True, join tokens back into a string; otherwise, return a list of tokens.

    Returns:
        str or list: The preprocessed text as a string or a list of tokens.
    """
    # Stage 1: Text-level cleaning (before tokenization)
    if remove_brackets:
        doc = remove_square_brackets(doc)
    if remove_urls:
        doc = remove_urls_emails(doc)
    if remove_html:
        doc = remove_html_tags(doc)
    if remove_nums:
        doc = remove_numbers(doc)
    if remove_emojis_flag:
        doc = remove_emojis(doc)
    if normalize_unicode:
        doc = normalize_unicode_characters(doc)
    if to_lowercase:
        doc = doc.lower()
    if remove_extra_space:
        doc = remove_extra_spaces(doc)

    # Stage 2: Tokenization
    if not tokenize:
        # Without tokenization only the text-level cleaning applies; wrap the
        # string in a list when the caller asked for a list result.
        cleaned = doc.strip()
        return cleaned if return_string else [cleaned]

    tokens = word_tokenize(doc)

    # Stage 3: Token-level cleaning and normalization
    processed_tokens = []
    for token in tokens:
        if remove_punct_tokens:
            token = remove_punctuation_from_token(token)

        if not token:  # Token consisted only of punctuation
            continue

        # The stop-word list is lowercase, so compare case-insensitively;
        # otherwise "The"/"And" would survive when to_lowercase is False.
        if remove_stop_words and token.lower() in stop_words:
            continue

        if lemmatize:
            token = lemmatizer.lemmatize(token)

        if token:  # Guard against a step having produced an empty string
            processed_tokens.append(token)

    if return_string:
        return " ".join(processed_tokens)
    return processed_tokens
|