edsl 0.1.39.dev3__py3-none-any.whl → 0.1.39.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +413 -332
- edsl/BaseDiff.py +260 -260
- edsl/TemplateLoader.py +24 -24
- edsl/__init__.py +57 -49
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +1071 -867
- edsl/agents/AgentList.py +551 -413
- edsl/agents/Invigilator.py +284 -233
- edsl/agents/InvigilatorBase.py +257 -270
- edsl/agents/PromptConstructor.py +272 -354
- edsl/agents/QuestionInstructionPromptBuilder.py +128 -0
- edsl/agents/QuestionTemplateReplacementsBuilder.py +137 -0
- edsl/agents/__init__.py +2 -3
- edsl/agents/descriptors.py +99 -99
- edsl/agents/prompt_helpers.py +129 -129
- edsl/agents/question_option_processor.py +172 -0
- edsl/auto/AutoStudy.py +130 -117
- edsl/auto/StageBase.py +243 -230
- edsl/auto/StageGenerateSurvey.py +178 -178
- edsl/auto/StageLabelQuestions.py +125 -125
- edsl/auto/StagePersona.py +61 -61
- edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
- edsl/auto/StagePersonaDimensionValues.py +74 -74
- edsl/auto/StagePersonaDimensions.py +69 -69
- edsl/auto/StageQuestions.py +74 -73
- edsl/auto/SurveyCreatorPipeline.py +21 -21
- edsl/auto/utilities.py +218 -224
- edsl/base/Base.py +279 -279
- edsl/config.py +177 -157
- edsl/conversation/Conversation.py +290 -290
- edsl/conversation/car_buying.py +59 -58
- edsl/conversation/chips.py +95 -95
- edsl/conversation/mug_negotiation.py +81 -81
- edsl/conversation/next_speaker_utilities.py +93 -93
- edsl/coop/CoopFunctionsMixin.py +15 -0
- edsl/coop/ExpectedParrotKeyHandler.py +125 -0
- edsl/coop/PriceFetcher.py +54 -54
- edsl/coop/__init__.py +2 -2
- edsl/coop/coop.py +1106 -1028
- edsl/coop/utils.py +131 -131
- edsl/data/Cache.py +573 -555
- edsl/data/CacheEntry.py +230 -233
- edsl/data/CacheHandler.py +168 -149
- edsl/data/RemoteCacheSync.py +186 -78
- edsl/data/SQLiteDict.py +292 -292
- edsl/data/__init__.py +5 -4
- edsl/data/hack.py +10 -0
- edsl/data/orm.py +10 -10
- edsl/data_transfer_models.py +74 -73
- edsl/enums.py +202 -175
- edsl/exceptions/BaseException.py +21 -21
- edsl/exceptions/__init__.py +54 -54
- edsl/exceptions/agents.py +54 -42
- edsl/exceptions/cache.py +5 -5
- edsl/exceptions/configuration.py +16 -16
- edsl/exceptions/coop.py +10 -10
- edsl/exceptions/data.py +14 -14
- edsl/exceptions/general.py +34 -34
- edsl/exceptions/inference_services.py +5 -0
- edsl/exceptions/jobs.py +33 -33
- edsl/exceptions/language_models.py +63 -63
- edsl/exceptions/prompts.py +15 -15
- edsl/exceptions/questions.py +109 -91
- edsl/exceptions/results.py +29 -29
- edsl/exceptions/scenarios.py +29 -22
- edsl/exceptions/surveys.py +37 -37
- edsl/inference_services/AnthropicService.py +106 -87
- edsl/inference_services/AvailableModelCacheHandler.py +184 -0
- edsl/inference_services/AvailableModelFetcher.py +215 -0
- edsl/inference_services/AwsBedrock.py +118 -120
- edsl/inference_services/AzureAI.py +215 -217
- edsl/inference_services/DeepInfraService.py +18 -18
- edsl/inference_services/GoogleService.py +143 -148
- edsl/inference_services/GroqService.py +20 -20
- edsl/inference_services/InferenceServiceABC.py +80 -147
- edsl/inference_services/InferenceServicesCollection.py +138 -97
- edsl/inference_services/MistralAIService.py +120 -123
- edsl/inference_services/OllamaService.py +18 -18
- edsl/inference_services/OpenAIService.py +236 -224
- edsl/inference_services/PerplexityService.py +160 -163
- edsl/inference_services/ServiceAvailability.py +135 -0
- edsl/inference_services/TestService.py +90 -89
- edsl/inference_services/TogetherAIService.py +172 -170
- edsl/inference_services/data_structures.py +134 -0
- edsl/inference_services/models_available_cache.py +118 -118
- edsl/inference_services/rate_limits_cache.py +25 -25
- edsl/inference_services/registry.py +41 -41
- edsl/inference_services/write_available.py +10 -10
- edsl/jobs/AnswerQuestionFunctionConstructor.py +223 -0
- edsl/jobs/Answers.py +43 -56
- edsl/jobs/FetchInvigilator.py +47 -0
- edsl/jobs/InterviewTaskManager.py +98 -0
- edsl/jobs/InterviewsConstructor.py +50 -0
- edsl/jobs/Jobs.py +823 -898
- edsl/jobs/JobsChecks.py +172 -147
- edsl/jobs/JobsComponentConstructor.py +189 -0
- edsl/jobs/JobsPrompts.py +270 -268
- edsl/jobs/JobsRemoteInferenceHandler.py +311 -239
- edsl/jobs/JobsRemoteInferenceLogger.py +239 -0
- edsl/jobs/RequestTokenEstimator.py +30 -0
- edsl/jobs/__init__.py +1 -1
- edsl/jobs/async_interview_runner.py +138 -0
- edsl/jobs/buckets/BucketCollection.py +104 -63
- edsl/jobs/buckets/ModelBuckets.py +65 -65
- edsl/jobs/buckets/TokenBucket.py +283 -251
- edsl/jobs/buckets/TokenBucketAPI.py +211 -0
- edsl/jobs/buckets/TokenBucketClient.py +191 -0
- edsl/jobs/check_survey_scenario_compatibility.py +85 -0
- edsl/jobs/data_structures.py +120 -0
- edsl/jobs/decorators.py +35 -0
- edsl/jobs/interviews/Interview.py +396 -661
- edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
- edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
- edsl/jobs/interviews/InterviewStatistic.py +63 -63
- edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
- edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
- edsl/jobs/interviews/InterviewStatusLog.py +92 -92
- edsl/jobs/interviews/ReportErrors.py +66 -66
- edsl/jobs/interviews/interview_status_enum.py +9 -9
- edsl/jobs/jobs_status_enums.py +9 -0
- edsl/jobs/loggers/HTMLTableJobLogger.py +304 -0
- edsl/jobs/results_exceptions_handler.py +98 -0
- edsl/jobs/runners/JobsRunnerAsyncio.py +151 -466
- edsl/jobs/runners/JobsRunnerStatus.py +297 -330
- edsl/jobs/tasks/QuestionTaskCreator.py +244 -242
- edsl/jobs/tasks/TaskCreators.py +64 -64
- edsl/jobs/tasks/TaskHistory.py +470 -450
- edsl/jobs/tasks/TaskStatusLog.py +23 -23
- edsl/jobs/tasks/task_status_enum.py +161 -163
- edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
- edsl/jobs/tokens/TokenUsage.py +34 -34
- edsl/language_models/ComputeCost.py +63 -0
- edsl/language_models/LanguageModel.py +626 -668
- edsl/language_models/ModelList.py +164 -155
- edsl/language_models/PriceManager.py +127 -0
- edsl/language_models/RawResponseHandler.py +106 -0
- edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/__init__.py +2 -3
- edsl/language_models/fake_openai_call.py +15 -15
- edsl/language_models/fake_openai_service.py +61 -61
- edsl/language_models/key_management/KeyLookup.py +63 -0
- edsl/language_models/key_management/KeyLookupBuilder.py +273 -0
- edsl/language_models/key_management/KeyLookupCollection.py +38 -0
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +131 -0
- edsl/language_models/model.py +256 -0
- edsl/language_models/repair.py +156 -156
- edsl/language_models/utilities.py +65 -64
- edsl/notebooks/Notebook.py +263 -258
- edsl/notebooks/NotebookToLaTeX.py +142 -0
- edsl/notebooks/__init__.py +1 -1
- edsl/prompts/Prompt.py +352 -362
- edsl/prompts/__init__.py +2 -2
- edsl/questions/ExceptionExplainer.py +77 -0
- edsl/questions/HTMLQuestion.py +103 -0
- edsl/questions/QuestionBase.py +518 -664
- edsl/questions/QuestionBasePromptsMixin.py +221 -217
- edsl/questions/QuestionBudget.py +227 -227
- edsl/questions/QuestionCheckBox.py +359 -359
- edsl/questions/QuestionExtract.py +180 -182
- edsl/questions/QuestionFreeText.py +113 -114
- edsl/questions/QuestionFunctional.py +166 -166
- edsl/questions/QuestionList.py +223 -231
- edsl/questions/QuestionMatrix.py +265 -0
- edsl/questions/QuestionMultipleChoice.py +330 -286
- edsl/questions/QuestionNumerical.py +151 -153
- edsl/questions/QuestionRank.py +314 -324
- edsl/questions/Quick.py +41 -41
- edsl/questions/SimpleAskMixin.py +74 -73
- edsl/questions/__init__.py +27 -26
- edsl/questions/{AnswerValidatorMixin.py → answer_validator_mixin.py} +334 -289
- edsl/questions/compose_questions.py +98 -98
- edsl/questions/data_structures.py +20 -0
- edsl/questions/decorators.py +21 -21
- edsl/questions/derived/QuestionLikertFive.py +76 -76
- edsl/questions/derived/QuestionLinearScale.py +90 -87
- edsl/questions/derived/QuestionTopK.py +93 -93
- edsl/questions/derived/QuestionYesNo.py +82 -82
- edsl/questions/descriptors.py +427 -413
- edsl/questions/loop_processor.py +149 -0
- edsl/questions/prompt_templates/question_budget.jinja +13 -13
- edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
- edsl/questions/prompt_templates/question_extract.jinja +11 -11
- edsl/questions/prompt_templates/question_free_text.jinja +3 -3
- edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
- edsl/questions/prompt_templates/question_list.jinja +17 -17
- edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
- edsl/questions/prompt_templates/question_numerical.jinja +36 -36
- edsl/questions/{QuestionBaseGenMixin.py → question_base_gen_mixin.py} +168 -161
- edsl/questions/question_registry.py +177 -177
- edsl/questions/{RegisterQuestionsMeta.py → register_questions_meta.py} +71 -71
- edsl/questions/{ResponseValidatorABC.py → response_validator_abc.py} +188 -174
- edsl/questions/response_validator_factory.py +34 -0
- edsl/questions/settings.py +12 -12
- edsl/questions/templates/budget/answering_instructions.jinja +7 -7
- edsl/questions/templates/budget/question_presentation.jinja +7 -7
- edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
- edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
- edsl/questions/templates/extract/answering_instructions.jinja +7 -7
- edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
- edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
- edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
- edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
- edsl/questions/templates/list/answering_instructions.jinja +3 -3
- edsl/questions/templates/list/question_presentation.jinja +5 -5
- edsl/questions/templates/matrix/__init__.py +1 -0
- edsl/questions/templates/matrix/answering_instructions.jinja +5 -0
- edsl/questions/templates/matrix/question_presentation.jinja +20 -0
- edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
- edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
- edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
- edsl/questions/templates/numerical/question_presentation.jinja +6 -6
- edsl/questions/templates/rank/answering_instructions.jinja +11 -11
- edsl/questions/templates/rank/question_presentation.jinja +15 -15
- edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
- edsl/questions/templates/top_k/question_presentation.jinja +22 -22
- edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
- edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
- edsl/results/CSSParameterizer.py +108 -108
- edsl/results/Dataset.py +587 -424
- edsl/results/DatasetExportMixin.py +594 -731
- edsl/results/DatasetTree.py +295 -275
- edsl/results/MarkdownToDocx.py +122 -0
- edsl/results/MarkdownToPDF.py +111 -0
- edsl/results/Result.py +557 -465
- edsl/results/Results.py +1183 -1165
- edsl/results/ResultsExportMixin.py +45 -43
- edsl/results/ResultsGGMixin.py +121 -121
- edsl/results/TableDisplay.py +125 -198
- edsl/results/TextEditor.py +50 -0
- edsl/results/__init__.py +2 -2
- edsl/results/file_exports.py +252 -0
- edsl/results/{ResultsFetchMixin.py → results_fetch_mixin.py} +33 -33
- edsl/results/{Selector.py → results_selector.py} +145 -135
- edsl/results/{ResultsToolsMixin.py → results_tools_mixin.py} +98 -98
- edsl/results/smart_objects.py +96 -0
- edsl/results/table_data_class.py +12 -0
- edsl/results/table_display.css +77 -77
- edsl/results/table_renderers.py +118 -0
- edsl/results/tree_explore.py +115 -115
- edsl/scenarios/ConstructDownloadLink.py +109 -0
- edsl/scenarios/DocumentChunker.py +102 -0
- edsl/scenarios/DocxScenario.py +16 -0
- edsl/scenarios/FileStore.py +511 -632
- edsl/scenarios/PdfExtractor.py +40 -0
- edsl/scenarios/Scenario.py +498 -601
- edsl/scenarios/ScenarioHtmlMixin.py +65 -64
- edsl/scenarios/ScenarioList.py +1458 -1287
- edsl/scenarios/ScenarioListExportMixin.py +45 -52
- edsl/scenarios/ScenarioListPdfMixin.py +239 -261
- edsl/scenarios/__init__.py +3 -4
- edsl/scenarios/directory_scanner.py +96 -0
- edsl/scenarios/file_methods.py +85 -0
- edsl/scenarios/handlers/__init__.py +13 -0
- edsl/scenarios/handlers/csv.py +38 -0
- edsl/scenarios/handlers/docx.py +76 -0
- edsl/scenarios/handlers/html.py +37 -0
- edsl/scenarios/handlers/json.py +111 -0
- edsl/scenarios/handlers/latex.py +5 -0
- edsl/scenarios/handlers/md.py +51 -0
- edsl/scenarios/handlers/pdf.py +68 -0
- edsl/scenarios/handlers/png.py +39 -0
- edsl/scenarios/handlers/pptx.py +105 -0
- edsl/scenarios/handlers/py.py +294 -0
- edsl/scenarios/handlers/sql.py +313 -0
- edsl/scenarios/handlers/sqlite.py +149 -0
- edsl/scenarios/handlers/txt.py +33 -0
- edsl/scenarios/{ScenarioJoin.py → scenario_join.py} +131 -127
- edsl/scenarios/scenario_selector.py +156 -0
- edsl/shared.py +1 -1
- edsl/study/ObjectEntry.py +173 -173
- edsl/study/ProofOfWork.py +113 -113
- edsl/study/SnapShot.py +80 -80
- edsl/study/Study.py +521 -528
- edsl/study/__init__.py +4 -4
- edsl/surveys/ConstructDAG.py +92 -0
- edsl/surveys/DAG.py +148 -148
- edsl/surveys/EditSurvey.py +221 -0
- edsl/surveys/InstructionHandler.py +100 -0
- edsl/surveys/Memory.py +31 -31
- edsl/surveys/MemoryManagement.py +72 -0
- edsl/surveys/MemoryPlan.py +244 -244
- edsl/surveys/Rule.py +327 -326
- edsl/surveys/RuleCollection.py +385 -387
- edsl/surveys/RuleManager.py +172 -0
- edsl/surveys/Simulator.py +75 -0
- edsl/surveys/Survey.py +1280 -1801
- edsl/surveys/SurveyCSS.py +273 -261
- edsl/surveys/SurveyExportMixin.py +259 -259
- edsl/surveys/{SurveyFlowVisualizationMixin.py → SurveyFlowVisualization.py} +181 -179
- edsl/surveys/SurveyQualtricsImport.py +284 -284
- edsl/surveys/SurveyToApp.py +141 -0
- edsl/surveys/__init__.py +5 -3
- edsl/surveys/base.py +53 -53
- edsl/surveys/descriptors.py +60 -56
- edsl/surveys/instructions/ChangeInstruction.py +48 -49
- edsl/surveys/instructions/Instruction.py +56 -65
- edsl/surveys/instructions/InstructionCollection.py +82 -77
- edsl/templates/error_reporting/base.html +23 -23
- edsl/templates/error_reporting/exceptions_by_model.html +34 -34
- edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
- edsl/templates/error_reporting/exceptions_by_type.html +16 -16
- edsl/templates/error_reporting/interview_details.html +115 -115
- edsl/templates/error_reporting/interviews.html +19 -19
- edsl/templates/error_reporting/overview.html +4 -4
- edsl/templates/error_reporting/performance_plot.html +1 -1
- edsl/templates/error_reporting/report.css +73 -73
- edsl/templates/error_reporting/report.html +117 -117
- edsl/templates/error_reporting/report.js +25 -25
- edsl/test_h +1 -0
- edsl/tools/__init__.py +1 -1
- edsl/tools/clusters.py +192 -192
- edsl/tools/embeddings.py +27 -27
- edsl/tools/embeddings_plotting.py +118 -118
- edsl/tools/plotting.py +112 -112
- edsl/tools/summarize.py +18 -18
- edsl/utilities/PrettyList.py +56 -0
- edsl/utilities/SystemInfo.py +28 -28
- edsl/utilities/__init__.py +22 -22
- edsl/utilities/ast_utilities.py +25 -25
- edsl/utilities/data/Registry.py +6 -6
- edsl/utilities/data/__init__.py +1 -1
- edsl/utilities/data/scooter_results.json +1 -1
- edsl/utilities/decorators.py +77 -77
- edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
- edsl/utilities/gcp_bucket/example.py +50 -0
- edsl/utilities/interface.py +627 -627
- edsl/utilities/is_notebook.py +18 -0
- edsl/utilities/is_valid_variable_name.py +11 -0
- edsl/utilities/naming_utilities.py +263 -263
- edsl/utilities/remove_edsl_version.py +24 -0
- edsl/utilities/repair_functions.py +28 -28
- edsl/utilities/restricted_python.py +70 -70
- edsl/utilities/utilities.py +436 -424
- {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev4.dist-info}/LICENSE +21 -21
- {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev4.dist-info}/METADATA +13 -11
- edsl-0.1.39.dev4.dist-info/RECORD +361 -0
- edsl/language_models/KeyLookup.py +0 -30
- edsl/language_models/registry.py +0 -190
- edsl/language_models/unused/ReplicateBase.py +0 -83
- edsl/results/ResultsDBMixin.py +0 -238
- edsl-0.1.39.dev3.dist-info/RECORD +0 -277
- {edsl-0.1.39.dev3.dist-info → edsl-0.1.39.dev4.dist-info}/WHEEL +0 -0
edsl/utilities/utilities.py
CHANGED
@@ -1,424 +1,436 @@
|
|
1
|
-
"""Utility functions for working with strings, dictionaries, and files."""
|
2
|
-
|
3
|
-
from functools import wraps
|
4
|
-
import types
|
5
|
-
import time
|
6
|
-
|
7
|
-
import hashlib
|
8
|
-
import json
|
9
|
-
import keyword
|
10
|
-
import os
|
11
|
-
import random
|
12
|
-
import re
|
13
|
-
import string
|
14
|
-
import tempfile
|
15
|
-
import gzip
|
16
|
-
import webbrowser
|
17
|
-
import json
|
18
|
-
|
19
|
-
from html import escape
|
20
|
-
from typing import Callable, Union
|
21
|
-
|
22
|
-
|
23
|
-
class CustomEncoder(json.JSONEncoder):
|
24
|
-
def default(self, obj):
|
25
|
-
try:
|
26
|
-
return json.JSONEncoder.default(self, obj)
|
27
|
-
except TypeError:
|
28
|
-
return str(obj)
|
29
|
-
|
30
|
-
|
31
|
-
def time_it(func):
|
32
|
-
@wraps(func)
|
33
|
-
def wrapper(*args, **kwargs):
|
34
|
-
start_time = time.time()
|
35
|
-
result = func(*args, **kwargs)
|
36
|
-
end_time = time.time()
|
37
|
-
execution_time = end_time - start_time
|
38
|
-
class_name = args[0].__class__.__name__ if args else func.__module__
|
39
|
-
print(
|
40
|
-
f"Function {class_name}.{func.__name__} took {execution_time:.4f} seconds to execute"
|
41
|
-
)
|
42
|
-
return result
|
43
|
-
|
44
|
-
return wrapper
|
45
|
-
|
46
|
-
|
47
|
-
def time_all_functions(module_or_class):
|
48
|
-
for name, obj in vars(module_or_class).items():
|
49
|
-
if isinstance(obj, types.FunctionType):
|
50
|
-
setattr(module_or_class, name, time_it(obj))
|
51
|
-
|
52
|
-
|
53
|
-
def dict_hash(data: dict):
|
54
|
-
return hash(
|
55
|
-
int(hashlib.md5(json.dumps(data, sort_keys=True).encode()).hexdigest(), 16)
|
56
|
-
)
|
57
|
-
|
58
|
-
|
59
|
-
def extract_json_from_string(text):
|
60
|
-
pattern = re.compile(r"\{.*?\}")
|
61
|
-
match = pattern.search(text)
|
62
|
-
if match:
|
63
|
-
json_data = match.group(0)
|
64
|
-
try:
|
65
|
-
json_object = json.loads(json_data)
|
66
|
-
return json_object
|
67
|
-
except json.JSONDecodeError:
|
68
|
-
return None
|
69
|
-
return None
|
70
|
-
|
71
|
-
|
72
|
-
def fix_partial_correct_response(text: str) -> dict:
|
73
|
-
# Find the start position of the key "answer"
|
74
|
-
answer_key_start = text.find('"answer"')
|
75
|
-
|
76
|
-
if answer_key_start == -1:
|
77
|
-
return {"error": "No 'answer' key found in the text"}
|
78
|
-
|
79
|
-
# Define regex to find the complete JSON object starting with "answer"
|
80
|
-
json_pattern = r'(\{[^\{\}]*"answer"[^\{\}]*\})'
|
81
|
-
match = re.search(json_pattern, text)
|
82
|
-
|
83
|
-
if not match:
|
84
|
-
return {"error": "No valid JSON object found"}
|
85
|
-
|
86
|
-
# Extract the matched JSON object
|
87
|
-
json_object = match.group(0)
|
88
|
-
|
89
|
-
# Find the start and stop positions of the JSON object in the original text
|
90
|
-
start_pos = text.find(json_object)
|
91
|
-
stop_pos = start_pos + len(json_object)
|
92
|
-
|
93
|
-
# Parse the JSON object to validate it
|
94
|
-
try:
|
95
|
-
parsed_json = json.loads(json_object)
|
96
|
-
except json.JSONDecodeError:
|
97
|
-
return {"error": "Failed to parse JSON object"}
|
98
|
-
|
99
|
-
# Return the result as a dictionary with positions
|
100
|
-
return {"start": start_pos, "stop": stop_pos, "extracted_json": json_object}
|
101
|
-
|
102
|
-
|
103
|
-
def clean_json(bad_json_str):
|
104
|
-
"""
|
105
|
-
Clean JSON string by replacing single quotes with double quotes
|
106
|
-
|
107
|
-
"""
|
108
|
-
replacements = [
|
109
|
-
("\\", "\\\\"),
|
110
|
-
("\n", "\\n"),
|
111
|
-
("\r", "\\r"),
|
112
|
-
("\t", "\\t"),
|
113
|
-
("\b", "\\b"),
|
114
|
-
("\f", "\\f"),
|
115
|
-
("[/INST]", "removed_inst"),
|
116
|
-
]
|
117
|
-
|
118
|
-
s = bad_json_str
|
119
|
-
for old, new in replacements:
|
120
|
-
s = s.replace(old, new)
|
121
|
-
return s
|
122
|
-
|
123
|
-
|
124
|
-
def data_to_html(data, replace_new_lines=False):
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
def is_gzipped(file_path):
|
148
|
-
"""Check if a file is gzipped."""
|
149
|
-
try:
|
150
|
-
with gzip.open(file_path, "rb") as file:
|
151
|
-
file.read(1) # Try reading a small amount of data
|
152
|
-
return True
|
153
|
-
except OSError:
|
154
|
-
return False
|
155
|
-
|
156
|
-
|
157
|
-
def hash_value(value: Union[str, int]) -> str:
|
158
|
-
"""Hash a string or integer value using SHA-256."""
|
159
|
-
if isinstance(value, str):
|
160
|
-
value_bytes = value.encode("utf-8")
|
161
|
-
elif isinstance(value, int):
|
162
|
-
value_bytes = str(value).encode("utf-8")
|
163
|
-
else:
|
164
|
-
raise ValueError("Hashing supported only for strings or integers.")
|
165
|
-
hash_obj = hashlib.sha256(value_bytes)
|
166
|
-
return hash_obj.hexdigest()
|
167
|
-
|
168
|
-
|
169
|
-
def repair_json(json_string: str) -> str:
|
170
|
-
"""Attempt to repair a JSON string that is not valid JSON."""
|
171
|
-
json_string = json_string.replace("\n", "\\n").replace("\r", "\\r")
|
172
|
-
json_string = json_string.replace("'", "\\'")
|
173
|
-
json_string = json_string.replace("'", '"')
|
174
|
-
json_string = re.sub(r",\s*}", "}", json_string)
|
175
|
-
json_string = re.sub(r",\s*]", "]", json_string)
|
176
|
-
json_string = re.sub(r"(?<={|,)\s*([a-zA-Z0-9_]+)\s*:", r'"\1":', json_string)
|
177
|
-
return json_string
|
178
|
-
|
179
|
-
|
180
|
-
def dict_to_html(d):
|
181
|
-
"""Convert a dictionary to an HTML table."""
|
182
|
-
# Start the HTML table
|
183
|
-
html_table = f'<table border="1">\n<tr><th>{escape("Key")}</th><th>{escape("Value")}</th></tr>\n'
|
184
|
-
|
185
|
-
# Add rows to the HTML table
|
186
|
-
for key, value in d.items():
|
187
|
-
html_table += (
|
188
|
-
f"<tr><td>{escape(str(key))}</td><td>{escape(str(value))}</td></tr>\n"
|
189
|
-
)
|
190
|
-
|
191
|
-
# Close the HTML table
|
192
|
-
html_table += "</table>"
|
193
|
-
return html_table
|
194
|
-
|
195
|
-
|
196
|
-
def is_notebook() -> bool:
|
197
|
-
"""Check if the code is running in a Jupyter notebook."""
|
198
|
-
try:
|
199
|
-
shell = get_ipython().__class__.__name__
|
200
|
-
if shell == "ZMQInteractiveShell":
|
201
|
-
return True # Jupyter notebook or qtconsole
|
202
|
-
elif shell == "
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
"
|
266
|
-
"
|
267
|
-
"
|
268
|
-
"
|
269
|
-
"
|
270
|
-
"
|
271
|
-
"
|
272
|
-
"
|
273
|
-
"
|
274
|
-
"
|
275
|
-
"
|
276
|
-
"
|
277
|
-
"
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
""
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
return
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
#
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
1
|
+
"""Utility functions for working with strings, dictionaries, and files."""
|
2
|
+
|
3
|
+
from functools import wraps
|
4
|
+
import types
|
5
|
+
import time
|
6
|
+
|
7
|
+
import hashlib
|
8
|
+
import json
|
9
|
+
import keyword
|
10
|
+
import os
|
11
|
+
import random
|
12
|
+
import re
|
13
|
+
import string
|
14
|
+
import tempfile
|
15
|
+
import gzip
|
16
|
+
import webbrowser
|
17
|
+
import json
|
18
|
+
|
19
|
+
from html import escape
|
20
|
+
from typing import Callable, Union
|
21
|
+
|
22
|
+
|
23
|
+
class CustomEncoder(json.JSONEncoder):
|
24
|
+
def default(self, obj):
|
25
|
+
try:
|
26
|
+
return json.JSONEncoder.default(self, obj)
|
27
|
+
except TypeError:
|
28
|
+
return str(obj)
|
29
|
+
|
30
|
+
|
31
|
+
def time_it(func):
|
32
|
+
@wraps(func)
|
33
|
+
def wrapper(*args, **kwargs):
|
34
|
+
start_time = time.time()
|
35
|
+
result = func(*args, **kwargs)
|
36
|
+
end_time = time.time()
|
37
|
+
execution_time = end_time - start_time
|
38
|
+
class_name = args[0].__class__.__name__ if args else func.__module__
|
39
|
+
print(
|
40
|
+
f"Function {class_name}.{func.__name__} took {execution_time:.4f} seconds to execute"
|
41
|
+
)
|
42
|
+
return result
|
43
|
+
|
44
|
+
return wrapper
|
45
|
+
|
46
|
+
|
47
|
+
def time_all_functions(module_or_class):
|
48
|
+
for name, obj in vars(module_or_class).items():
|
49
|
+
if isinstance(obj, types.FunctionType):
|
50
|
+
setattr(module_or_class, name, time_it(obj))
|
51
|
+
|
52
|
+
|
53
|
+
def dict_hash(data: dict):
|
54
|
+
return hash(
|
55
|
+
int(hashlib.md5(json.dumps(data, sort_keys=True).encode()).hexdigest(), 16)
|
56
|
+
)
|
57
|
+
|
58
|
+
|
59
|
+
def extract_json_from_string(text):
|
60
|
+
pattern = re.compile(r"\{.*?\}")
|
61
|
+
match = pattern.search(text)
|
62
|
+
if match:
|
63
|
+
json_data = match.group(0)
|
64
|
+
try:
|
65
|
+
json_object = json.loads(json_data)
|
66
|
+
return json_object
|
67
|
+
except json.JSONDecodeError:
|
68
|
+
return None
|
69
|
+
return None
|
70
|
+
|
71
|
+
|
72
|
+
def fix_partial_correct_response(text: str) -> dict:
|
73
|
+
# Find the start position of the key "answer"
|
74
|
+
answer_key_start = text.find('"answer"')
|
75
|
+
|
76
|
+
if answer_key_start == -1:
|
77
|
+
return {"error": "No 'answer' key found in the text"}
|
78
|
+
|
79
|
+
# Define regex to find the complete JSON object starting with "answer"
|
80
|
+
json_pattern = r'(\{[^\{\}]*"answer"[^\{\}]*\})'
|
81
|
+
match = re.search(json_pattern, text)
|
82
|
+
|
83
|
+
if not match:
|
84
|
+
return {"error": "No valid JSON object found"}
|
85
|
+
|
86
|
+
# Extract the matched JSON object
|
87
|
+
json_object = match.group(0)
|
88
|
+
|
89
|
+
# Find the start and stop positions of the JSON object in the original text
|
90
|
+
start_pos = text.find(json_object)
|
91
|
+
stop_pos = start_pos + len(json_object)
|
92
|
+
|
93
|
+
# Parse the JSON object to validate it
|
94
|
+
try:
|
95
|
+
parsed_json = json.loads(json_object)
|
96
|
+
except json.JSONDecodeError:
|
97
|
+
return {"error": "Failed to parse JSON object"}
|
98
|
+
|
99
|
+
# Return the result as a dictionary with positions
|
100
|
+
return {"start": start_pos, "stop": stop_pos, "extracted_json": json_object}
|
101
|
+
|
102
|
+
|
103
|
+
def clean_json(bad_json_str):
|
104
|
+
"""
|
105
|
+
Clean JSON string by replacing single quotes with double quotes
|
106
|
+
|
107
|
+
"""
|
108
|
+
replacements = [
|
109
|
+
("\\", "\\\\"),
|
110
|
+
("\n", "\\n"),
|
111
|
+
("\r", "\\r"),
|
112
|
+
("\t", "\\t"),
|
113
|
+
("\b", "\\b"),
|
114
|
+
("\f", "\\f"),
|
115
|
+
("[/INST]", "removed_inst"),
|
116
|
+
]
|
117
|
+
|
118
|
+
s = bad_json_str
|
119
|
+
for old, new in replacements:
|
120
|
+
s = s.replace(old, new)
|
121
|
+
return s
|
122
|
+
|
123
|
+
|
124
|
+
# def data_to_html(data, replace_new_lines=False):
|
125
|
+
# if "edsl_version" in data:
|
126
|
+
# _ = data.pop("edsl_version")
|
127
|
+
# if "edsl_class_name" in data:
|
128
|
+
# _ = data.pop("edsl_class_name")
|
129
|
+
|
130
|
+
# from pygments import highlight
|
131
|
+
# from pygments.lexers import JsonLexer
|
132
|
+
# from pygments.formatters import HtmlFormatter
|
133
|
+
# from IPython.display import HTML
|
134
|
+
|
135
|
+
# json_str = json.dumps(data, indent=4, cls=CustomEncoder)
|
136
|
+
# formatted_json = highlight(
|
137
|
+
# json_str,
|
138
|
+
# JsonLexer(),
|
139
|
+
# HtmlFormatter(style="default", full=False, noclasses=False),
|
140
|
+
# )
|
141
|
+
# if replace_new_lines:
|
142
|
+
# formatted_json = formatted_json.replace("\\n", "<br>")
|
143
|
+
|
144
|
+
# return HTML(formatted_json).data
|
145
|
+
|
146
|
+
|
147
|
+
def is_gzipped(file_path):
|
148
|
+
"""Check if a file is gzipped."""
|
149
|
+
try:
|
150
|
+
with gzip.open(file_path, "rb") as file:
|
151
|
+
file.read(1) # Try reading a small amount of data
|
152
|
+
return True
|
153
|
+
except OSError:
|
154
|
+
return False
|
155
|
+
|
156
|
+
|
157
|
+
def hash_value(value: Union[str, int]) -> str:
|
158
|
+
"""Hash a string or integer value using SHA-256."""
|
159
|
+
if isinstance(value, str):
|
160
|
+
value_bytes = value.encode("utf-8")
|
161
|
+
elif isinstance(value, int):
|
162
|
+
value_bytes = str(value).encode("utf-8")
|
163
|
+
else:
|
164
|
+
raise ValueError("Hashing supported only for strings or integers.")
|
165
|
+
hash_obj = hashlib.sha256(value_bytes)
|
166
|
+
return hash_obj.hexdigest()
|
167
|
+
|
168
|
+
|
169
|
+
def repair_json(json_string: str) -> str:
|
170
|
+
"""Attempt to repair a JSON string that is not valid JSON."""
|
171
|
+
json_string = json_string.replace("\n", "\\n").replace("\r", "\\r")
|
172
|
+
json_string = json_string.replace("'", "\\'")
|
173
|
+
json_string = json_string.replace("'", '"')
|
174
|
+
json_string = re.sub(r",\s*}", "}", json_string)
|
175
|
+
json_string = re.sub(r",\s*]", "]", json_string)
|
176
|
+
json_string = re.sub(r"(?<={|,)\s*([a-zA-Z0-9_]+)\s*:", r'"\1":', json_string)
|
177
|
+
return json_string
|
178
|
+
|
179
|
+
|
180
|
+
def dict_to_html(d):
|
181
|
+
"""Convert a dictionary to an HTML table."""
|
182
|
+
# Start the HTML table
|
183
|
+
html_table = f'<table border="1">\n<tr><th>{escape("Key")}</th><th>{escape("Value")}</th></tr>\n'
|
184
|
+
|
185
|
+
# Add rows to the HTML table
|
186
|
+
for key, value in d.items():
|
187
|
+
html_table += (
|
188
|
+
f"<tr><td>{escape(str(key))}</td><td>{escape(str(value))}</td></tr>\n"
|
189
|
+
)
|
190
|
+
|
191
|
+
# Close the HTML table
|
192
|
+
html_table += "</table>"
|
193
|
+
return html_table
|
194
|
+
|
195
|
+
|
196
|
+
def is_notebook() -> bool:
    """Report whether the code runs in a Jupyter notebook or Google Colab.

    The decision is based on the class name of the object returned by
    ``get_ipython()``. If that name is not defined, False is returned.
    """
    try:
        shell_name = get_ipython().__class__.__name__
    except NameError:
        # get_ipython is not defined: probably the standard Python interpreter
        return False

    if shell_name == "ZMQInteractiveShell":
        # Jupyter notebook or qtconsole
        return True

    if shell_name == "Shell":
        # Google Colab's shell class; confirm via the loaded modules
        import sys

        return "google.colab" in sys.modules

    # "TerminalInteractiveShell" (IPython in a terminal) or any other shell
    return False
|
214
|
+
|
215
|
+
|
216
|
+
def file_notice(file_name):
    """Tell the user that ``file_name`` has been created.

    Outside a notebook the notice is printed. Inside a notebook an HTML
    snippet with a download link pointing at ``file_name`` is displayed.
    """
    if not is_notebook():
        print(f"File created: {file_name}")
        return

    # IPython is only imported once we know we are inside a notebook.
    from IPython.display import HTML, display

    link_text = "Download file"
    markup = f'<p>File created: {file_name}</p>.<a href="{file_name}" download>{link_text}</a>'
    display(HTML(markup))
|
229
|
+
|
230
|
+
|
231
|
+
class HTMLSnippet(str):
    """A string subclass that keeps HTML markup in its ``value`` attribute.

    Use ``view`` to open the markup in a web browser.
    """

    def __init__(self, value):
        """Remember ``value`` as the HTML content of this snippet."""
        super().__init__()
        self.value = value

    def view(self):
        """Write the HTML to a temporary file and open it in a web browser."""
        # delete=False keeps the file on disk after it is closed, so it is
        # still there when the browser is pointed at it.
        page = tempfile.NamedTemporaryFile("w", delete=False, suffix=".html")
        with page:
            page.write(self.value)

        page_path = os.path.realpath(page.name)
        webbrowser.open(f"file://{page_path}")
|
252
|
+
|
253
|
+
|
254
|
+
def random_string() -> str:
    """Return a string of ten random ASCII letters (upper or lower case)."""
    alphabet = string.ascii_letters
    picked = [random.choice(alphabet) for _ in range(10)]
    return "".join(picked)
|
257
|
+
|
258
|
+
|
259
|
+
def shortname_proposal(question, max_length=None):
    """Take a question text and generate a slug.

    The text is lower-cased and split on whitespace. Tokens that are stop
    words are dropped, surrounding punctuation is stripped from the rest,
    and the remaining words are joined with underscores.

    Stop words are matched against the raw token, before punctuation is
    stripped, so a token such as "you?" is kept (as "you").

    Args:
        question: The question text.
        max_length: Optional maximum length of the slug. A falsy value
            (None or 0) means no limit.

    Returns:
        The slug. It may be empty if no usable tokens remain.
    """
    stopwords = {
        "is",
        "your",
        "who",
        "the",
        "a",
        "an",
        "of",
        "could",
        "you",
        "what",
        "when",
        "where",
        "why",
        "in",
        "and",
        "to",
        "how",
        "are",
    }

    words = []
    for token in question.lower().split():
        if token in stopwords:
            continue
        word = token.strip(string.punctuation)
        # Tokens made only of punctuation ("-", "?") strip to "" and would
        # leave doubled or trailing underscores in the slug.
        if word:
            words.append(word)
    heading = "_".join(words)

    # Limit length if needed, then trim any trailing _ left by the cut
    if max_length and len(heading) > max_length:
        heading = heading[:max_length].rstrip("_")
    return heading
|
296
|
+
|
297
|
+
|
298
|
+
def text_to_shortname(long_text, forbidden_names=None):
    """Create a slug for the question that is not in ``forbidden_names``.

    The slug comes from ``shortname_proposal``. If it is already taken, a
    numeric suffix is appended to the base slug ("name_1", "name_2", ...)
    until an unused name is found.

    Args:
        long_text: The question text to turn into a slug.
        forbidden_names: Container of names that must not be returned.
            Defaults to no forbidden names.

    Returns:
        A slug that is not contained in ``forbidden_names``.
    """
    # None instead of a mutable [] default, which is shared across calls.
    taken = () if forbidden_names is None else forbidden_names

    base_name = shortname_proposal(long_text)
    proposed_name = base_name
    counter = 1
    # make sure the name is unique
    while proposed_name in taken:
        # Build each candidate from the base slug so suffixes do not pile up
        # ("name_1_2_3").
        proposed_name = f"{base_name}_{counter}"
        counter += 1
    return proposed_name
|
307
|
+
|
308
|
+
|
309
|
+
def merge_dicts(dict_list):
    """Merge a list of dictionaries into a single dictionary of lists.

    Every key that occurs in any input dictionary appears in the result.
    Its value is a list with one entry per input dictionary, in input
    order, using None where a dictionary lacks the key.

    Args:
        dict_list: Iterable of dictionaries.

    Returns:
        A dictionary mapping each key to its list of values. Keys appear in
        the order they are first seen.
    """
    # Materialise once so one-shot iterables can be traversed repeatedly.
    rows = list(dict_list)
    # A dict (rather than a set) keeps first-seen order, so the output key
    # order does not depend on hash randomisation.
    ordered_keys = dict.fromkeys(key for row in rows for key in row)
    return {key: [row.get(key) for row in rows] for key in ordered_keys}
|
318
|
+
|
319
|
+
|
320
|
+
def extract_json_from_string(s):
    """Return the part of ``s`` from its first "{" to its last "}".

    The extracted text is not parsed or validated.

    Raises:
        ValueError: If ``s`` has no "{", no "}", or the last "}" does not
            come after the first "{".
    """
    opening = s.find("{")
    closing = s.rfind("}")

    # find/rfind return -1 when the brace is missing.
    if opening == -1 or closing == -1 or opening >= closing:
        raise ValueError("No JSON object found in string")

    # Slice is inclusive of the closing brace.
    return s[opening : closing + 1]
|
333
|
+
|
334
|
+
|
335
|
+
def valid_json(json_string):
    """Check if a string is valid JSON.

    Args:
        json_string: The candidate JSON text.

    Returns:
        True if ``json.loads`` accepts the input, False otherwise. Inputs
        that are not text at all (for example None or an int) and bytes
        that cannot be decoded also yield False rather than an exception.
    """
    try:
        json.loads(json_string)
    except (ValueError, TypeError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # TypeError is raised for inputs that are not str/bytes/bytearray.
        return False
    return True
|
342
|
+
|
343
|
+
|
344
|
+
def is_valid_variable_name(name, allow_name=True):
    """Tell whether ``name`` can be used as a variable name.

    A name qualifies when it is an identifier and not a reserved keyword.
    When ``allow_name`` is false, the literal string "name" is rejected too.
    """
    if not name.isidentifier() or keyword.iskeyword(name):
        return False
    if allow_name:
        return True
    return name != "name"
|
352
|
+
|
353
|
+
|
354
|
+
def create_valid_var_name(s, transform_func: Callable = lambda x: x.lower()) -> str:
    """Create a valid variable name from a string.

    If ``s`` is already a valid variable name it is only passed through
    ``transform_func``. Otherwise leading digits are removed, every
    character other than an ASCII letter, digit or underscore becomes an
    underscore, and a reserved keyword gets a trailing underscore before
    ``transform_func`` is applied.

    Args:
        s: The text to convert.
        transform_func: Callable applied to the cleaned name; lower-cases by
            default. Pass None to leave the name unchanged.

    Returns:
        The transformed name. If the transform itself produces a reserved
        keyword (for example "Class" lower-cased to "class"), an underscore
        is appended so the result is still usable as a variable name.

    Raises:
        ValueError: If ``s`` is empty, or nothing is left after cleaning.
    """
    if transform_func is None:
        transform_func = lambda x: x  # identity: keep the name as-is

    # Ensure the string is not empty
    if not s:
        raise ValueError("Input string cannot be empty.")

    if not is_valid_variable_name(s):
        # Remove leading numbers since variable names can't start with a number
        s = re.sub("^[0-9]+", "", s)

        # Replace invalid characters (anything not a letter, number, or
        # underscore) with an underscore. Neither step can put a digit back
        # at the front, so no further leading-digit check is needed.
        s = re.sub("[^0-9a-zA-Z_]", "_", s)

        if keyword.iskeyword(s):
            s += "_"

        # Ensure the string is not empty after the transformations
        if not s:
            raise ValueError(
                "Input string does not contain valid characters for a variable name."
            )

    result = transform_func(s)
    # The transform can turn a valid name into a keyword, so check again.
    if isinstance(result, str) and keyword.iskeyword(result):
        result += "_"
    return result
|
386
|
+
|
387
|
+
|
388
|
+
def shorten_string(s, max_length, placeholder="..."):
    """Shorten ``s`` by cutting out its middle and inserting ``placeholder``.

    Strings no longer than ``max_length`` are returned unchanged. Otherwise
    a centred span is removed. Where a space exists before the span's start
    or at/after its end, the cut is widened to that space.
    """
    total = len(s)
    if total <= max_length:
        return s

    # Number of characters to drop so that the kept text plus the
    # placeholder adds up to max_length.
    cut_size = total - max_length + len(placeholder)
    cut_from = (total - cut_size) // 2
    cut_to = cut_from + cut_size

    # Look for word boundaries around the span (both searches use the
    # unadjusted span) and snap each edge independently when one is found.
    space_before = s.rfind(" ", 0, cut_from)
    space_after = s.find(" ", cut_to)
    if space_before != -1:
        cut_from = space_before
    if space_after != -1:
        cut_to = space_after

    head = s[:cut_from]
    tail = s[cut_to:]
    return head + placeholder + tail
|
413
|
+
|
414
|
+
|
415
|
+
def write_api_key_to_env(api_key: str) -> str:
    """
    Save the user's Expected Parrot key in a .env file.

    The file is looked up in the current directory and created if it does
    not exist yet. The key is stored under EXPECTED_PARROT_API_KEY.

    Returns the absolute path to the .env file as a POSIX-style string.
    """
    from dotenv import set_key
    from pathlib import Path

    dotenv_name = ".env"
    dotenv_file = Path(dotenv_name)

    # Make sure the file exists before the key is written to it
    dotenv_file.touch(exist_ok=True)

    set_key(dotenv_name, "EXPECTED_PARROT_API_KEY", str(api_key))

    return dotenv_file.absolute().as_posix()
|