edsl 0.1.38.dev3__py3-none-any.whl → 0.1.38.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +332 -303
- edsl/BaseDiff.py +260 -260
- edsl/TemplateLoader.py +24 -24
- edsl/__init__.py +49 -49
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +867 -858
- edsl/agents/AgentList.py +413 -362
- edsl/agents/Invigilator.py +233 -222
- edsl/agents/InvigilatorBase.py +265 -284
- edsl/agents/PromptConstructor.py +354 -353
- edsl/agents/__init__.py +3 -3
- edsl/agents/descriptors.py +99 -99
- edsl/agents/prompt_helpers.py +129 -129
- edsl/auto/AutoStudy.py +117 -117
- edsl/auto/StageBase.py +230 -230
- edsl/auto/StageGenerateSurvey.py +178 -178
- edsl/auto/StageLabelQuestions.py +125 -125
- edsl/auto/StagePersona.py +61 -61
- edsl/auto/StagePersonaDimensionValueRanges.py +88 -88
- edsl/auto/StagePersonaDimensionValues.py +74 -74
- edsl/auto/StagePersonaDimensions.py +69 -69
- edsl/auto/StageQuestions.py +73 -73
- edsl/auto/SurveyCreatorPipeline.py +21 -21
- edsl/auto/utilities.py +224 -224
- edsl/base/Base.py +279 -279
- edsl/config.py +157 -149
- edsl/conversation/Conversation.py +290 -290
- edsl/conversation/car_buying.py +58 -58
- edsl/conversation/chips.py +95 -95
- edsl/conversation/mug_negotiation.py +81 -81
- edsl/conversation/next_speaker_utilities.py +93 -93
- edsl/coop/PriceFetcher.py +54 -54
- edsl/coop/__init__.py +2 -2
- edsl/coop/coop.py +1028 -961
- edsl/coop/utils.py +131 -131
- edsl/data/Cache.py +555 -530
- edsl/data/CacheEntry.py +233 -228
- edsl/data/CacheHandler.py +149 -149
- edsl/data/RemoteCacheSync.py +78 -97
- edsl/data/SQLiteDict.py +292 -292
- edsl/data/__init__.py +4 -4
- edsl/data/orm.py +10 -10
- edsl/data_transfer_models.py +73 -73
- edsl/enums.py +175 -173
- edsl/exceptions/BaseException.py +21 -21
- edsl/exceptions/__init__.py +54 -54
- edsl/exceptions/agents.py +42 -42
- edsl/exceptions/cache.py +5 -5
- edsl/exceptions/configuration.py +16 -16
- edsl/exceptions/coop.py +10 -10
- edsl/exceptions/data.py +14 -14
- edsl/exceptions/general.py +34 -34
- edsl/exceptions/jobs.py +33 -33
- edsl/exceptions/language_models.py +63 -63
- edsl/exceptions/prompts.py +15 -15
- edsl/exceptions/questions.py +91 -91
- edsl/exceptions/results.py +29 -29
- edsl/exceptions/scenarios.py +22 -22
- edsl/exceptions/surveys.py +37 -37
- edsl/inference_services/AnthropicService.py +87 -87
- edsl/inference_services/AwsBedrock.py +120 -120
- edsl/inference_services/AzureAI.py +217 -217
- edsl/inference_services/DeepInfraService.py +18 -18
- edsl/inference_services/GoogleService.py +148 -156
- edsl/inference_services/GroqService.py +20 -20
- edsl/inference_services/InferenceServiceABC.py +147 -147
- edsl/inference_services/InferenceServicesCollection.py +97 -97
- edsl/inference_services/MistralAIService.py +123 -123
- edsl/inference_services/OllamaService.py +18 -18
- edsl/inference_services/OpenAIService.py +224 -224
- edsl/inference_services/PerplexityService.py +163 -0
- edsl/inference_services/TestService.py +89 -89
- edsl/inference_services/TogetherAIService.py +170 -170
- edsl/inference_services/models_available_cache.py +118 -118
- edsl/inference_services/rate_limits_cache.py +25 -25
- edsl/inference_services/registry.py +41 -39
- edsl/inference_services/write_available.py +10 -10
- edsl/jobs/Answers.py +56 -56
- edsl/jobs/Jobs.py +898 -1358
- edsl/jobs/JobsChecks.py +147 -0
- edsl/jobs/JobsPrompts.py +268 -0
- edsl/jobs/JobsRemoteInferenceHandler.py +239 -0
- edsl/jobs/__init__.py +1 -1
- edsl/jobs/buckets/BucketCollection.py +63 -63
- edsl/jobs/buckets/ModelBuckets.py +65 -65
- edsl/jobs/buckets/TokenBucket.py +251 -251
- edsl/jobs/interviews/Interview.py +661 -661
- edsl/jobs/interviews/InterviewExceptionCollection.py +99 -99
- edsl/jobs/interviews/InterviewExceptionEntry.py +186 -186
- edsl/jobs/interviews/InterviewStatistic.py +63 -63
- edsl/jobs/interviews/InterviewStatisticsCollection.py +25 -25
- edsl/jobs/interviews/InterviewStatusDictionary.py +78 -78
- edsl/jobs/interviews/InterviewStatusLog.py +92 -92
- edsl/jobs/interviews/ReportErrors.py +66 -66
- edsl/jobs/interviews/interview_status_enum.py +9 -9
- edsl/jobs/runners/JobsRunnerAsyncio.py +466 -361
- edsl/jobs/runners/JobsRunnerStatus.py +330 -332
- edsl/jobs/tasks/QuestionTaskCreator.py +242 -242
- edsl/jobs/tasks/TaskCreators.py +64 -64
- edsl/jobs/tasks/TaskHistory.py +450 -451
- edsl/jobs/tasks/TaskStatusLog.py +23 -23
- edsl/jobs/tasks/task_status_enum.py +163 -163
- edsl/jobs/tokens/InterviewTokenUsage.py +27 -27
- edsl/jobs/tokens/TokenUsage.py +34 -34
- edsl/language_models/KeyLookup.py +30 -30
- edsl/language_models/LanguageModel.py +668 -708
- edsl/language_models/ModelList.py +155 -109
- edsl/language_models/RegisterLanguageModelsMeta.py +184 -184
- edsl/language_models/__init__.py +3 -3
- edsl/language_models/fake_openai_call.py +15 -15
- edsl/language_models/fake_openai_service.py +61 -61
- edsl/language_models/registry.py +190 -137
- edsl/language_models/repair.py +156 -156
- edsl/language_models/unused/ReplicateBase.py +83 -83
- edsl/language_models/utilities.py +64 -64
- edsl/notebooks/Notebook.py +258 -258
- edsl/notebooks/__init__.py +1 -1
- edsl/prompts/Prompt.py +362 -357
- edsl/prompts/__init__.py +2 -2
- edsl/questions/AnswerValidatorMixin.py +289 -289
- edsl/questions/QuestionBase.py +664 -660
- edsl/questions/QuestionBaseGenMixin.py +161 -161
- edsl/questions/QuestionBasePromptsMixin.py +217 -217
- edsl/questions/QuestionBudget.py +227 -227
- edsl/questions/QuestionCheckBox.py +359 -359
- edsl/questions/QuestionExtract.py +182 -183
- edsl/questions/QuestionFreeText.py +114 -114
- edsl/questions/QuestionFunctional.py +166 -166
- edsl/questions/QuestionList.py +231 -231
- edsl/questions/QuestionMultipleChoice.py +286 -286
- edsl/questions/QuestionNumerical.py +153 -153
- edsl/questions/QuestionRank.py +324 -324
- edsl/questions/Quick.py +41 -41
- edsl/questions/RegisterQuestionsMeta.py +71 -71
- edsl/questions/ResponseValidatorABC.py +174 -174
- edsl/questions/SimpleAskMixin.py +73 -73
- edsl/questions/__init__.py +26 -26
- edsl/questions/compose_questions.py +98 -98
- edsl/questions/decorators.py +21 -21
- edsl/questions/derived/QuestionLikertFive.py +76 -76
- edsl/questions/derived/QuestionLinearScale.py +87 -87
- edsl/questions/derived/QuestionTopK.py +93 -93
- edsl/questions/derived/QuestionYesNo.py +82 -82
- edsl/questions/descriptors.py +413 -413
- edsl/questions/prompt_templates/question_budget.jinja +13 -13
- edsl/questions/prompt_templates/question_checkbox.jinja +32 -32
- edsl/questions/prompt_templates/question_extract.jinja +11 -11
- edsl/questions/prompt_templates/question_free_text.jinja +3 -3
- edsl/questions/prompt_templates/question_linear_scale.jinja +11 -11
- edsl/questions/prompt_templates/question_list.jinja +17 -17
- edsl/questions/prompt_templates/question_multiple_choice.jinja +33 -33
- edsl/questions/prompt_templates/question_numerical.jinja +36 -36
- edsl/questions/question_registry.py +177 -147
- edsl/questions/settings.py +12 -12
- edsl/questions/templates/budget/answering_instructions.jinja +7 -7
- edsl/questions/templates/budget/question_presentation.jinja +7 -7
- edsl/questions/templates/checkbox/answering_instructions.jinja +10 -10
- edsl/questions/templates/checkbox/question_presentation.jinja +22 -22
- edsl/questions/templates/extract/answering_instructions.jinja +7 -7
- edsl/questions/templates/likert_five/answering_instructions.jinja +10 -10
- edsl/questions/templates/likert_five/question_presentation.jinja +11 -11
- edsl/questions/templates/linear_scale/answering_instructions.jinja +5 -5
- edsl/questions/templates/linear_scale/question_presentation.jinja +5 -5
- edsl/questions/templates/list/answering_instructions.jinja +3 -3
- edsl/questions/templates/list/question_presentation.jinja +5 -5
- edsl/questions/templates/multiple_choice/answering_instructions.jinja +9 -9
- edsl/questions/templates/multiple_choice/question_presentation.jinja +11 -11
- edsl/questions/templates/numerical/answering_instructions.jinja +6 -6
- edsl/questions/templates/numerical/question_presentation.jinja +6 -6
- edsl/questions/templates/rank/answering_instructions.jinja +11 -11
- edsl/questions/templates/rank/question_presentation.jinja +15 -15
- edsl/questions/templates/top_k/answering_instructions.jinja +8 -8
- edsl/questions/templates/top_k/question_presentation.jinja +22 -22
- edsl/questions/templates/yes_no/answering_instructions.jinja +6 -6
- edsl/questions/templates/yes_no/question_presentation.jinja +11 -11
- edsl/results/CSSParameterizer.py +108 -0
- edsl/results/Dataset.py +424 -293
- edsl/results/DatasetExportMixin.py +731 -717
- edsl/results/DatasetTree.py +275 -145
- edsl/results/Result.py +465 -456
- edsl/results/Results.py +1165 -1071
- edsl/results/ResultsDBMixin.py +238 -238
- edsl/results/ResultsExportMixin.py +43 -43
- edsl/results/ResultsFetchMixin.py +33 -33
- edsl/results/ResultsGGMixin.py +121 -121
- edsl/results/ResultsToolsMixin.py +98 -98
- edsl/results/Selector.py +135 -135
- edsl/results/TableDisplay.py +198 -0
- edsl/results/__init__.py +2 -2
- edsl/results/table_display.css +78 -0
- edsl/results/tree_explore.py +115 -115
- edsl/scenarios/FileStore.py +632 -458
- edsl/scenarios/Scenario.py +601 -544
- edsl/scenarios/ScenarioHtmlMixin.py +64 -64
- edsl/scenarios/ScenarioJoin.py +127 -0
- edsl/scenarios/ScenarioList.py +1287 -1112
- edsl/scenarios/ScenarioListExportMixin.py +52 -52
- edsl/scenarios/ScenarioListPdfMixin.py +261 -261
- edsl/scenarios/__init__.py +4 -4
- edsl/shared.py +1 -1
- edsl/study/ObjectEntry.py +173 -173
- edsl/study/ProofOfWork.py +113 -113
- edsl/study/SnapShot.py +80 -80
- edsl/study/Study.py +528 -528
- edsl/study/__init__.py +4 -4
- edsl/surveys/DAG.py +148 -148
- edsl/surveys/Memory.py +31 -31
- edsl/surveys/MemoryPlan.py +244 -244
- edsl/surveys/Rule.py +326 -326
- edsl/surveys/RuleCollection.py +387 -387
- edsl/surveys/Survey.py +1801 -1787
- edsl/surveys/SurveyCSS.py +261 -261
- edsl/surveys/SurveyExportMixin.py +259 -259
- edsl/surveys/SurveyFlowVisualizationMixin.py +179 -121
- edsl/surveys/SurveyQualtricsImport.py +284 -284
- edsl/surveys/__init__.py +3 -3
- edsl/surveys/base.py +53 -53
- edsl/surveys/descriptors.py +56 -56
- edsl/surveys/instructions/ChangeInstruction.py +49 -49
- edsl/surveys/instructions/Instruction.py +65 -53
- edsl/surveys/instructions/InstructionCollection.py +77 -77
- edsl/templates/error_reporting/base.html +23 -23
- edsl/templates/error_reporting/exceptions_by_model.html +34 -34
- edsl/templates/error_reporting/exceptions_by_question_name.html +16 -16
- edsl/templates/error_reporting/exceptions_by_type.html +16 -16
- edsl/templates/error_reporting/interview_details.html +115 -115
- edsl/templates/error_reporting/interviews.html +19 -10
- edsl/templates/error_reporting/overview.html +4 -4
- edsl/templates/error_reporting/performance_plot.html +1 -1
- edsl/templates/error_reporting/report.css +73 -73
- edsl/templates/error_reporting/report.html +117 -117
- edsl/templates/error_reporting/report.js +25 -25
- edsl/tools/__init__.py +1 -1
- edsl/tools/clusters.py +192 -192
- edsl/tools/embeddings.py +27 -27
- edsl/tools/embeddings_plotting.py +118 -118
- edsl/tools/plotting.py +112 -112
- edsl/tools/summarize.py +18 -18
- edsl/utilities/SystemInfo.py +28 -28
- edsl/utilities/__init__.py +22 -22
- edsl/utilities/ast_utilities.py +25 -25
- edsl/utilities/data/Registry.py +6 -6
- edsl/utilities/data/__init__.py +1 -1
- edsl/utilities/data/scooter_results.json +1 -1
- edsl/utilities/decorators.py +77 -77
- edsl/utilities/gcp_bucket/cloud_storage.py +96 -96
- edsl/utilities/interface.py +627 -627
- edsl/utilities/naming_utilities.py +263 -263
- edsl/utilities/repair_functions.py +28 -28
- edsl/utilities/restricted_python.py +70 -70
- edsl/utilities/utilities.py +424 -409
- {edsl-0.1.38.dev3.dist-info → edsl-0.1.38.dev4.dist-info}/LICENSE +21 -21
- {edsl-0.1.38.dev3.dist-info → edsl-0.1.38.dev4.dist-info}/METADATA +2 -1
- edsl-0.1.38.dev4.dist-info/RECORD +277 -0
- edsl-0.1.38.dev3.dist-info/RECORD +0 -269
- {edsl-0.1.38.dev3.dist-info → edsl-0.1.38.dev4.dist-info}/WHEEL +0 -0
@@ -1,263 +1,263 @@
|
|
1
|
-
import re
|
2
|
-
import keyword
|
3
|
-
|
4
|
-
stop_words = {
|
5
|
-
"into",
|
6
|
-
"mustn't",
|
7
|
-
"there",
|
8
|
-
"you'll",
|
9
|
-
"don",
|
10
|
-
"have",
|
11
|
-
"at",
|
12
|
-
"if",
|
13
|
-
"on",
|
14
|
-
"some",
|
15
|
-
"with",
|
16
|
-
"in",
|
17
|
-
"can",
|
18
|
-
"mightn",
|
19
|
-
"off",
|
20
|
-
"few",
|
21
|
-
"not",
|
22
|
-
"d",
|
23
|
-
"hadn",
|
24
|
-
"shan't",
|
25
|
-
"t",
|
26
|
-
"re",
|
27
|
-
"where",
|
28
|
-
"s",
|
29
|
-
"won't",
|
30
|
-
"mustn",
|
31
|
-
"wasn't",
|
32
|
-
"didn't",
|
33
|
-
"has",
|
34
|
-
"same",
|
35
|
-
"too",
|
36
|
-
"will",
|
37
|
-
"you've",
|
38
|
-
"all",
|
39
|
-
"haven't",
|
40
|
-
"isn't",
|
41
|
-
"over",
|
42
|
-
"of",
|
43
|
-
"about",
|
44
|
-
"its",
|
45
|
-
"being",
|
46
|
-
"it",
|
47
|
-
"her",
|
48
|
-
"should",
|
49
|
-
"himself",
|
50
|
-
"wasn",
|
51
|
-
"out",
|
52
|
-
"theirs",
|
53
|
-
"aren",
|
54
|
-
"that",
|
55
|
-
"our",
|
56
|
-
"shouldn't",
|
57
|
-
"you'd",
|
58
|
-
"such",
|
59
|
-
"above",
|
60
|
-
"my",
|
61
|
-
"the",
|
62
|
-
"any",
|
63
|
-
"been",
|
64
|
-
"as",
|
65
|
-
"very",
|
66
|
-
"herself",
|
67
|
-
"o",
|
68
|
-
"weren",
|
69
|
-
"until",
|
70
|
-
"their",
|
71
|
-
"shouldn",
|
72
|
-
"up",
|
73
|
-
"wouldn",
|
74
|
-
"couldn't",
|
75
|
-
"hasn't",
|
76
|
-
"no",
|
77
|
-
"than",
|
78
|
-
"hadn't",
|
79
|
-
"had",
|
80
|
-
"you",
|
81
|
-
"here",
|
82
|
-
"yourself",
|
83
|
-
"yourselves",
|
84
|
-
"during",
|
85
|
-
"ain",
|
86
|
-
"once",
|
87
|
-
"aren't",
|
88
|
-
"what",
|
89
|
-
"so",
|
90
|
-
"hers",
|
91
|
-
"that'll",
|
92
|
-
"other",
|
93
|
-
"ours",
|
94
|
-
"yours",
|
95
|
-
"nor",
|
96
|
-
"him",
|
97
|
-
"doesn",
|
98
|
-
"doesn't",
|
99
|
-
"he",
|
100
|
-
"them",
|
101
|
-
"for",
|
102
|
-
"ll",
|
103
|
-
"isn",
|
104
|
-
"this",
|
105
|
-
"or",
|
106
|
-
"who",
|
107
|
-
"only",
|
108
|
-
"itself",
|
109
|
-
"they",
|
110
|
-
"between",
|
111
|
-
"against",
|
112
|
-
"under",
|
113
|
-
"me",
|
114
|
-
"now",
|
115
|
-
"mightn't",
|
116
|
-
"those",
|
117
|
-
"needn't",
|
118
|
-
"these",
|
119
|
-
"when",
|
120
|
-
"before",
|
121
|
-
"his",
|
122
|
-
"she's",
|
123
|
-
"having",
|
124
|
-
"be",
|
125
|
-
"don't",
|
126
|
-
"haven",
|
127
|
-
"won",
|
128
|
-
"while",
|
129
|
-
"both",
|
130
|
-
"didn",
|
131
|
-
"by",
|
132
|
-
"ourselves",
|
133
|
-
"m",
|
134
|
-
"your",
|
135
|
-
"then",
|
136
|
-
"myself",
|
137
|
-
"we",
|
138
|
-
"it's",
|
139
|
-
"should've",
|
140
|
-
"through",
|
141
|
-
"why",
|
142
|
-
"from",
|
143
|
-
"and",
|
144
|
-
"hasn",
|
145
|
-
"more",
|
146
|
-
"how",
|
147
|
-
"ve",
|
148
|
-
"most",
|
149
|
-
"because",
|
150
|
-
"did",
|
151
|
-
"y",
|
152
|
-
"i",
|
153
|
-
"an",
|
154
|
-
"but",
|
155
|
-
"whom",
|
156
|
-
"below",
|
157
|
-
"further",
|
158
|
-
"am",
|
159
|
-
"which",
|
160
|
-
"just",
|
161
|
-
"ma",
|
162
|
-
"you're",
|
163
|
-
"couldn",
|
164
|
-
"do",
|
165
|
-
"shan",
|
166
|
-
"own",
|
167
|
-
"again",
|
168
|
-
"are",
|
169
|
-
"weren't",
|
170
|
-
"down",
|
171
|
-
"is",
|
172
|
-
"were",
|
173
|
-
"each",
|
174
|
-
"needn",
|
175
|
-
"themselves",
|
176
|
-
"she",
|
177
|
-
"after",
|
178
|
-
"does",
|
179
|
-
"wouldn't",
|
180
|
-
"to",
|
181
|
-
"a",
|
182
|
-
"was",
|
183
|
-
"doing",
|
184
|
-
}
|
185
|
-
|
186
|
-
|
187
|
-
def sanitize_string(input_string, max_length=35):
|
188
|
-
"""Return a sanitized version of the input string that can be used as a variable name.
|
189
|
-
|
190
|
-
>>> candidate_names = ["How are you doing this morning, Dave? What is your favorite kind of coffee?", "class", "def", "here_is_some_text"]
|
191
|
-
>>> [sanitize_string(name) for name in candidate_names]
|
192
|
-
['morning_dave_favorite_kind_coffee', 'class_modified', 'def_modified', 'here_is_some_text']
|
193
|
-
"""
|
194
|
-
|
195
|
-
# Ensure nltk stopwords are downloaded
|
196
|
-
# try:
|
197
|
-
# from nltk.corpus import stopwords
|
198
|
-
# except ImportError or ModuleNotFoundError:
|
199
|
-
# print(
|
200
|
-
# "nltk is not installed. Please install it using 'pip install nltk' to use these features."
|
201
|
-
# )
|
202
|
-
# raise
|
203
|
-
|
204
|
-
# try:
|
205
|
-
# stop_words = set(stopwords.words("english"))
|
206
|
-
# except LookupError:
|
207
|
-
# nltk.download("stopwords")
|
208
|
-
# stop_words = set(stopwords.words("english"))
|
209
|
-
# # raise LookupError("Stopwords not found. Please download them using nltk.download('stopwords')")
|
210
|
-
|
211
|
-
# # Define the list of stopwords
|
212
|
-
|
213
|
-
# Replace special characters with spaces and split into words
|
214
|
-
words = re.sub(r"\W+", " ", input_string).split()
|
215
|
-
|
216
|
-
# Remove stopwords
|
217
|
-
important_words = [word for word in words if word.lower() not in stop_words]
|
218
|
-
|
219
|
-
# Join words with underscores
|
220
|
-
sanitized_string = "_".join(important_words)
|
221
|
-
|
222
|
-
# Ensure the length is less than 25 characters
|
223
|
-
if len(sanitized_string) > max_length:
|
224
|
-
# split off the last word and remove it
|
225
|
-
words = sanitized_string[:max_length].split("_")
|
226
|
-
if len(words) == 1:
|
227
|
-
sanitized_string = words[0]
|
228
|
-
else:
|
229
|
-
sanitized_string = "_".join(words[:-1])
|
230
|
-
|
231
|
-
# Remove leading and trailing underscores
|
232
|
-
sanitized_string = sanitized_string.strip("_")
|
233
|
-
|
234
|
-
# Check if the string is a Python keyword
|
235
|
-
if keyword.iskeyword(sanitized_string):
|
236
|
-
sanitized_string += "_modified"
|
237
|
-
|
238
|
-
result = sanitized_string.lower()
|
239
|
-
return result
|
240
|
-
|
241
|
-
|
242
|
-
# Example usage
|
243
|
-
# input_string = "This is a sample variable-name@123 for testing"
|
244
|
-
# sanitized_string = sanitize_string(input_string)
|
245
|
-
# print(sanitized_string) # Output might be: sample_variable_name_123
|
246
|
-
|
247
|
-
# if __name__ == "__main__":
|
248
|
-
# candidate_names = [
|
249
|
-
# "How are you doing this morning, Dave? What is your favorite kind of coffee?",
|
250
|
-
# "class",
|
251
|
-
# "def",
|
252
|
-
# "here_is_some_text",
|
253
|
-
# ]
|
254
|
-
# for name in candidate_names:
|
255
|
-
# print(f"Original: {name}")
|
256
|
-
# print(f"Sanitized: {sanitize_string(name)}")
|
257
|
-
# print()
|
258
|
-
|
259
|
-
if __name__ == "__main__":
|
260
|
-
# from edsl.conjure.InputData import InputDataABC
|
261
|
-
import doctest
|
262
|
-
|
263
|
-
doctest.testmod(optionflags=doctest.ELLIPSIS)
|
1
|
+
import re
|
2
|
+
import keyword
|
3
|
+
|
4
|
+
stop_words = {
|
5
|
+
"into",
|
6
|
+
"mustn't",
|
7
|
+
"there",
|
8
|
+
"you'll",
|
9
|
+
"don",
|
10
|
+
"have",
|
11
|
+
"at",
|
12
|
+
"if",
|
13
|
+
"on",
|
14
|
+
"some",
|
15
|
+
"with",
|
16
|
+
"in",
|
17
|
+
"can",
|
18
|
+
"mightn",
|
19
|
+
"off",
|
20
|
+
"few",
|
21
|
+
"not",
|
22
|
+
"d",
|
23
|
+
"hadn",
|
24
|
+
"shan't",
|
25
|
+
"t",
|
26
|
+
"re",
|
27
|
+
"where",
|
28
|
+
"s",
|
29
|
+
"won't",
|
30
|
+
"mustn",
|
31
|
+
"wasn't",
|
32
|
+
"didn't",
|
33
|
+
"has",
|
34
|
+
"same",
|
35
|
+
"too",
|
36
|
+
"will",
|
37
|
+
"you've",
|
38
|
+
"all",
|
39
|
+
"haven't",
|
40
|
+
"isn't",
|
41
|
+
"over",
|
42
|
+
"of",
|
43
|
+
"about",
|
44
|
+
"its",
|
45
|
+
"being",
|
46
|
+
"it",
|
47
|
+
"her",
|
48
|
+
"should",
|
49
|
+
"himself",
|
50
|
+
"wasn",
|
51
|
+
"out",
|
52
|
+
"theirs",
|
53
|
+
"aren",
|
54
|
+
"that",
|
55
|
+
"our",
|
56
|
+
"shouldn't",
|
57
|
+
"you'd",
|
58
|
+
"such",
|
59
|
+
"above",
|
60
|
+
"my",
|
61
|
+
"the",
|
62
|
+
"any",
|
63
|
+
"been",
|
64
|
+
"as",
|
65
|
+
"very",
|
66
|
+
"herself",
|
67
|
+
"o",
|
68
|
+
"weren",
|
69
|
+
"until",
|
70
|
+
"their",
|
71
|
+
"shouldn",
|
72
|
+
"up",
|
73
|
+
"wouldn",
|
74
|
+
"couldn't",
|
75
|
+
"hasn't",
|
76
|
+
"no",
|
77
|
+
"than",
|
78
|
+
"hadn't",
|
79
|
+
"had",
|
80
|
+
"you",
|
81
|
+
"here",
|
82
|
+
"yourself",
|
83
|
+
"yourselves",
|
84
|
+
"during",
|
85
|
+
"ain",
|
86
|
+
"once",
|
87
|
+
"aren't",
|
88
|
+
"what",
|
89
|
+
"so",
|
90
|
+
"hers",
|
91
|
+
"that'll",
|
92
|
+
"other",
|
93
|
+
"ours",
|
94
|
+
"yours",
|
95
|
+
"nor",
|
96
|
+
"him",
|
97
|
+
"doesn",
|
98
|
+
"doesn't",
|
99
|
+
"he",
|
100
|
+
"them",
|
101
|
+
"for",
|
102
|
+
"ll",
|
103
|
+
"isn",
|
104
|
+
"this",
|
105
|
+
"or",
|
106
|
+
"who",
|
107
|
+
"only",
|
108
|
+
"itself",
|
109
|
+
"they",
|
110
|
+
"between",
|
111
|
+
"against",
|
112
|
+
"under",
|
113
|
+
"me",
|
114
|
+
"now",
|
115
|
+
"mightn't",
|
116
|
+
"those",
|
117
|
+
"needn't",
|
118
|
+
"these",
|
119
|
+
"when",
|
120
|
+
"before",
|
121
|
+
"his",
|
122
|
+
"she's",
|
123
|
+
"having",
|
124
|
+
"be",
|
125
|
+
"don't",
|
126
|
+
"haven",
|
127
|
+
"won",
|
128
|
+
"while",
|
129
|
+
"both",
|
130
|
+
"didn",
|
131
|
+
"by",
|
132
|
+
"ourselves",
|
133
|
+
"m",
|
134
|
+
"your",
|
135
|
+
"then",
|
136
|
+
"myself",
|
137
|
+
"we",
|
138
|
+
"it's",
|
139
|
+
"should've",
|
140
|
+
"through",
|
141
|
+
"why",
|
142
|
+
"from",
|
143
|
+
"and",
|
144
|
+
"hasn",
|
145
|
+
"more",
|
146
|
+
"how",
|
147
|
+
"ve",
|
148
|
+
"most",
|
149
|
+
"because",
|
150
|
+
"did",
|
151
|
+
"y",
|
152
|
+
"i",
|
153
|
+
"an",
|
154
|
+
"but",
|
155
|
+
"whom",
|
156
|
+
"below",
|
157
|
+
"further",
|
158
|
+
"am",
|
159
|
+
"which",
|
160
|
+
"just",
|
161
|
+
"ma",
|
162
|
+
"you're",
|
163
|
+
"couldn",
|
164
|
+
"do",
|
165
|
+
"shan",
|
166
|
+
"own",
|
167
|
+
"again",
|
168
|
+
"are",
|
169
|
+
"weren't",
|
170
|
+
"down",
|
171
|
+
"is",
|
172
|
+
"were",
|
173
|
+
"each",
|
174
|
+
"needn",
|
175
|
+
"themselves",
|
176
|
+
"she",
|
177
|
+
"after",
|
178
|
+
"does",
|
179
|
+
"wouldn't",
|
180
|
+
"to",
|
181
|
+
"a",
|
182
|
+
"was",
|
183
|
+
"doing",
|
184
|
+
}
|
185
|
+
|
186
|
+
|
187
|
+
def sanitize_string(input_string, max_length=35):
|
188
|
+
"""Return a sanitized version of the input string that can be used as a variable name.
|
189
|
+
|
190
|
+
>>> candidate_names = ["How are you doing this morning, Dave? What is your favorite kind of coffee?", "class", "def", "here_is_some_text"]
|
191
|
+
>>> [sanitize_string(name) for name in candidate_names]
|
192
|
+
['morning_dave_favorite_kind_coffee', 'class_modified', 'def_modified', 'here_is_some_text']
|
193
|
+
"""
|
194
|
+
|
195
|
+
# Ensure nltk stopwords are downloaded
|
196
|
+
# try:
|
197
|
+
# from nltk.corpus import stopwords
|
198
|
+
# except ImportError or ModuleNotFoundError:
|
199
|
+
# print(
|
200
|
+
# "nltk is not installed. Please install it using 'pip install nltk' to use these features."
|
201
|
+
# )
|
202
|
+
# raise
|
203
|
+
|
204
|
+
# try:
|
205
|
+
# stop_words = set(stopwords.words("english"))
|
206
|
+
# except LookupError:
|
207
|
+
# nltk.download("stopwords")
|
208
|
+
# stop_words = set(stopwords.words("english"))
|
209
|
+
# # raise LookupError("Stopwords not found. Please download them using nltk.download('stopwords')")
|
210
|
+
|
211
|
+
# # Define the list of stopwords
|
212
|
+
|
213
|
+
# Replace special characters with spaces and split into words
|
214
|
+
words = re.sub(r"\W+", " ", input_string).split()
|
215
|
+
|
216
|
+
# Remove stopwords
|
217
|
+
important_words = [word for word in words if word.lower() not in stop_words]
|
218
|
+
|
219
|
+
# Join words with underscores
|
220
|
+
sanitized_string = "_".join(important_words)
|
221
|
+
|
222
|
+
# Ensure the length is less than 25 characters
|
223
|
+
if len(sanitized_string) > max_length:
|
224
|
+
# split off the last word and remove it
|
225
|
+
words = sanitized_string[:max_length].split("_")
|
226
|
+
if len(words) == 1:
|
227
|
+
sanitized_string = words[0]
|
228
|
+
else:
|
229
|
+
sanitized_string = "_".join(words[:-1])
|
230
|
+
|
231
|
+
# Remove leading and trailing underscores
|
232
|
+
sanitized_string = sanitized_string.strip("_")
|
233
|
+
|
234
|
+
# Check if the string is a Python keyword
|
235
|
+
if keyword.iskeyword(sanitized_string):
|
236
|
+
sanitized_string += "_modified"
|
237
|
+
|
238
|
+
result = sanitized_string.lower()
|
239
|
+
return result
|
240
|
+
|
241
|
+
|
242
|
+
# Example usage
|
243
|
+
# input_string = "This is a sample variable-name@123 for testing"
|
244
|
+
# sanitized_string = sanitize_string(input_string)
|
245
|
+
# print(sanitized_string) # Output might be: sample_variable_name_123
|
246
|
+
|
247
|
+
# if __name__ == "__main__":
|
248
|
+
# candidate_names = [
|
249
|
+
# "How are you doing this morning, Dave? What is your favorite kind of coffee?",
|
250
|
+
# "class",
|
251
|
+
# "def",
|
252
|
+
# "here_is_some_text",
|
253
|
+
# ]
|
254
|
+
# for name in candidate_names:
|
255
|
+
# print(f"Original: {name}")
|
256
|
+
# print(f"Sanitized: {sanitize_string(name)}")
|
257
|
+
# print()
|
258
|
+
|
259
|
+
if __name__ == "__main__":
    # Run the doctests embedded in this module when it is executed as a script.
    # from edsl.conjure.InputData import InputDataABC
    import doctest

    # ELLIPSIS lets "..." in expected doctest output match arbitrary text.
    doctest.testmod(optionflags=doctest.ELLIPSIS)
|
@@ -1,28 +1,28 @@
|
|
1
|
-
import json
|
2
|
-
from edsl.utilities.utilities import valid_json
|
3
|
-
|
4
|
-
|
5
|
-
def extract_json_from_string(s):
|
6
|
-
"""Extract a JSON string from a string."""
|
7
|
-
# Find the first occurrence of '{'
|
8
|
-
start_idx = s.find("{")
|
9
|
-
# Find the last occurrence of '}'
|
10
|
-
end_idx = s.rfind("}")
|
11
|
-
# If both '{' and '}' are found in the string
|
12
|
-
if start_idx != -1 and end_idx != -1 and start_idx < end_idx:
|
13
|
-
# Extract the substring from start_idx to end_idx (inclusive)
|
14
|
-
json_str = s[start_idx : end_idx + 1]
|
15
|
-
try:
|
16
|
-
return json.loads(json_str)
|
17
|
-
except json.JSONDecodeError:
|
18
|
-
raise ValueError("Invalid JSON string")
|
19
|
-
else:
|
20
|
-
raise ValueError("No JSON object found in string")
|
21
|
-
|
22
|
-
|
23
|
-
if __name__ == "__main__":
|
24
|
-
text = (
|
25
|
-
'Sure - here is some JSON { "key": "value", "number": 123, "array": [1, 2, 3] }'
|
26
|
-
)
|
27
|
-
extracted_json = extract_json_from_string(text)
|
28
|
-
d = extracted_json
|
1
|
+
import json
|
2
|
+
from edsl.utilities.utilities import valid_json
|
3
|
+
|
4
|
+
|
5
|
+
def extract_json_from_string(s):
    """Parse the JSON object embedded in ``s`` and return the decoded value.

    The candidate text runs from the first ``{`` to the last ``}`` in ``s``
    (inclusive); anything before or after that span is ignored.

    :param s: Text that contains a JSON object, possibly surrounded by prose.
    :return: The value produced by ``json.loads`` for the extracted span.
    :raises ValueError: If no ``{`` ... ``}`` span exists, or if the span is
        not valid JSON (the ``json.JSONDecodeError`` is chained as the cause).
    """
    # Find the first occurrence of '{'
    start_idx = s.find("{")
    # Find the last occurrence of '}'
    end_idx = s.rfind("}")

    # A missing '{' gives -1; a missing '}' gives end_idx == -1, which can
    # never be greater than start_idx, so this one test covers every
    # "not found" case as well as a '}' that precedes the first '{'.
    if start_idx == -1 or start_idx >= end_idx:
        raise ValueError("No JSON object found in string")

    # Extract the substring from start_idx to end_idx (inclusive)
    json_str = s[start_idx : end_idx + 1]
    try:
        return json.loads(json_str)
    except json.JSONDecodeError as exc:
        # Chain explicitly so callers can still see where decoding failed.
        raise ValueError("Invalid JSON string") from exc
|
21
|
+
|
22
|
+
|
23
|
+
if __name__ == "__main__":
    # Manual smoke check: pull the JSON object out of a chatty string.
    text = (
        'Sure - here is some JSON { "key": "value", "number": 123, "array": [1, 2, 3] }'
    )
    extracted_json = extract_json_from_string(text)
    # Short alias for the parsed result; nothing is printed or asserted here.
    d = extracted_json
|