edsl 0.1.39__py3-none-any.whl → 0.1.39.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/Base.py +116 -197
- edsl/__init__.py +7 -15
- edsl/__version__.py +1 -1
- edsl/agents/Agent.py +147 -351
- edsl/agents/AgentList.py +73 -211
- edsl/agents/Invigilator.py +50 -101
- edsl/agents/InvigilatorBase.py +70 -62
- edsl/agents/PromptConstructor.py +225 -143
- edsl/agents/__init__.py +1 -0
- edsl/agents/prompt_helpers.py +3 -3
- edsl/auto/AutoStudy.py +5 -18
- edsl/auto/StageBase.py +40 -53
- edsl/auto/StageQuestions.py +1 -2
- edsl/auto/utilities.py +6 -0
- edsl/config.py +2 -22
- edsl/conversation/car_buying.py +1 -2
- edsl/coop/PriceFetcher.py +1 -1
- edsl/coop/coop.py +47 -125
- edsl/coop/utils.py +14 -14
- edsl/data/Cache.py +27 -45
- edsl/data/CacheEntry.py +15 -12
- edsl/data/CacheHandler.py +12 -31
- edsl/data/RemoteCacheSync.py +46 -154
- edsl/data/__init__.py +3 -4
- edsl/data_transfer_models.py +1 -2
- edsl/enums.py +0 -27
- edsl/exceptions/__init__.py +50 -50
- edsl/exceptions/agents.py +0 -12
- edsl/exceptions/questions.py +6 -24
- edsl/exceptions/scenarios.py +0 -7
- edsl/inference_services/AnthropicService.py +19 -38
- edsl/inference_services/AwsBedrock.py +2 -0
- edsl/inference_services/AzureAI.py +2 -0
- edsl/inference_services/GoogleService.py +12 -7
- edsl/inference_services/InferenceServiceABC.py +85 -18
- edsl/inference_services/InferenceServicesCollection.py +79 -120
- edsl/inference_services/MistralAIService.py +3 -0
- edsl/inference_services/OpenAIService.py +35 -47
- edsl/inference_services/PerplexityService.py +3 -0
- edsl/inference_services/TestService.py +10 -11
- edsl/inference_services/TogetherAIService.py +3 -5
- edsl/jobs/Answers.py +14 -1
- edsl/jobs/Jobs.py +431 -356
- edsl/jobs/JobsChecks.py +10 -35
- edsl/jobs/JobsPrompts.py +4 -6
- edsl/jobs/JobsRemoteInferenceHandler.py +133 -205
- edsl/jobs/buckets/BucketCollection.py +3 -44
- edsl/jobs/buckets/TokenBucket.py +21 -53
- edsl/jobs/interviews/Interview.py +408 -143
- edsl/jobs/runners/JobsRunnerAsyncio.py +403 -88
- edsl/jobs/runners/JobsRunnerStatus.py +165 -133
- edsl/jobs/tasks/QuestionTaskCreator.py +19 -21
- edsl/jobs/tasks/TaskHistory.py +18 -38
- edsl/jobs/tasks/task_status_enum.py +2 -0
- edsl/language_models/KeyLookup.py +30 -0
- edsl/language_models/LanguageModel.py +236 -194
- edsl/language_models/ModelList.py +19 -28
- edsl/language_models/__init__.py +2 -1
- edsl/language_models/registry.py +190 -0
- edsl/language_models/repair.py +2 -2
- edsl/language_models/unused/ReplicateBase.py +83 -0
- edsl/language_models/utilities.py +4 -5
- edsl/notebooks/Notebook.py +14 -19
- edsl/prompts/Prompt.py +39 -29
- edsl/questions/{answer_validator_mixin.py → AnswerValidatorMixin.py} +2 -47
- edsl/questions/QuestionBase.py +214 -68
- edsl/questions/{question_base_gen_mixin.py → QuestionBaseGenMixin.py} +50 -57
- edsl/questions/QuestionBasePromptsMixin.py +3 -7
- edsl/questions/QuestionBudget.py +1 -1
- edsl/questions/QuestionCheckBox.py +3 -3
- edsl/questions/QuestionExtract.py +7 -5
- edsl/questions/QuestionFreeText.py +3 -2
- edsl/questions/QuestionList.py +18 -10
- edsl/questions/QuestionMultipleChoice.py +23 -67
- edsl/questions/QuestionNumerical.py +4 -2
- edsl/questions/QuestionRank.py +17 -7
- edsl/questions/{response_validator_abc.py → ResponseValidatorABC.py} +26 -40
- edsl/questions/SimpleAskMixin.py +3 -4
- edsl/questions/__init__.py +1 -2
- edsl/questions/derived/QuestionLinearScale.py +3 -6
- edsl/questions/derived/QuestionTopK.py +1 -1
- edsl/questions/descriptors.py +3 -17
- edsl/questions/question_registry.py +1 -1
- edsl/results/CSSParameterizer.py +1 -1
- edsl/results/Dataset.py +7 -170
- edsl/results/DatasetExportMixin.py +305 -168
- edsl/results/DatasetTree.py +8 -28
- edsl/results/Result.py +206 -298
- edsl/results/Results.py +131 -149
- edsl/results/ResultsDBMixin.py +238 -0
- edsl/results/ResultsExportMixin.py +0 -2
- edsl/results/{results_selector.py → Selector.py} +13 -23
- edsl/results/TableDisplay.py +171 -98
- edsl/results/__init__.py +1 -1
- edsl/scenarios/FileStore.py +239 -150
- edsl/scenarios/Scenario.py +193 -90
- edsl/scenarios/ScenarioHtmlMixin.py +3 -4
- edsl/scenarios/{scenario_join.py → ScenarioJoin.py} +6 -10
- edsl/scenarios/ScenarioList.py +244 -415
- edsl/scenarios/ScenarioListExportMixin.py +7 -0
- edsl/scenarios/ScenarioListPdfMixin.py +37 -15
- edsl/scenarios/__init__.py +2 -1
- edsl/study/ObjectEntry.py +1 -1
- edsl/study/SnapShot.py +1 -1
- edsl/study/Study.py +12 -5
- edsl/surveys/Rule.py +4 -5
- edsl/surveys/RuleCollection.py +27 -25
- edsl/surveys/Survey.py +791 -270
- edsl/surveys/SurveyCSS.py +8 -20
- edsl/surveys/{SurveyFlowVisualization.py → SurveyFlowVisualizationMixin.py} +9 -11
- edsl/surveys/__init__.py +2 -4
- edsl/surveys/descriptors.py +2 -6
- edsl/surveys/instructions/ChangeInstruction.py +2 -1
- edsl/surveys/instructions/Instruction.py +13 -4
- edsl/surveys/instructions/InstructionCollection.py +6 -11
- edsl/templates/error_reporting/interview_details.html +1 -1
- edsl/templates/error_reporting/report.html +1 -1
- edsl/tools/plotting.py +1 -1
- edsl/utilities/utilities.py +23 -35
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/METADATA +10 -12
- edsl-0.1.39.dev1.dist-info/RECORD +277 -0
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/WHEEL +1 -1
- edsl/agents/QuestionInstructionPromptBuilder.py +0 -128
- edsl/agents/QuestionTemplateReplacementsBuilder.py +0 -137
- edsl/agents/question_option_processor.py +0 -172
- edsl/coop/CoopFunctionsMixin.py +0 -15
- edsl/coop/ExpectedParrotKeyHandler.py +0 -125
- edsl/exceptions/inference_services.py +0 -5
- edsl/inference_services/AvailableModelCacheHandler.py +0 -184
- edsl/inference_services/AvailableModelFetcher.py +0 -215
- edsl/inference_services/ServiceAvailability.py +0 -135
- edsl/inference_services/data_structures.py +0 -134
- edsl/jobs/AnswerQuestionFunctionConstructor.py +0 -223
- edsl/jobs/FetchInvigilator.py +0 -47
- edsl/jobs/InterviewTaskManager.py +0 -98
- edsl/jobs/InterviewsConstructor.py +0 -50
- edsl/jobs/JobsComponentConstructor.py +0 -189
- edsl/jobs/JobsRemoteInferenceLogger.py +0 -239
- edsl/jobs/RequestTokenEstimator.py +0 -30
- edsl/jobs/async_interview_runner.py +0 -138
- edsl/jobs/buckets/TokenBucketAPI.py +0 -211
- edsl/jobs/buckets/TokenBucketClient.py +0 -191
- edsl/jobs/check_survey_scenario_compatibility.py +0 -85
- edsl/jobs/data_structures.py +0 -120
- edsl/jobs/decorators.py +0 -35
- edsl/jobs/jobs_status_enums.py +0 -9
- edsl/jobs/loggers/HTMLTableJobLogger.py +0 -304
- edsl/jobs/results_exceptions_handler.py +0 -98
- edsl/language_models/ComputeCost.py +0 -63
- edsl/language_models/PriceManager.py +0 -127
- edsl/language_models/RawResponseHandler.py +0 -106
- edsl/language_models/ServiceDataSources.py +0 -0
- edsl/language_models/key_management/KeyLookup.py +0 -63
- edsl/language_models/key_management/KeyLookupBuilder.py +0 -273
- edsl/language_models/key_management/KeyLookupCollection.py +0 -38
- edsl/language_models/key_management/__init__.py +0 -0
- edsl/language_models/key_management/models.py +0 -131
- edsl/language_models/model.py +0 -256
- edsl/notebooks/NotebookToLaTeX.py +0 -142
- edsl/questions/ExceptionExplainer.py +0 -77
- edsl/questions/HTMLQuestion.py +0 -103
- edsl/questions/QuestionMatrix.py +0 -265
- edsl/questions/data_structures.py +0 -20
- edsl/questions/loop_processor.py +0 -149
- edsl/questions/response_validator_factory.py +0 -34
- edsl/questions/templates/matrix/__init__.py +0 -1
- edsl/questions/templates/matrix/answering_instructions.jinja +0 -5
- edsl/questions/templates/matrix/question_presentation.jinja +0 -20
- edsl/results/MarkdownToDocx.py +0 -122
- edsl/results/MarkdownToPDF.py +0 -111
- edsl/results/TextEditor.py +0 -50
- edsl/results/file_exports.py +0 -252
- edsl/results/smart_objects.py +0 -96
- edsl/results/table_data_class.py +0 -12
- edsl/results/table_renderers.py +0 -118
- edsl/scenarios/ConstructDownloadLink.py +0 -109
- edsl/scenarios/DocumentChunker.py +0 -102
- edsl/scenarios/DocxScenario.py +0 -16
- edsl/scenarios/PdfExtractor.py +0 -40
- edsl/scenarios/directory_scanner.py +0 -96
- edsl/scenarios/file_methods.py +0 -85
- edsl/scenarios/handlers/__init__.py +0 -13
- edsl/scenarios/handlers/csv.py +0 -49
- edsl/scenarios/handlers/docx.py +0 -76
- edsl/scenarios/handlers/html.py +0 -37
- edsl/scenarios/handlers/json.py +0 -111
- edsl/scenarios/handlers/latex.py +0 -5
- edsl/scenarios/handlers/md.py +0 -51
- edsl/scenarios/handlers/pdf.py +0 -68
- edsl/scenarios/handlers/png.py +0 -39
- edsl/scenarios/handlers/pptx.py +0 -105
- edsl/scenarios/handlers/py.py +0 -294
- edsl/scenarios/handlers/sql.py +0 -313
- edsl/scenarios/handlers/sqlite.py +0 -149
- edsl/scenarios/handlers/txt.py +0 -33
- edsl/scenarios/scenario_selector.py +0 -156
- edsl/surveys/ConstructDAG.py +0 -92
- edsl/surveys/EditSurvey.py +0 -221
- edsl/surveys/InstructionHandler.py +0 -100
- edsl/surveys/MemoryManagement.py +0 -72
- edsl/surveys/RuleManager.py +0 -172
- edsl/surveys/Simulator.py +0 -75
- edsl/surveys/SurveyToApp.py +0 -141
- edsl/utilities/PrettyList.py +0 -56
- edsl/utilities/is_notebook.py +0 -18
- edsl/utilities/is_valid_variable_name.py +0 -11
- edsl/utilities/remove_edsl_version.py +0 -24
- edsl-0.1.39.dist-info/RECORD +0 -358
- /edsl/questions/{register_questions_meta.py → RegisterQuestionsMeta.py} +0 -0
- /edsl/results/{results_fetch_mixin.py → ResultsFetchMixin.py} +0 -0
- /edsl/results/{results_tools_mixin.py → ResultsToolsMixin.py} +0 -0
- {edsl-0.1.39.dist-info → edsl-0.1.39.dev1.dist-info}/LICENSE +0 -0
edsl/data/Cache.py
CHANGED
@@ -6,10 +6,12 @@ from __future__ import annotations
|
|
6
6
|
import json
|
7
7
|
import os
|
8
8
|
import warnings
|
9
|
-
|
9
|
+
import copy
|
10
|
+
from typing import Optional, Union
|
10
11
|
from edsl.Base import Base
|
11
|
-
|
12
|
-
from edsl.utilities.
|
12
|
+
from edsl.data.CacheEntry import CacheEntry
|
13
|
+
from edsl.utilities.utilities import dict_hash
|
14
|
+
from edsl.utilities.decorators import remove_edsl_version
|
13
15
|
from edsl.exceptions.cache import CacheError
|
14
16
|
|
15
17
|
|
@@ -81,6 +83,10 @@ class Cache(Base):
|
|
81
83
|
|
82
84
|
self._perform_checks()
|
83
85
|
|
86
|
+
def rich_print(sefl):
|
87
|
+
pass
|
88
|
+
# raise NotImplementedError("This method is not implemented yet.")
|
89
|
+
|
84
90
|
def code(sefl):
|
85
91
|
pass
|
86
92
|
# raise NotImplementedError("This method is not implemented yet.")
|
@@ -195,7 +201,6 @@ class Cache(Base):
|
|
195
201
|
>>> len(c)
|
196
202
|
1
|
197
203
|
"""
|
198
|
-
from edsl.data.CacheEntry import CacheEntry
|
199
204
|
|
200
205
|
entry = CacheEntry(
|
201
206
|
model=model,
|
@@ -221,7 +226,6 @@ class Cache(Base):
|
|
221
226
|
|
222
227
|
:param write_now: Whether to write to the cache immediately (similar to `immediate_write`).
|
223
228
|
"""
|
224
|
-
from edsl.data.CacheEntry import CacheEntry
|
225
229
|
|
226
230
|
for key, value in new_data.items():
|
227
231
|
if key in self.data:
|
@@ -242,8 +246,6 @@ class Cache(Base):
|
|
242
246
|
|
243
247
|
:param write_now: Whether to write to the cache immediately (similar to `immediate_write`).
|
244
248
|
"""
|
245
|
-
from edsl.data.CacheEntry import CacheEntry
|
246
|
-
|
247
249
|
with open(filename, "a+") as f:
|
248
250
|
f.seek(0)
|
249
251
|
lines = f.readlines()
|
@@ -287,8 +289,8 @@ class Cache(Base):
|
|
287
289
|
|
288
290
|
CACHE_PATH = CONFIG.get("EDSL_DATABASE_PATH")
|
289
291
|
path = CACHE_PATH.replace("sqlite:///", "")
|
290
|
-
|
291
|
-
return cls.from_sqlite_db(
|
292
|
+
db_path = os.path.join(os.path.dirname(path), "data.db")
|
293
|
+
return cls.from_sqlite_db(db_path=db_path)
|
292
294
|
|
293
295
|
@classmethod
|
294
296
|
def from_jsonl(cls, jsonlfile: str, db_path: Optional[str] = None) -> Cache:
|
@@ -351,8 +353,7 @@ class Cache(Base):
|
|
351
353
|
f.write(json.dumps({key: value.to_dict()}) + "\n")
|
352
354
|
|
353
355
|
def to_scenario_list(self):
|
354
|
-
from edsl
|
355
|
-
from edsl.scenarios.Scenario import Scenario
|
356
|
+
from edsl import ScenarioList, Scenario
|
356
357
|
|
357
358
|
scenarios = []
|
358
359
|
for key, value in self.data.items():
|
@@ -362,32 +363,12 @@ class Cache(Base):
|
|
362
363
|
scenarios.append(s)
|
363
364
|
return ScenarioList(scenarios)
|
364
365
|
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
:return: A new Cache object containing unique entries
|
372
|
-
|
373
|
-
>>> from edsl.data.CacheEntry import CacheEntry
|
374
|
-
>>> ce1 = CacheEntry.example(randomize = True)
|
375
|
-
>>> ce2 = CacheEntry.example(randomize = True)
|
376
|
-
>>> ce2 = CacheEntry.example(randomize = True)
|
377
|
-
>>> c1 = Cache(data={ce1.key: ce1, ce2.key: ce2})
|
378
|
-
>>> c2 = Cache(data={ce1.key: ce1})
|
379
|
-
>>> c3 = c1 // c2
|
380
|
-
>>> len(c3)
|
381
|
-
1
|
382
|
-
>>> c3.data[ce2.key] == ce2
|
383
|
-
True
|
384
|
-
"""
|
385
|
-
if not isinstance(other, Cache):
|
386
|
-
raise CacheError("Can only compare two caches")
|
387
|
-
|
388
|
-
diff_data = {k: v for k, v in self.data.items() if k not in other.data}
|
389
|
-
return Cache(data=diff_data, immediate_write=self.immediate_write)
|
390
|
-
|
366
|
+
####################
|
367
|
+
# REMOTE
|
368
|
+
####################
|
369
|
+
# TODO: Make this work
|
370
|
+
# - Need to decide whether the cache belongs to a user and what can be shared
|
371
|
+
# - I.e., some cache entries? all or nothing?
|
391
372
|
@classmethod
|
392
373
|
def from_url(cls, db_path=None) -> Cache:
|
393
374
|
"""
|
@@ -413,10 +394,11 @@ class Cache(Base):
|
|
413
394
|
if self.filename:
|
414
395
|
self.write(self.filename)
|
415
396
|
|
397
|
+
####################
|
398
|
+
# DUNDER / USEFUL
|
399
|
+
####################
|
416
400
|
def __hash__(self):
|
417
401
|
"""Return the hash of the Cache."""
|
418
|
-
from edsl.utilities.utilities import dict_hash
|
419
|
-
|
420
402
|
return dict_hash(self.to_dict(add_edsl_version=False))
|
421
403
|
|
422
404
|
def to_dict(self, add_edsl_version=True) -> dict:
|
@@ -432,6 +414,12 @@ class Cache(Base):
|
|
432
414
|
def _summary(self):
|
433
415
|
return {"EDSL Class": "Cache", "Number of entries": len(self.data)}
|
434
416
|
|
417
|
+
def _repr_html_(self):
|
418
|
+
# from edsl.utilities.utilities import data_to_html
|
419
|
+
# return data_to_html(self.to_dict())
|
420
|
+
footer = f"<a href={self.__documentation__}>(docs)</a>"
|
421
|
+
return str(self.summary(format="html")) + footer
|
422
|
+
|
435
423
|
def table(
|
436
424
|
self,
|
437
425
|
*fields,
|
@@ -455,8 +443,6 @@ class Cache(Base):
|
|
455
443
|
@remove_edsl_version
|
456
444
|
def from_dict(cls, data) -> Cache:
|
457
445
|
"""Construct a Cache from a dictionary."""
|
458
|
-
from edsl.data.CacheEntry import CacheEntry
|
459
|
-
|
460
446
|
newdata = {k: CacheEntry.from_dict(v) for k, v in data.items()}
|
461
447
|
return cls(data=newdata)
|
462
448
|
|
@@ -499,8 +485,6 @@ class Cache(Base):
|
|
499
485
|
"""
|
500
486
|
Create an example input for a 'fetch' operation.
|
501
487
|
"""
|
502
|
-
from edsl.data.CacheEntry import CacheEntry
|
503
|
-
|
504
488
|
return CacheEntry.fetch_input_example()
|
505
489
|
|
506
490
|
def to_html(self):
|
@@ -557,8 +541,6 @@ class Cache(Base):
|
|
557
541
|
|
558
542
|
:param randomize: If True, uses CacheEntry's randomize method.
|
559
543
|
"""
|
560
|
-
from edsl.data.CacheEntry import CacheEntry
|
561
|
-
|
562
544
|
return cls(
|
563
545
|
data={
|
564
546
|
CacheEntry.example(randomize).key: CacheEntry.example(),
|
edsl/data/CacheEntry.py
CHANGED
@@ -5,12 +5,8 @@ import hashlib
|
|
5
5
|
from typing import Optional
|
6
6
|
from uuid import uuid4
|
7
7
|
|
8
|
-
from edsl.utilities.decorators import remove_edsl_version
|
9
8
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
class CacheEntry(RepresentationMixin):
|
9
|
+
class CacheEntry:
|
14
10
|
"""
|
15
11
|
A Class to represent a cache entry.
|
16
12
|
"""
|
@@ -82,11 +78,11 @@ class CacheEntry(RepresentationMixin):
|
|
82
78
|
d = {k: value for k, value in self.__dict__.items() if k in self.key_fields}
|
83
79
|
return self.gen_key(**d)
|
84
80
|
|
85
|
-
def to_dict(self
|
81
|
+
def to_dict(self) -> dict:
|
86
82
|
"""
|
87
83
|
Returns a dictionary representation of a CacheEntry.
|
88
84
|
"""
|
89
|
-
|
85
|
+
return {
|
90
86
|
"model": self.model,
|
91
87
|
"parameters": self.parameters,
|
92
88
|
"system_prompt": self.system_prompt,
|
@@ -95,12 +91,19 @@ class CacheEntry(RepresentationMixin):
|
|
95
91
|
"iteration": self.iteration,
|
96
92
|
"timestamp": self.timestamp,
|
97
93
|
}
|
98
|
-
# if add_edsl_version:
|
99
|
-
# from edsl import __version__
|
100
94
|
|
101
|
-
|
102
|
-
|
103
|
-
|
95
|
+
def _repr_html_(self) -> str:
|
96
|
+
"""
|
97
|
+
Returns an HTML representation of a CacheEntry.
|
98
|
+
"""
|
99
|
+
# from edsl.utilities.utilities import data_to_html
|
100
|
+
# return data_to_html(self.to_dict())
|
101
|
+
d = self.to_dict()
|
102
|
+
data = [[k, v] for k, v in d.items()]
|
103
|
+
from tabulate import tabulate
|
104
|
+
|
105
|
+
table = str(tabulate(data, headers=["keys", "values"], tablefmt="html"))
|
106
|
+
return f"<pre>{table}</pre>"
|
104
107
|
|
105
108
|
def keys(self):
|
106
109
|
return list(self.to_dict().keys())
|
edsl/data/CacheHandler.py
CHANGED
@@ -3,19 +3,19 @@ import ast
|
|
3
3
|
import json
|
4
4
|
import os
|
5
5
|
import shutil
|
6
|
-
|
6
|
+
import sqlite3
|
7
|
+
from edsl.config import CONFIG
|
8
|
+
from edsl.data.Cache import Cache
|
9
|
+
from edsl.data.CacheEntry import CacheEntry
|
10
|
+
from edsl.data.SQLiteDict import SQLiteDict
|
7
11
|
|
8
|
-
|
9
|
-
from edsl.data.Cache import Cache
|
10
|
-
from edsl.data.CacheEntry import CacheEntry
|
12
|
+
from edsl.config import CONFIG
|
11
13
|
|
12
14
|
|
13
|
-
def set_session_cache(cache:
|
15
|
+
def set_session_cache(cache: Cache) -> None:
|
14
16
|
"""
|
15
17
|
Set the session cache.
|
16
18
|
"""
|
17
|
-
from edsl.config import CONFIG
|
18
|
-
|
19
19
|
CONFIG.EDSL_SESSION_CACHE = cache
|
20
20
|
|
21
21
|
|
@@ -23,8 +23,6 @@ def unset_session_cache() -> None:
|
|
23
23
|
"""
|
24
24
|
Unset the session cache.
|
25
25
|
"""
|
26
|
-
from edsl.config import CONFIG
|
27
|
-
|
28
26
|
if hasattr(CONFIG, "EDSL_SESSION_CACHE"):
|
29
27
|
del CONFIG.EDSL_SESSION_CACHE
|
30
28
|
|
@@ -34,11 +32,7 @@ class CacheHandler:
|
|
34
32
|
This CacheHandler figures out what caches are available and does migrations, as needed.
|
35
33
|
"""
|
36
34
|
|
37
|
-
|
38
|
-
def CACHE_PATH(self):
|
39
|
-
from edsl.config import CONFIG
|
40
|
-
|
41
|
-
return CONFIG.get("EDSL_DATABASE_PATH")
|
35
|
+
CACHE_PATH = CONFIG.get("EDSL_DATABASE_PATH")
|
42
36
|
|
43
37
|
def __init__(self, test: bool = False):
|
44
38
|
self.test = test
|
@@ -58,22 +52,16 @@ class CacheHandler:
|
|
58
52
|
if notify:
|
59
53
|
print(f"Created cache directory: {dir_path}")
|
60
54
|
|
61
|
-
def gen_cache(self) ->
|
55
|
+
def gen_cache(self) -> Cache:
|
62
56
|
"""
|
63
57
|
Generate a Cache object.
|
64
58
|
"""
|
65
|
-
from edsl.data.Cache import Cache
|
66
|
-
|
67
59
|
if self.test:
|
68
60
|
return Cache(data={})
|
69
61
|
|
70
|
-
from edsl.config import CONFIG
|
71
|
-
|
72
62
|
if hasattr(CONFIG, "EDSL_SESSION_CACHE"):
|
73
63
|
return CONFIG.EDSL_SESSION_CACHE
|
74
64
|
|
75
|
-
from edsl.data.SQLiteDict import SQLiteDict
|
76
|
-
|
77
65
|
cache = Cache(data=SQLiteDict(self.CACHE_PATH))
|
78
66
|
return cache
|
79
67
|
|
@@ -88,8 +76,6 @@ class CacheHandler:
|
|
88
76
|
if not os.path.exists(os.path.join(os.getcwd(), path)):
|
89
77
|
return old_data
|
90
78
|
try:
|
91
|
-
import sqlite3
|
92
|
-
|
93
79
|
conn = sqlite3.connect(path)
|
94
80
|
with conn:
|
95
81
|
cur = conn.cursor()
|
@@ -122,8 +108,6 @@ class CacheHandler:
|
|
122
108
|
entry_dict["user_prompt"] = entry_dict.pop("prompt")
|
123
109
|
parameters = entry_dict["parameters"]
|
124
110
|
entry_dict["parameters"] = ast.literal_eval(parameters)
|
125
|
-
from edsl.data.CacheEntry import CacheEntry
|
126
|
-
|
127
111
|
entry = CacheEntry(**entry_dict)
|
128
112
|
return entry
|
129
113
|
|
@@ -133,7 +117,7 @@ class CacheHandler:
|
|
133
117
|
###############
|
134
118
|
# NOT IN USE
|
135
119
|
###############
|
136
|
-
def from_sqlite(uri="new_edsl_cache.db") -> dict[str,
|
120
|
+
def from_sqlite(uri="new_edsl_cache.db") -> dict[str, CacheEntry]:
|
137
121
|
"""
|
138
122
|
Read in a new-style sqlite cache and return a dictionary of dictionaries.
|
139
123
|
"""
|
@@ -147,7 +131,7 @@ class CacheHandler:
|
|
147
131
|
newdata[entry.key] = entry
|
148
132
|
return newdata
|
149
133
|
|
150
|
-
def from_jsonl(filename="edsl_cache.jsonl") -> dict[str,
|
134
|
+
def from_jsonl(filename="edsl_cache.jsonl") -> dict[str, CacheEntry]:
|
151
135
|
"""Read in a jsonl file and return a dictionary of CacheEntry objects."""
|
152
136
|
with open(filename, "a+") as f:
|
153
137
|
f.seek(0)
|
@@ -162,7 +146,4 @@ class CacheHandler:
|
|
162
146
|
|
163
147
|
|
164
148
|
if __name__ == "__main__":
|
165
|
-
|
166
|
-
import doctest
|
167
|
-
|
168
|
-
doctest.testmod()
|
149
|
+
ch = CacheHandler()
|
edsl/data/RemoteCacheSync.py
CHANGED
@@ -1,166 +1,71 @@
|
|
1
|
-
|
2
|
-
from dataclasses import dataclass
|
3
|
-
from contextlib import AbstractContextManager
|
4
|
-
from collections import UserList
|
5
|
-
|
6
|
-
if TYPE_CHECKING:
|
7
|
-
from .Cache import Cache
|
8
|
-
from edsl.coop.coop import Coop
|
9
|
-
from .CacheEntry import CacheEntry
|
10
|
-
|
11
|
-
from logging import Logger
|
12
|
-
|
13
|
-
|
14
|
-
class CacheKeyList(UserList):
|
15
|
-
def __init__(self, data: List[str]):
|
16
|
-
super().__init__(data)
|
17
|
-
self.data = data
|
18
|
-
|
19
|
-
def __repr__(self):
|
20
|
-
import reprlib
|
21
|
-
|
22
|
-
keys_repr = reprlib.repr(self.data)
|
23
|
-
return f"CacheKeyList({keys_repr})"
|
24
|
-
|
25
|
-
|
26
|
-
class CacheEntriesList(UserList):
|
27
|
-
def __init__(self, data: List["CacheEntry"]):
|
28
|
-
super().__init__(data)
|
29
|
-
self.data = data
|
30
|
-
|
31
|
-
def __repr__(self):
|
32
|
-
import reprlib
|
33
|
-
|
34
|
-
entries_repr = reprlib.repr(self.data)
|
35
|
-
return f"CacheEntries({entries_repr})"
|
36
|
-
|
37
|
-
def to_cache(self) -> "Cache":
|
38
|
-
from edsl.data.Cache import Cache
|
39
|
-
|
40
|
-
return Cache({entry.key: entry for entry in self.data})
|
41
|
-
|
42
|
-
|
43
|
-
@dataclass
|
44
|
-
class CacheDifference:
|
45
|
-
client_missing_entries: CacheEntriesList
|
46
|
-
server_missing_keys: List[str]
|
47
|
-
|
48
|
-
def __repr__(self):
|
49
|
-
"""Returns a string representation of the CacheDifference object."""
|
50
|
-
import reprlib
|
51
|
-
|
52
|
-
missing_entries_repr = reprlib.repr(self.client_missing_entries)
|
53
|
-
missing_keys_repr = reprlib.repr(self.server_missing_keys)
|
54
|
-
return f"CacheDifference(client_missing_entries={missing_entries_repr}, server_missing_keys={missing_keys_repr})"
|
55
|
-
|
56
|
-
|
57
|
-
class RemoteCacheSync(AbstractContextManager):
|
58
|
-
"""Synchronizes a local cache with a remote cache.
|
59
|
-
|
60
|
-
Handles bidirectional synchronization:
|
61
|
-
- Downloads missing entries from remote to local cache
|
62
|
-
- Uploads new local entries to remote cache
|
63
|
-
"""
|
64
|
-
|
1
|
+
class RemoteCacheSync:
|
65
2
|
def __init__(
|
66
|
-
self,
|
67
|
-
coop: "Coop",
|
68
|
-
cache: "Cache",
|
69
|
-
output_func: Callable,
|
70
|
-
remote_cache: bool = True,
|
71
|
-
remote_cache_description: str = "",
|
3
|
+
self, coop, cache, output_func, remote_cache=True, remote_cache_description=""
|
72
4
|
):
|
73
|
-
"""
|
74
|
-
Initializes a RemoteCacheSync object.
|
75
|
-
|
76
|
-
:param coop: Coop object for interacting with the remote cache
|
77
|
-
:param cache: Cache object for local cache
|
78
|
-
:param output_func: Function for outputting messages
|
79
|
-
:param remote_cache: Whether to enable remote cache synchronization
|
80
|
-
:param remote_cache_description: Description for remote cache entries
|
81
|
-
|
82
|
-
"""
|
83
5
|
self.coop = coop
|
84
6
|
self.cache = cache
|
85
7
|
self._output = output_func
|
86
|
-
self.
|
8
|
+
self.remote_cache = remote_cache
|
9
|
+
self.old_entry_keys = []
|
10
|
+
self.new_cache_entries = []
|
87
11
|
self.remote_cache_description = remote_cache_description
|
88
|
-
self.initial_cache_keys = []
|
89
12
|
|
90
|
-
def __enter__(self)
|
91
|
-
if self.
|
13
|
+
def __enter__(self):
|
14
|
+
if self.remote_cache:
|
92
15
|
self._sync_from_remote()
|
93
|
-
self.
|
16
|
+
self.old_entry_keys = list(self.cache.keys())
|
94
17
|
return self
|
95
18
|
|
96
19
|
def __exit__(self, exc_type, exc_value, traceback):
|
97
|
-
if self.
|
20
|
+
if self.remote_cache:
|
98
21
|
self._sync_to_remote()
|
99
22
|
return False # Propagate exceptions
|
100
23
|
|
101
|
-
def
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
client_missing_entries=diff.get("client_missing_cacheentries", []),
|
106
|
-
server_missing_keys=diff.get("server_missing_cacheentry_keys", []),
|
24
|
+
def _sync_from_remote(self):
|
25
|
+
cache_difference = self.coop.remote_cache_get_diff(self.cache.keys())
|
26
|
+
client_missing_cacheentries = cache_difference.get(
|
27
|
+
"client_missing_cacheentries", []
|
107
28
|
)
|
29
|
+
missing_entry_count = len(client_missing_cacheentries)
|
108
30
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
31
|
+
if missing_entry_count > 0:
|
32
|
+
self._output(
|
33
|
+
f"Updating local cache with {missing_entry_count:,} new "
|
34
|
+
f"{'entry' if missing_entry_count == 1 else 'entries'} from remote..."
|
35
|
+
)
|
36
|
+
self.cache.add_from_dict(
|
37
|
+
{entry.key: entry for entry in client_missing_cacheentries}
|
38
|
+
)
|
39
|
+
self._output("Local cache updated!")
|
40
|
+
else:
|
115
41
|
self._output("No new entries to add to local cache.")
|
116
|
-
return
|
117
42
|
|
118
|
-
|
119
|
-
|
120
|
-
|
43
|
+
def _sync_to_remote(self):
|
44
|
+
cache_difference = self.coop.remote_cache_get_diff(self.cache.keys())
|
45
|
+
server_missing_cacheentry_keys = cache_difference.get(
|
46
|
+
"server_missing_cacheentry_keys", []
|
121
47
|
)
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
)
|
138
|
-
|
139
|
-
# Get newly added entries since sync started
|
140
|
-
new_entries = CacheEntriesList(
|
141
|
-
[
|
142
|
-
entry
|
143
|
-
for entry in self.cache.values()
|
144
|
-
if entry.key not in self.initial_cache_keys
|
145
|
-
]
|
146
|
-
)
|
147
|
-
|
148
|
-
return server_missing_entries + new_entries
|
149
|
-
|
150
|
-
def _sync_to_remote(self) -> None:
|
151
|
-
"""Uploads new local entries to remote cache."""
|
152
|
-
diff: CacheDifference = self._get_cache_difference()
|
153
|
-
entries_to_upload: CacheEntriesList = self._get_entries_to_upload(diff)
|
154
|
-
upload_count = len(entries_to_upload)
|
155
|
-
|
156
|
-
if upload_count > 0:
|
48
|
+
server_missing_cacheentries = [
|
49
|
+
entry
|
50
|
+
for key in server_missing_cacheentry_keys
|
51
|
+
if (entry := self.cache.data.get(key)) is not None
|
52
|
+
]
|
53
|
+
|
54
|
+
new_cache_entries = [
|
55
|
+
entry
|
56
|
+
for entry in self.cache.values()
|
57
|
+
if entry.key not in self.old_entry_keys
|
58
|
+
]
|
59
|
+
server_missing_cacheentries.extend(new_cache_entries)
|
60
|
+
new_entry_count = len(server_missing_cacheentries)
|
61
|
+
|
62
|
+
if new_entry_count > 0:
|
157
63
|
self._output(
|
158
|
-
f"Updating remote cache with {
|
159
|
-
f"{'entry' if
|
64
|
+
f"Updating remote cache with {new_entry_count:,} new "
|
65
|
+
f"{'entry' if new_entry_count == 1 else 'entries'}..."
|
160
66
|
)
|
161
|
-
|
162
67
|
self.coop.remote_cache_create_many(
|
163
|
-
|
68
|
+
server_missing_cacheentries,
|
164
69
|
visibility="private",
|
165
70
|
description=self.remote_cache_description,
|
166
71
|
)
|
@@ -171,16 +76,3 @@ class RemoteCacheSync(AbstractContextManager):
|
|
171
76
|
self._output(
|
172
77
|
f"There are {len(self.cache.keys()):,} entries in the local cache."
|
173
78
|
)
|
174
|
-
|
175
|
-
|
176
|
-
if __name__ == "__main__":
|
177
|
-
import doctest
|
178
|
-
|
179
|
-
doctest.testmod()
|
180
|
-
|
181
|
-
from edsl.coop.coop import Coop
|
182
|
-
from edsl.data.Cache import Cache
|
183
|
-
from edsl.data.CacheEntry import CacheEntry
|
184
|
-
|
185
|
-
r = RemoteCacheSync(Coop(), Cache(), print)
|
186
|
-
diff = r._get_cache_difference()
|
edsl/data/__init__.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
from edsl.data.CacheEntry import CacheEntry
|
2
|
+
from edsl.data.SQLiteDict import SQLiteDict
|
3
3
|
from edsl.data.Cache import Cache
|
4
|
-
|
5
|
-
# from edsl.data.CacheHandler import CacheHandler
|
4
|
+
from edsl.data.CacheHandler import CacheHandler
|
edsl/data_transfer_models.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from typing import NamedTuple, Dict, List, Optional, Any
|
2
2
|
from dataclasses import dataclass, fields
|
3
|
+
import reprlib
|
3
4
|
|
4
5
|
|
5
6
|
class ModelInputs(NamedTuple):
|
@@ -55,8 +56,6 @@ class ImageInfo:
|
|
55
56
|
encoded_image: str
|
56
57
|
|
57
58
|
def __repr__(self):
|
58
|
-
import reprlib
|
59
|
-
|
60
59
|
reprlib_instance = reprlib.Repr()
|
61
60
|
reprlib_instance.maxstring = 30 # Limit the string length for the encoded image
|
62
61
|
|
edsl/enums.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
"""Enums for the different types of questions, language models, and inference services."""
|
2
2
|
|
3
3
|
from enum import Enum
|
4
|
-
from typing import Literal
|
5
4
|
|
6
5
|
|
7
6
|
class EnumWithChecks(Enum):
|
@@ -68,32 +67,6 @@ class InferenceServiceType(EnumWithChecks):
|
|
68
67
|
PERPLEXITY = "perplexity"
|
69
68
|
|
70
69
|
|
71
|
-
# unavoidable violation of the DRY principle but it is necessary
|
72
|
-
# checked w/ a unit test to make sure consistent with services in enums.py
|
73
|
-
InferenceServiceLiteral = Literal[
|
74
|
-
"bedrock",
|
75
|
-
"deep_infra",
|
76
|
-
"replicate",
|
77
|
-
"openai",
|
78
|
-
"google",
|
79
|
-
"test",
|
80
|
-
"anthropic",
|
81
|
-
"groq",
|
82
|
-
"azure",
|
83
|
-
"ollama",
|
84
|
-
"mistral",
|
85
|
-
"together",
|
86
|
-
"perplexity",
|
87
|
-
]
|
88
|
-
|
89
|
-
available_models_urls = {
|
90
|
-
"anthropic": "https://docs.anthropic.com/en/docs/about-claude/models",
|
91
|
-
"openai": "https://platform.openai.com/docs/models/gp",
|
92
|
-
"groq": "https://console.groq.com/docs/models",
|
93
|
-
"google": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models",
|
94
|
-
}
|
95
|
-
|
96
|
-
|
97
70
|
service_to_api_keyname = {
|
98
71
|
InferenceServiceType.BEDROCK.value: "TBD",
|
99
72
|
InferenceServiceType.DEEP_INFRA.value: "DEEP_INFRA_API_KEY",
|