edsl 0.1.45__py3-none-any.whl → 0.1.47__py3-none-any.whl

This diff shows the contents of two publicly released versions of this package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between those published versions.
Files changed (44)
  1. edsl/Base.py +87 -16
  2. edsl/__version__.py +1 -1
  3. edsl/agents/PromptConstructor.py +26 -79
  4. edsl/agents/QuestionInstructionPromptBuilder.py +70 -32
  5. edsl/agents/QuestionTemplateReplacementsBuilder.py +12 -2
  6. edsl/coop/coop.py +289 -147
  7. edsl/data/Cache.py +2 -0
  8. edsl/data/CacheEntry.py +10 -2
  9. edsl/data/RemoteCacheSync.py +10 -9
  10. edsl/inference_services/AvailableModelFetcher.py +1 -1
  11. edsl/inference_services/PerplexityService.py +9 -5
  12. edsl/jobs/AnswerQuestionFunctionConstructor.py +12 -1
  13. edsl/jobs/Jobs.py +35 -17
  14. edsl/jobs/JobsComponentConstructor.py +2 -1
  15. edsl/jobs/JobsPrompts.py +49 -26
  16. edsl/jobs/JobsRemoteInferenceHandler.py +4 -5
  17. edsl/jobs/data_structures.py +3 -0
  18. edsl/jobs/interviews/Interview.py +6 -3
  19. edsl/language_models/LanguageModel.py +7 -1
  20. edsl/questions/QuestionBase.py +5 -0
  21. edsl/questions/question_base_gen_mixin.py +2 -0
  22. edsl/questions/question_registry.py +6 -7
  23. edsl/results/DatasetExportMixin.py +124 -6
  24. edsl/results/Results.py +59 -0
  25. edsl/scenarios/FileStore.py +112 -7
  26. edsl/scenarios/ScenarioList.py +283 -21
  27. edsl/study/Study.py +2 -2
  28. edsl/surveys/Survey.py +15 -20
  29. {edsl-0.1.45.dist-info → edsl-0.1.47.dist-info}/METADATA +4 -3
  30. {edsl-0.1.45.dist-info → edsl-0.1.47.dist-info}/RECORD +32 -44
  31. edsl/auto/AutoStudy.py +0 -130
  32. edsl/auto/StageBase.py +0 -243
  33. edsl/auto/StageGenerateSurvey.py +0 -178
  34. edsl/auto/StageLabelQuestions.py +0 -125
  35. edsl/auto/StagePersona.py +0 -61
  36. edsl/auto/StagePersonaDimensionValueRanges.py +0 -88
  37. edsl/auto/StagePersonaDimensionValues.py +0 -74
  38. edsl/auto/StagePersonaDimensions.py +0 -69
  39. edsl/auto/StageQuestions.py +0 -74
  40. edsl/auto/SurveyCreatorPipeline.py +0 -21
  41. edsl/auto/utilities.py +0 -218
  42. edsl/base/Base.py +0 -279
  43. {edsl-0.1.45.dist-info → edsl-0.1.47.dist-info}/LICENSE +0 -0
  44. {edsl-0.1.45.dist-info → edsl-0.1.47.dist-info}/WHEEL +0 -0
edsl/auto/StagePersona.py DELETED
@@ -1,61 +0,0 @@
- from textwrap import dedent
- from dataclasses import dataclass
- from typing import List
-
- from edsl.auto.StageBase import StageBase
- from edsl.auto.StageBase import FlowDataBase
- from edsl import Model
- from edsl.auto.StageQuestions import StageQuestions
-
- from edsl.questions import QuestionFreeText
- from edsl.scenarios import Scenario
-
- from edsl.auto.utilities import gen_pipeline
-
-
- class StagePersona(StageBase):
-     input = StageQuestions.output
-
-     @dataclass
-     class Output(FlowDataBase):
-         persona: str
-         questions: List[str]
-
-     output = Output
-
-     def handle_data(self, data):
-         m = Model()
-         q_persona = QuestionFreeText(
-             question_text=dedent(
-                 """\
-                 Imagine a person from the population {{ population }} responding to these questions: "{{ questions }}"
-                 Make up a 1 paragraph persona for this person who would have answers for these questions.
-                 """
-             ),
-             question_name="persona",
-         )
-         results = (
-             q_persona.by(m)
-             .by(Scenario({"questions": data.questions, "population": data.population}))
-             .run()
-         )
-         print("Constructing a persona that could answer the following questions:")
-         print(data.questions)
-         results.select("persona").print(
-             pretty_labels={
-                 "answer.persona": f"Persona that can answer: {data.questions}"
-             },
-             split_at_dot=False,
-         )
-         persona = results.select("persona").first()
-         return self.output(persona=persona, questions=data.questions)
-
-
- if __name__ == "__main__":
-     pipeline = gen_pipeline([StageQuestions, StagePersona])
-     pipeline.process(
-         pipeline.input(
-             overall_question="What are some factors that could determine whether someone likes ice cream?",
-             persona="People",
-         )
-     )
edsl/auto/StagePersonaDimensionValueRanges.py DELETED
@@ -1,88 +0,0 @@
- from textwrap import dedent
- from dataclasses import dataclass
-
- from typing import List
-
- from edsl.auto.StageBase import StageBase
- from edsl.auto.StageBase import FlowDataBase
-
- from edsl.auto.StagePersonaDimensionValues import StagePersonaDimensionValues
-
- from edsl.questions import QuestionList
- from edsl.scenarios import Scenario
- from edsl import Model
- from edsl.auto.utilities import gen_pipeline
-
-
- class StagePersonaDimensionValueRanges(StageBase):
-     input = StagePersonaDimensionValues.output
-
-     @dataclass
-     class Output(FlowDataBase):
-         focal_dimension_values: List[dict]
-         mapping: dict
-         persona: str
-
-     output = Output
-
-     def handle_data(self, data):
-         # breakpoint()
-         """Goal with this stage is to, for each dimension, get a range of values that the persona might have for that dimension."""
-         dimension_values = data["dimension_values"]
-         attribute_results = data["attribute_results"]
-         persona = data["persona"]
-         m = Model()
-         d = dict(zip(attribute_results, dimension_values))
-         q = QuestionList(
-             question_text=dedent(
-                 """\
-                 Consider the following persona: {{ persona }}.
-                 They were categorized as having the following attributes: {{ d }}.
-                 For this dimension: {{ focal_dimension }},
-                 What are values that other people might have on this attribute?
-                 """
-             ),
-             question_name="focal_dimension_values",
-         )
-         s = [
-             Scenario({"persona": persona, "d": d, "focal_dimension": k})
-             for k in d.keys()
-         ]
-         results = q.by(s).by(m).run()
-         # breakpoint()
-         results.select("focal_dimension", "answer.*").print(
-             pretty_labels={
-                 "scenario.focal_dimension": f"Dimensions of a persona",
-                 "answer.focal_dimension_values": f"Values a person might have for that dimension",
-             },
-             split_at_dot=False,
-         )
-
-         focal_dimension_values = results.select("focal_dimension_values").to_list()
-         mapping = dict(zip(attribute_results, focal_dimension_values))
-         return self.output(
-             focal_dimension_values=focal_dimension_values,
-             mapping=mapping,
-             persona=persona,
-         )
-
-
- if __name__ == "__main__":
-     from edsl.auto.StageQuestions import StageQuestions
-     from edsl.auto.StagePersona import StagePersona
-     from edsl.auto.StagePersonaDimensions import StagePersonaDimensions
-
-     pipeline = gen_pipeline(
-         [
-             StageQuestions,
-             StagePersona,
-             StagePersonaDimensions,
-             StagePersonaDimensionValues,
-             StagePersonaDimensionValueRanges,
-         ]
-     )
-     pipeline.process(
-         pipeline.input(
-             overall_question="What are some factors that could determine whether someone likes ice cream?"
-         )
-     )
edsl/auto/StagePersonaDimensionValues.py DELETED
@@ -1,74 +0,0 @@
- from textwrap import dedent
- from dataclasses import dataclass
-
- from typing import List, Dict
-
- from edsl.auto.StageBase import StageBase
- from edsl.auto.StageBase import FlowDataBase
-
- from edsl.auto.StagePersonaDimensions import StagePersonaDimensions
- from edsl import Model
- from edsl.questions import QuestionList, QuestionExtract
- from edsl.scenarios import Scenario
-
- from edsl.auto.utilities import gen_pipeline
-
-
- class StagePersonaDimensionValues(StageBase):
-     input = StagePersonaDimensions.output
-
-     @dataclass
-     class Output(FlowDataBase):
-         attribute_results: List[str]
-         dimension_values: Dict[str, str]
-         persona: str
-
-     output = Output
-
-     def handle_data(self, data):
-         attribute_results = data.attribute_results
-         persona = data.persona
-         m = Model()
-         q = QuestionExtract(
-             question_text=dedent(
-                 """\
-                 This is a persona: "{{ persona }}"
-                 They vary on the following dimensions: "{{ attribute_results }}"
-                 For each dimenion, what are some values that this persona might have for that dimension?
-                 Please keep answers very short, ideally one word.
-                 """
-             ),
-             answer_template={k: None for k in attribute_results},
-             question_name="dimension_values",
-         )
-         results = (
-             q.by(Scenario({"attribute_results": attribute_results, "persona": persona}))
-             .by(m)
-             .run()
-         )
-         results.select("attribute_results", "dimension_values").print()
-         return self.output(
-             dimension_values=results.select("dimension_values").first(),
-             attribute_results=attribute_results,
-             persona=persona,
-         )
-
-
- if __name__ == "__main__":
-     from edsl.auto.StageQuestions import StageQuestions
-     from edsl.auto.StagePersona import StagePersona
-     from edsl.auto.StagePersonaDimensions import StagePersonaDimensions
-
-     pipeline = gen_pipeline(
-         [
-             StageQuestions,
-             StagePersona,
-             StagePersonaDimensions,
-             StagePersonaDimensionValues,
-         ]
-     )
-     pipeline.process(
-         pipeline.input(
-             overall_question="What are some factors that could determine whether someone likes ice cream?"
-         )
-     )
edsl/auto/StagePersonaDimensions.py DELETED
@@ -1,69 +0,0 @@
- from textwrap import dedent
- from dataclasses import dataclass
-
- from typing import List
-
- from edsl.auto.StageBase import StageBase
- from edsl.auto.StageBase import FlowDataBase
-
- from edsl.auto.StagePersona import StagePersona
-
- from edsl.questions import QuestionList
- from edsl.scenarios import Scenario
- from edsl import Model
-
- from edsl.auto.utilities import gen_pipeline
-
-
- class StagePersonaDimensions(StageBase):
-     input = StagePersona.output
-
-     @dataclass
-     class Output(FlowDataBase):
-         attribute_results: List[str]
-         persona: str
-
-     output = Output
-
-     def handle_data(self, data):
-         q_attributes = QuestionList(
-             question_text=dedent(
-                 """\
-                 Here is a persona: "{{ persona }}"
-                 It was construced to be someone who could answer these questions: "{{ questions }}"
-
-                 We want to identify the general dimensions that make up this persona.
-                 E.g., if the person is desribed as 'happy' then a dimenion would be 'mood'
-                 """
-             ),
-             question_name="find_attributes",
-         )
-         m = Model()
-         results = (
-             q_attributes.by(
-                 Scenario({"persona": data.persona, "questions": data.questions})
-             )
-             .by(m)
-             .run()
-         )
-         (
-             results.select("find_attributes").print(
-                 pretty_labels={
-                     "answer.find_attributes": f'Persona dimensions for: "{data.persona}"'
-                 },
-                 split_at_dot=False,
-             )
-         )
-         attribute_results = results.select("find_attributes").first()
-         return self.output(attribute_results=attribute_results, persona=data.persona)
-
-
- if __name__ == "__main__":
-     from edsl.auto.StageQuestions import StageQuestions
-
-     pipeline = gen_pipeline([StageQuestions, StagePersona, StagePersonaDimensions])
-     pipeline.process(
-         pipeline.input(
-             overall_question="What are some factors that could determine whether someone likes ice cream?"
-         )
-     )
edsl/auto/StageQuestions.py DELETED
@@ -1,74 +0,0 @@
- from dataclasses import dataclass
- from typing import List
- from textwrap import dedent
-
-
- from edsl import Scenario
- from edsl import Model
- from edsl.questions.QuestionList import QuestionList
-
- from edsl.auto.StageBase import StageBase
- from edsl.auto.StageBase import FlowDataBase
-
- from edsl.auto.utilities import gen_pipeline
-
-
- class StageQuestions(StageBase):
-     "This stages takes as input an overall question and returns a list of questions"
-
-     @dataclass
-     class Input(FlowDataBase):
-         overall_question: str
-         population: str
-
-     @dataclass
-     class Output(FlowDataBase):
-         questions: List[str]
-         population: str
-
-     input = Input
-     output = Output
-
-     def handle_data(self, data):
-         m = Model()
-         overall_question = data.overall_question
-         population = data.population
-         s = Scenario({"overall_question": overall_question, "population": population})
-         q = QuestionList(
-             question_text=dedent(
-                 """\
-                 Suppose I am interested in the question:
-                 "{{ overall_question }}"
-                 What would be some survey questions I could ask to {{ population }} that might shed light on this question?
-                 """
-             ),
-             question_name="questions",
-         )
-         results = q.by(s).by(m).run()
-         (
-             results.select("questions").print(
-                 pretty_labels={
-                     "answer.questions": f'Questions for overall question: "{overall_question }"'
-                 },
-                 split_at_dot=False,
-             )
-         )
-
-         raw_questions = results.select("questions").first()
-         questions = [q.replace("'", "").replace(":", "") for q in raw_questions]
-         return self.Output(questions=questions, population=population)
-
-
- if __name__ == "__main__":
-     pipeline = gen_pipeline([StageQuestions])
-
-     pipeline.process(
-         pipeline.input(
-             overall_question="What are some factors that could determine whether someone likes ice cream?",
-             population="Consumers",
-         )
-     )
-
-     results = StageQuestions.func(
-         overall_question="Why aren't my students studying more?", population="Tech"
-     )
edsl/auto/SurveyCreatorPipeline.py DELETED
@@ -1,21 +0,0 @@
- import random
- from typing import Dict, List, Any, TypeVar, Generator, Optional
-
- from textwrap import dedent
-
- # from edsl.language_models.model_interfaces.LanguageModelOpenAIFour import LanguageModelOpenAIFour
- from edsl import Model
- from edsl.agents.AgentList import AgentList
- from edsl.results.Results import Results
- from edsl import Agent
-
- from edsl import Scenario
- from edsl.surveys.Survey import Survey
-
- from edsl.questions.QuestionMultipleChoice import QuestionMultipleChoice
- from edsl.questions.QuestionFreeText import QuestionFreeText
- from edsl.auto.utilities import gen_pipeline
- from edsl.utilities.naming_utilities import sanitize_string
-
-
- m = Model()
edsl/auto/utilities.py DELETED
@@ -1,218 +0,0 @@
- from textwrap import dedent
- import random
- from typing import List, TypeVar, Generator, Optional
- from edsl.auto.StageBase import StageBase
- from edsl.utilities.naming_utilities import sanitize_string
- from edsl import Agent, Survey, Model, Cache, AgentList
- from edsl import QuestionFreeText, Scenario
- from edsl import QuestionMultipleChoice, Scenario, Agent, ScenarioList
-
- StageClassType = TypeVar("StageClassType", bound=StageBase)
-
-
- def gen_pipeline(stages_list: List[StageClassType]) -> StageBase:
-     """Takes as input a list of Stage classes & returns a pipeline of instantiated stages.
-     A pipeline is a linked list of stages where each stage has a next_stage attribute.
-
-     """
-     pipeline = stages_list[0]()
-     last_stage = pipeline
-     for stage in stages_list[1:]:
-         while last_stage.next_stage is not None: # find the end of the pipeline
-             last_stage = last_stage.next_stage
-         stage_to_add = stage()
-         last_stage.next_stage = stage_to_add
-     return pipeline
-
-
- q_eligibility = QuestionMultipleChoice(
-     question_text=dedent(
-         """\
-         Consider this set of question: '{{ questions }}'.
-         Consider this persona: '{{ persona }}'.
-         Would this persona be able to answer all of these questions?
-         """
-     ),
-     question_options=["No", "Yes"],
-     question_name="eligibility",
- )
-
-
- def agent_list_eligibility(
-     agent_list: AgentList,
-     survey: Optional[Survey] = None,
-     model: Optional[Model] = None,
-     cache: Optional[Cache] = None,
- ) -> List[bool]:
-     """
-     Returns whether each agent in a list is elgible for a survey i.e., can answer every question.
-
-     >>> from edsl.language_models import LanguageModel
-     >>> m = LanguageModel.example(canned_response = "1", test_model = True)
-     >>> agent_list_eligibility(AgentList.example())
-     [True, True]
-     >>> agent_list_eligibility(AgentList.example().add_trait('persona', 2*["Cool dude"]), survey = Survey.example(), model = m)
-     [True, True]
-     """
-     if survey is None:
-         return [True] * len(agent_list)
-     if "persona" not in agent_list.all_traits:
-         raise ValueError(
-             f"Each agent needs to have a persona attribute; traits are {agent_list.all_traits}"
-         )
-     sl = agent_list.select("persona").to_scenario_list()
-     sl.add_value("questions", [q.question_text for q in survey._questions])
-     results = q_eligibility.by(sl).by(model).run(cache=cache)
-     return [r == "Yes" for r in results.select("eligibility").to_list()]
-
-
- def agent_eligibility(
-     agent: Agent,
-     survey: Survey,
-     model: Optional[Model] = None,
-     cache: Optional[Cache] = None,
- ) -> bool:
-     """NB: This could be parallelized.
-
-     >>> from edsl.language_models import LanguageModel
-     >>> m = LanguageModel.example(canned_response = "1", test_model = True)
-     >>> agent_eligibility(agent = Agent.example().add_trait({'persona': "Persona"}), survey = Survey.example(), model = m)
-     True
-
-     """
-     model = model or Model()
-
-     questions = [q.question_text for q in survey._questions]
-     persona = agent.traits["persona"]
-     return (
-         q_eligibility(model=model, questions=questions, persona=persona, cache=cache)
-         == "Yes"
-     )
-
-
- def gen_agent_traits(dimension_dict: dict, seed_value: Optional[str] = None):
-     """
-     >>> dimension_dict = {'attitude':['positive', 'negative']}
-     >>> ag = gen_agent_traits(dimension_dict)
-     >>> a = next(ag)
-     >>> a == {'attitude': 'positive'} or a == {'attitude': 'negative'}
-     True
-     >>> len([next(ag) for _ in range(100)])
-     100
-     """
-     if seed_value is None:
-         seed_value = "edsl"
-
-     random.seed(seed_value)
-
-     while True:
-         new_agent_traits = {}
-         for key, list_of_values in dimension_dict.items():
-             new_agent_traits[key] = random.choice(list_of_values)
-         yield new_agent_traits
-
-
- def agent_generator(
-     persona: str,
-     dimension_dict: dict,
-     model: Optional[Model] = None,
-     cache: Optional["Cache"] = None,
- ) -> Generator["Results", None, None]:
-     """
-     >>> from edsl.language_models import LanguageModel
-     >>> m = LanguageModel.example(canned_response = "This is a cool dude.", test_model = True)
-     >>> ag = agent_generator(persona = "Base person", dimension_dict = {'attitude':['Positive', 'Negative']}, model = m)
-     >>> next(ag).select('new_agent_persona').first()
-     'This is a cool dude.'
-     >>> next(ag).select('new_agent_persona').first()
-     'This is a cool dude.'
-     """
-
-     if model is None:
-         model = Model()
-
-     q = QuestionFreeText(
-         question_text=dedent(
-             """\
-             Consider this persona: '{{ persona }}'.
-             Now imagine writing a new persona with these traits:
-             '{{ new_agent_traits }}'
-             Please write this persona as a narrative.
-             """
-         ),
-         question_name="new_agent_persona",
-     )
-     agent_trait_generator = gen_agent_traits(dimension_dict)
-     codebook = {sanitize_string(k): k for k in dimension_dict.keys()}
-     while True:
-         new_agent_traits = next(agent_trait_generator)
-         yield q(
-             persona=persona,
-             new_agent_traits=new_agent_traits,
-             codebook=codebook,
-             just_answer=False,
-             cache=cache,
-             model=model,
-         )
-
-
- def create_agents(
-     agent_generator: Generator["Results", None, None],
-     survey: Optional[Survey] = None,
-     num_agents=11,
- ) -> AgentList:
-     """
-     >>> from edsl.language_models import LanguageModel
-     >>> m = LanguageModel.example(canned_response = "This is a cool dude.", test_model = True)
-     >>> ag = agent_generator(persona = "Base person", dimension_dict = {'attitude':['Positive', 'Negative']}, model = m)
-     >>> new_agent_list = create_agents(agent_generator = ag)
-     >>> new_agent_list
-
-     """
-     agent_list = AgentList([])
-
-     MAX_ITERATIONS_MULTIPLIER = 2
-     iterations = 0
-
-     while len(agent_list) < num_agents:
-         iterations += 1
-         candidate_agent = next(agent_generator)
-         codebook = candidate_agent.select("codebook").to_list()[0]
-
-         koobedoc = {v: k for k, v in codebook.items()}
-         persona = candidate_agent.select("new_agent_persona").to_list()[0]
-         traits = candidate_agent.select("new_agent_traits").to_list()[0]
-         new_traits = {koobedoc[key]: value for key, value in traits.items()} | {
-             "persona": persona
-         }
-         agent = Agent(traits=new_traits, codebook=codebook)
-         if survey is not None:
-             if agent_eligibility(agent, survey):
-                 agent_list.append(agent)
-             else:
-                 print("Agent not eligible")
-         else:
-             agent_list.append(agent)
-
-         if iterations > MAX_ITERATIONS_MULTIPLIER * num_agents:
-             raise Exception("Too many failures")
-
-     return agent_list
-
-
- if __name__ == "__main__":
-     import doctest
-
-     doctest.testmod()
-     # from edsl.language_models import LanguageModel
-
-     # m = LanguageModel.example(canned_response="This is a cool dude.", test_model=True)
-     # ag = agent_generator(
-     #     persona="Base person",
-     #     dimension_dict={"attitude": ["Positive", "Negative"]},
-     #     model=m,
-     # )
-     # example = [next(ag).select("new_agent_persona").first() for _ in range(10)]
-     # dimension_dict = {"attitude": ["positive", "negative"]}
-     # ag = gen_agent_traits(dimension_dict)
-     # example = [next(ag) for _ in range(100)]