edsl 0.1.31.dev3__py3-none-any.whl → 0.1.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__version__.py +1 -1
- edsl/agents/Invigilator.py +7 -2
- edsl/agents/PromptConstructionMixin.py +35 -15
- edsl/config.py +15 -1
- edsl/conjure/Conjure.py +6 -0
- edsl/coop/coop.py +4 -0
- edsl/data/CacheHandler.py +3 -4
- edsl/enums.py +5 -0
- edsl/exceptions/general.py +10 -8
- edsl/inference_services/AwsBedrock.py +110 -0
- edsl/inference_services/AzureAI.py +197 -0
- edsl/inference_services/DeepInfraService.py +6 -91
- edsl/inference_services/GroqService.py +18 -0
- edsl/inference_services/InferenceServicesCollection.py +13 -8
- edsl/inference_services/OllamaService.py +18 -0
- edsl/inference_services/OpenAIService.py +68 -21
- edsl/inference_services/models_available_cache.py +31 -0
- edsl/inference_services/registry.py +14 -1
- edsl/jobs/Jobs.py +103 -21
- edsl/jobs/buckets/TokenBucket.py +12 -4
- edsl/jobs/interviews/Interview.py +31 -9
- edsl/jobs/interviews/InterviewExceptionEntry.py +101 -0
- edsl/jobs/interviews/InterviewTaskBuildingMixin.py +49 -33
- edsl/jobs/interviews/interview_exception_tracking.py +68 -10
- edsl/jobs/runners/JobsRunnerAsyncio.py +112 -81
- edsl/jobs/runners/JobsRunnerStatusData.py +0 -237
- edsl/jobs/runners/JobsRunnerStatusMixin.py +291 -35
- edsl/jobs/tasks/TaskCreators.py +8 -2
- edsl/jobs/tasks/TaskHistory.py +145 -1
- edsl/language_models/LanguageModel.py +62 -41
- edsl/language_models/registry.py +4 -0
- edsl/questions/QuestionBudget.py +0 -1
- edsl/questions/QuestionCheckBox.py +0 -1
- edsl/questions/QuestionExtract.py +0 -1
- edsl/questions/QuestionFreeText.py +2 -9
- edsl/questions/QuestionList.py +0 -1
- edsl/questions/QuestionMultipleChoice.py +1 -2
- edsl/questions/QuestionNumerical.py +0 -1
- edsl/questions/QuestionRank.py +0 -1
- edsl/results/DatasetExportMixin.py +33 -3
- edsl/scenarios/Scenario.py +14 -0
- edsl/scenarios/ScenarioList.py +216 -13
- edsl/scenarios/ScenarioListExportMixin.py +15 -4
- edsl/scenarios/ScenarioListPdfMixin.py +3 -0
- edsl/surveys/Rule.py +5 -2
- edsl/surveys/Survey.py +84 -1
- edsl/surveys/SurveyQualtricsImport.py +213 -0
- edsl/utilities/utilities.py +31 -0
- {edsl-0.1.31.dev3.dist-info → edsl-0.1.32.dist-info}/METADATA +5 -1
- {edsl-0.1.31.dev3.dist-info → edsl-0.1.32.dist-info}/RECORD +52 -46
- {edsl-0.1.31.dev3.dist-info → edsl-0.1.32.dist-info}/LICENSE +0 -0
- {edsl-0.1.31.dev3.dist-info → edsl-0.1.32.dist-info}/WHEEL +0 -0
edsl/scenarios/ScenarioList.py
CHANGED
@@ -242,6 +242,18 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
242
242
|
|
243
243
|
return ScenarioList(new_data)
|
244
244
|
|
245
|
+
@classmethod
def from_urls(
    cls, urls: list[str], field_name: Optional[str] = "text"
) -> ScenarioList:
    """Create a ScenarioList from a list of URLs.

    Each URL is turned into one scenario via ``Scenario.from_url``.
    Declared as a classmethod so it can be used as an alternate constructor
    (``ScenarioList.from_urls(urls)``); calling it on an existing instance
    keeps working.

    :param urls: A list of URLs.
    :param field_name: The name of the field to store the text from the URLs.
    :return: A new list with one scenario per URL, in the order given.
    """
    return cls([Scenario.from_url(url, field_name) for url in urls])
|
256
|
+
|
245
257
|
def select(self, *fields) -> ScenarioList:
|
246
258
|
"""
|
247
259
|
Selects scenarios with only the references fields.
|
@@ -288,12 +300,15 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
288
300
|
>>> s = ScenarioList.from_list("a", [1,2,3])
|
289
301
|
>>> s.to_dataset()
|
290
302
|
Dataset([{'a': [1, 2, 3]}])
|
303
|
+
>>> s = ScenarioList.from_list("a", [1,2,3]).add_list("b", [4,5,6])
|
304
|
+
>>> s.to_dataset()
|
305
|
+
Dataset([{'a': [1, 2, 3]}, {'b': [4, 5, 6]}])
|
291
306
|
"""
|
292
307
|
from edsl.results.Dataset import Dataset
|
293
308
|
|
294
309
|
keys = self[0].keys()
|
295
|
-
data = {key: [scenario[key] for scenario in self.data] for key in keys
|
296
|
-
return Dataset(
|
310
|
+
data = [{key: [scenario[key] for scenario in self.data]} for key in keys]
|
311
|
+
return Dataset(data)
|
297
312
|
|
298
313
|
def add_list(self, name, values) -> ScenarioList:
|
299
314
|
"""Add a list of values to a ScenarioList.
|
@@ -352,6 +367,99 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
352
367
|
data = cursor.fetchall()
|
353
368
|
return cls([Scenario(dict(zip(columns, row))) for row in data])
|
354
369
|
|
370
|
+
@classmethod
def from_latex(cls, tex_file_path: str):
    """Create a ScenarioList with one scenario per non-blank line of a file.

    Every non-blank line becomes a scenario with four keys:

    - ``line_no``: 1-based position of the line in the file (blank lines
      are counted, so numbers may skip),
    - ``text``: the line with surrounding whitespace stripped,
    - ``line_before`` / ``line_after``: the stripped text of the neighbouring
      non-blank lines, or ``None`` at the start/end of the file.

    :param tex_file_path: Path of the text (LaTeX) file to read.
    :return: An instance of ``cls`` so subclasses get their own type back.
    """
    with open(tex_file_path, "r") as file:
        stripped_lines = [line.strip() for line in file]

    # Keep the original (1-based) line number next to each surviving line.
    non_blank_lines = [
        (line_no, text)
        for line_no, text in enumerate(stripped_lines, start=1)
        if text
    ]
    last_index = len(non_blank_lines) - 1

    scenarios = []
    for index, (line_no, text) in enumerate(non_blank_lines):
        scenarios.append(
            Scenario(
                {
                    "line_no": line_no,
                    "text": text,
                    "line_before": (
                        non_blank_lines[index - 1][1] if index > 0 else None
                    ),
                    "line_after": (
                        non_blank_lines[index + 1][1]
                        if index < last_index
                        else None
                    ),
                }
            )
        )

    return cls(scenarios)
|
394
|
+
|
395
|
+
@classmethod
def from_docx(cls, docx_file_path: str):
    """Create a ScenarioList with one scenario per non-blank line of a .docx file.

    The text of every paragraph is split with ``str.splitlines`` and the
    resulting lines are numbered consecutively. Each non-blank line becomes a
    scenario with the keys ``line_no`` (1-based index in that sequence),
    ``text`` (stripped), and ``line_before`` / ``line_after`` (stripped text
    of the neighbouring non-blank lines, ``None`` at either end).

    :param docx_file_path: Path of the Word document; requires the ``docx``
        package to be importable.
    :return: An instance of ``cls`` so subclasses get their own type back.
    """
    from docx import Document

    doc = Document(docx_file_path)

    # Treat each paragraph as one or more lines of text.
    lines = []
    for para in doc.paragraphs:
        lines.extend(para.text.splitlines())

    non_blank_lines = [
        (line_no, line.strip())
        for line_no, line in enumerate(lines, start=1)
        if line.strip()
    ]
    last_index = len(non_blank_lines) - 1

    scenarios = []
    for index, (line_no, text) in enumerate(non_blank_lines):
        scenarios.append(
            Scenario(
                {
                    "line_no": line_no,
                    "text": text,
                    "line_before": (
                        non_blank_lines[index - 1][1] if index > 0 else None
                    ),
                    "line_after": (
                        non_blank_lines[index + 1][1]
                        if index < last_index
                        else None
                    ),
                }
            )
        )

    return cls(scenarios)
|
425
|
+
|
426
|
+
@classmethod
def from_google_doc(cls, url: str) -> ScenarioList:
    """Create a ScenarioList from a Google Doc.

    This method downloads the Google Doc as a Word file (.docx), saves it to a temporary file,
    reads it using the from_docx class method, and removes the temporary file again.

    Args:
        url (str): The URL to the Google Doc. It must contain both ``/d/<id>``
            and ``/edit``.

    Returns:
        ScenarioList: An instance of the ScenarioList class.

    Raises:
        ValueError: If the document id cannot be extracted from ``url``.
        requests.HTTPError: If the export request is not successful.

    """
    import os
    import tempfile
    import requests

    # Both markers are needed to slice the id out of the URL; checking only
    # "/edit" would turn a malformed URL into an IndexError below.
    if "/edit" not in url or "/d/" not in url:
        raise ValueError("Invalid Google Doc URL format.")
    doc_id = url.split("/d/")[1].split("/edit")[0]

    export_url = f"https://docs.google.com/document/d/{doc_id}/export?format=docx"

    # Download the Google Doc as a Word file (.docx)
    response = requests.get(export_url)
    response.raise_for_status()  # Ensure the request was successful

    # delete=False so the file can be reopened by name after the handle is
    # closed; it is removed explicitly once it has been parsed.
    with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as temp_file:
        temp_file.write(response.content)
        temp_filename = temp_file.name

    try:
        return cls.from_docx(temp_filename)
    finally:
        if os.path.exists(temp_filename):
            os.remove(temp_filename)
|
462
|
+
|
355
463
|
@classmethod
|
356
464
|
def from_pandas(cls, df) -> ScenarioList:
|
357
465
|
"""Create a ScenarioList from a pandas DataFrame.
|
@@ -379,6 +487,112 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
379
487
|
else:
|
380
488
|
return {scenario[field]: scenario[value] for scenario in self}
|
381
489
|
|
490
|
+
@classmethod
def from_excel(
    cls, filename: str, sheet_name: Optional[str] = None
) -> ScenarioList:
    """Create a ScenarioList from an Excel file.

    If the Excel file contains multiple sheets and no sheet_name is provided,
    the method will print the available sheets and require the user to specify one.
    Each row of the selected sheet becomes one scenario keyed by column name.

    Example:

    >>> import tempfile
    >>> import os
    >>> import pandas as pd
    >>> with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as f:
    ...     df1 = pd.DataFrame({
    ...         'name': ['Alice', 'Bob'],
    ...         'age': [30, 25],
    ...         'location': ['New York', 'Los Angeles']
    ...     })
    ...     df2 = pd.DataFrame({
    ...         'name': ['Charlie', 'David'],
    ...         'age': [35, 40],
    ...         'location': ['Chicago', 'Boston']
    ...     })
    ...     with pd.ExcelWriter(f.name) as writer:
    ...         df1.to_excel(writer, sheet_name='Sheet1', index=False)
    ...         df2.to_excel(writer, sheet_name='Sheet2', index=False)
    ...     temp_filename = f.name
    >>> scenario_list = ScenarioList.from_excel(temp_filename, sheet_name='Sheet1')
    >>> len(scenario_list)
    2
    >>> scenario_list[0]['name']
    'Alice'
    >>> scenario_list = ScenarioList.from_excel(temp_filename)  # Should raise an error and list sheets
    Traceback (most recent call last):
    ...
    ValueError: Please provide a sheet name to load data from.
    """
    from edsl.scenarios.Scenario import Scenario
    import pandas as pd

    if sheet_name is None:
        # sheet_name=None makes pandas return {sheet name: DataFrame} for the
        # whole workbook, which is only needed to discover the sheets.
        all_sheets = pd.read_excel(filename, sheet_name=None)
        if len(all_sheets) > 1:
            print("The Excel file contains multiple sheets:")
            for name in all_sheets.keys():
                print(f"- {name}")
            raise ValueError("Please provide a sheet name to load data from.")
        # Exactly one sheet: reuse the frame that was already parsed instead
        # of reading the file a second time.
        df = next(iter(all_sheets.values()))
    else:
        # Load only the requested sheet.
        df = pd.read_excel(filename, sheet_name=sheet_name)

    return cls([Scenario(row.to_dict()) for _, row in df.iterrows()])
|
554
|
+
|
555
|
+
@classmethod
def from_google_sheet(cls, url: str, sheet_name: str = None) -> ScenarioList:
    """Create a ScenarioList from a Google Sheet.

    This method downloads the Google Sheet as an Excel file, saves it to a temporary file,
    reads it using the from_excel class method, and removes the temporary file again.

    Args:
        url (str): The URL to the Google Sheet. It must contain both
            ``/d/<id>`` and ``/edit``.
        sheet_name (str, optional): The name of the sheet to load. If None, the method will behave
                                    the same as from_excel regarding multiple sheets.

    Returns:
        ScenarioList: An instance of the ScenarioList class.

    Raises:
        ValueError: If the sheet id cannot be extracted from ``url``, or as
            raised by from_excel.
        requests.HTTPError: If the export request is not successful.

    """
    import os
    import tempfile
    import requests

    # Both markers are needed to slice the id out of the URL; checking only
    # "/edit" would turn a malformed URL into an IndexError below.
    if "/edit" not in url or "/d/" not in url:
        raise ValueError("Invalid Google Sheet URL format.")
    sheet_id = url.split("/d/")[1].split("/edit")[0]

    export_url = (
        f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=xlsx"
    )

    # Download the Google Sheet as an Excel file
    response = requests.get(export_url)
    response.raise_for_status()  # Ensure the request was successful

    # delete=False so the file can be reopened by name after the handle is
    # closed; it is removed explicitly once it has been parsed.
    with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as temp_file:
        temp_file.write(response.content)
        temp_filename = temp_file.name

    try:
        return cls.from_excel(temp_filename, sheet_name=sheet_name)
    finally:
        if os.path.exists(temp_filename):
            os.remove(temp_filename)
|
595
|
+
|
382
596
|
@classmethod
|
383
597
|
def from_csv(cls, filename: str) -> ScenarioList:
|
384
598
|
"""Create a ScenarioList from a CSV file.
|
@@ -484,17 +698,6 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
|
|
484
698
|
table.add_row(str(i), s.rich_print())
|
485
699
|
return table
|
486
700
|
|
487
|
-
# def print(
|
488
|
-
# self,
|
489
|
-
# format: Optional[str] = None,
|
490
|
-
# max_rows: Optional[int] = None,
|
491
|
-
# pretty_labels: Optional[dict] = None,
|
492
|
-
# filename: str = None,
|
493
|
-
# ):
|
494
|
-
# from edsl.utilities.interface import print_scenario_list
|
495
|
-
|
496
|
-
# print_scenario_list(self[:max_rows])
|
497
|
-
|
498
701
|
def __getitem__(self, key: Union[int, slice]) -> Any:
|
499
702
|
"""Return the item at the given index.
|
500
703
|
|
@@ -20,13 +20,24 @@ def to_dataset(func):
|
|
20
20
|
return wrapper
|
21
21
|
|
22
22
|
|
23
|
-
def
|
24
|
-
for attr_name, attr_value in
|
25
|
-
if callable(attr_value):
|
23
|
+
def decorate_methods_from_mixin(cls, mixin_cls):
    """Attach ``to_dataset``-wrapped copies of ``mixin_cls``'s callables to ``cls``.

    Only attributes defined directly on ``mixin_cls`` are considered, and
    names starting with a double underscore are left alone.

    :param cls: The class that receives the wrapped attributes.
    :param mixin_cls: The class whose own callables are wrapped.
    :return: ``cls``, after modification.
    """
    for name, member in vars(mixin_cls).items():
        if name.startswith("__") or not callable(member):
            continue
        setattr(cls, name, to_dataset(member))
    return cls
|
28
28
|
|
29
29
|
|
30
|
-
|
30
|
+
# def decorate_all_methods(cls):
|
31
|
+
# for attr_name, attr_value in cls.__dict__.items():
|
32
|
+
# if callable(attr_value):
|
33
|
+
# setattr(cls, attr_name, to_dataset(attr_value))
|
34
|
+
# return cls
|
35
|
+
|
36
|
+
|
37
|
+
# @decorate_all_methods
|
31
38
|
class ScenarioListExportMixin(DatasetExportMixin):
    """Mixin class exposing the ``DatasetExportMixin`` export methods.

    Whenever a subclass is defined, it is passed through
    ``decorate_methods_from_mixin`` so that the callables defined on
    ``DatasetExportMixin`` are re-attached to the subclass wrapped in
    ``to_dataset``.

    NOTE(review): the previous summary said "exporting Results objects";
    given the class name this presumably serves ScenarioList -- confirm.
    """

    def __init_subclass__(cls, **kwargs):
        # Runs once per subclass definition, not per instance.
        super().__init_subclass__(**kwargs)
        decorate_methods_from_mixin(cls, DatasetExportMixin)
|
@@ -43,6 +43,9 @@ class ScenarioListPdfMixin:
|
|
43
43
|
|
44
44
|
@staticmethod
|
45
45
|
def extract_text_from_pdf(pdf_path):
|
46
|
+
from edsl import Scenario
|
47
|
+
|
48
|
+
# TODO: Add test case
|
46
49
|
# Ensure the file exists
|
47
50
|
if not os.path.exists(pdf_path):
|
48
51
|
raise FileNotFoundError(f"The file {pdf_path} does not exist.")
|
edsl/surveys/Rule.py
CHANGED
@@ -117,13 +117,15 @@ class Rule:
|
|
117
117
|
def _checks(self):
|
118
118
|
pass
|
119
119
|
|
120
|
-
|
120
|
+
# def _to_dict(self):
|
121
|
+
|
122
|
+
# @add_edsl_version
|
121
123
|
def to_dict(self):
|
122
124
|
"""Convert the rule to a dictionary for serialization.
|
123
125
|
|
124
126
|
>>> r = Rule.example()
|
125
127
|
>>> r.to_dict()
|
126
|
-
{'current_q': 1, 'expression': "q1 == 'yes'", 'next_q': 2, 'priority': 0, 'question_name_to_index': {'q1': 1}, 'before_rule': False
|
128
|
+
{'current_q': 1, 'expression': "q1 == 'yes'", 'next_q': 2, 'priority': 0, 'question_name_to_index': {'q1': 1}, 'before_rule': False}
|
127
129
|
"""
|
128
130
|
return {
|
129
131
|
"current_q": self.current_q,
|
@@ -133,6 +135,7 @@ class Rule:
|
|
133
135
|
"question_name_to_index": self.question_name_to_index,
|
134
136
|
"before_rule": self.before_rule,
|
135
137
|
}
|
138
|
+
# return self._to_dict()
|
136
139
|
|
137
140
|
@classmethod
|
138
141
|
@remove_edsl_version
|
edsl/surveys/Survey.py
CHANGED
@@ -2,6 +2,9 @@
|
|
2
2
|
|
3
3
|
from __future__ import annotations
|
4
4
|
import re
|
5
|
+
import tempfile
|
6
|
+
import requests
|
7
|
+
|
5
8
|
from typing import Any, Generator, Optional, Union, List, Literal, Callable
|
6
9
|
from uuid import uuid4
|
7
10
|
from edsl.Base import Base
|
@@ -75,6 +78,41 @@ class Survey(SurveyExportMixin, SurveyFlowVisualizationMixin, Base):
|
|
75
78
|
|
76
79
|
warnings.warn("name parameter to a survey is deprecated.")
|
77
80
|
|
81
|
+
def simulate(self) -> dict:
    """Walk the survey with simulated answers and return the collected answers.

    Drives the generator from ``gen_path_through_survey``: each yielded
    question is asked for a simulated answer, which is sent back keyed by the
    question name, until the generator is exhausted.

    :return: ``self.answers`` as left behind by the generator.
    """
    path = self.gen_path_through_survey()
    current = next(path)
    try:
        while True:
            simulated = current._simulate_answer()
            current = path.send({current.question_name: simulated["answer"]})
    except StopIteration:
        # The generator finished: there is no further question to ask.
        pass
    return self.answers
|
92
|
+
|
93
|
+
def create_agent(self) -> "Agent":
    """Build an Agent whose traits and direct answers come from a simulated run.

    The survey is simulated once; the resulting answers become the agent's
    traits and also back a direct-answering method that looks up the answer
    by question name (``None`` when the question was not answered).

    :return: The configured Agent.
    """
    simulated_answers = self.simulate()
    from edsl.agents.Agent import Agent

    agent = Agent(traits=simulated_answers)

    def construct_answer_dict_function(traits: dict) -> Callable:
        def func(self, question: "QuestionBase", scenario=None):
            return traits.get(question.question_name, None)

        return func

    answer_directly = construct_answer_dict_function(simulated_answers)
    agent.add_direct_question_answering_method(answer_directly)
    return agent
|
110
|
+
|
111
|
+
def simulate_results(self) -> "Results":
    """Run the survey with the agent from ``create_agent`` and return the outcome.

    :return: Whatever ``run()`` returns for the survey paired with that agent.
    """
    simulated_agent = self.create_agent()
    jobs = self.by([simulated_agent])
    return jobs.run()
|
115
|
+
|
78
116
|
def get(self, question_name: str) -> QuestionBase:
|
79
117
|
"""
|
80
118
|
Return the question object given the question name.
|
@@ -141,6 +179,12 @@ class Survey(SurveyExportMixin, SurveyFlowVisualizationMixin, Base):
|
|
141
179
|
|
142
180
|
@property
def parameters(self):
    """Return a set of parameters in the survey.

    The result is the union of ``q.parameters`` over all questions; a survey
    without questions yields an empty set.

    >>> s = Survey.example()
    >>> s.parameters
    set()
    """
    # Start from an empty set: set.union(*[]) raises TypeError when there
    # are no questions to unpack.
    return set().union(*(q.parameters for q in self.questions))
|
145
189
|
|
146
190
|
@property
|
@@ -702,9 +746,13 @@ class Survey(SurveyExportMixin, SurveyFlowVisualizationMixin, Base):
|
|
702
746
|
>>> i2.send({"q0": "no"})
|
703
747
|
Question('multiple_choice', question_name = \"""q1\""", question_text = \"""Why not?\""", question_options = ['killer bees in cafeteria', 'other'])
|
704
748
|
"""
|
749
|
+
self.answers = {}
|
705
750
|
question = self._first_question()
|
706
751
|
while not question == EndOfSurvey:
|
707
|
-
|
752
|
+
# breakpoint()
|
753
|
+
answer = yield question
|
754
|
+
self.answers.update(answer)
|
755
|
+
# print(f"Answers: {self.answers}")
|
708
756
|
## TODO: This should also include survey and agent attributes
|
709
757
|
question = self.next_question(question, self.answers)
|
710
758
|
|
@@ -775,6 +823,15 @@ class Survey(SurveyExportMixin, SurveyFlowVisualizationMixin, Base):
|
|
775
823
|
|
776
824
|
@property
|
777
825
|
def piping_dag(self) -> DAG:
|
826
|
+
"""Figures out the DAG of piping dependencies.
|
827
|
+
|
828
|
+
>>> from edsl import QuestionFreeText
|
829
|
+
>>> q0 = QuestionFreeText(question_text="Here is a question", question_name="q0")
|
830
|
+
>>> q1 = QuestionFreeText(question_text="You previously answered {{ q0 }}---how do you feel now?", question_name="q1")
|
831
|
+
>>> s = Survey([q0, q1])
|
832
|
+
>>> s.piping_dag
|
833
|
+
{1: {0}}
|
834
|
+
"""
|
778
835
|
d = {}
|
779
836
|
for question_name, depenencies in self.parameters_by_question.items():
|
780
837
|
if depenencies:
|
@@ -919,6 +976,32 @@ class Survey(SurveyExportMixin, SurveyFlowVisualizationMixin, Base):
|
|
919
976
|
)
|
920
977
|
return survey
|
921
978
|
|
979
|
+
@classmethod
def from_qsf(
    cls, qsf_file: Optional[str] = None, url: Optional[str] = None
) -> Survey:
    """Create a Survey object from a Qualtrics QSF file.

    Exactly one source must be given. When ``url`` is used, the file is
    downloaded to a temporary ``.qsf`` file, imported, and the temporary
    file is removed afterwards.

    :param qsf_file: Path of a local QSF file.
    :param url: URL from which a QSF file can be downloaded.
    :return: The survey built by ``SurveyQualtricsImport.create_survey``.
    :raises ValueError: If both or neither of ``qsf_file`` and ``url`` are given.
    """

    if url and qsf_file:
        raise ValueError("Only one of url or qsf_file can be provided.")

    if (not url) and (not qsf_file):
        raise ValueError("Either url or qsf_file must be provided.")

    from edsl.surveys.SurveyQualtricsImport import SurveyQualtricsImport

    if not url:
        return SurveyQualtricsImport(qsf_file).create_survey()

    import os

    response = requests.get(url)
    response.raise_for_status()  # Ensure the request was successful

    # Save the QSF file to a temporary file. delete=False so it can be
    # reopened by name; it is removed explicitly once imported.
    with tempfile.NamedTemporaryFile(suffix=".qsf", delete=False) as temp_file:
        temp_file.write(response.content)
        downloaded_file = temp_file.name

    try:
        return SurveyQualtricsImport(downloaded_file).create_survey()
    finally:
        if os.path.exists(downloaded_file):
            os.remove(downloaded_file)
|
1004
|
+
|
922
1005
|
###################
|
923
1006
|
# DISPLAY METHODS
|
924
1007
|
###################
|
@@ -0,0 +1,213 @@
|
|
1
|
+
import json
|
2
|
+
import html
|
3
|
+
import re
|
4
|
+
|
5
|
+
from edsl import Question
|
6
|
+
from edsl import Survey
|
7
|
+
|
8
|
+
qualtrics_codes = {
|
9
|
+
"TE": "free_text",
|
10
|
+
"MC": "multiple_choice",
|
11
|
+
"Matrix": "matrix",
|
12
|
+
"DB": "free_text", # not quite right, but for now
|
13
|
+
"Timing": "free_text", # not quite right, but for now
|
14
|
+
}
|
15
|
+
# TE (Text Entry): Allows respondents to input a text response.
|
16
|
+
# MC (Multiple Choice): Provides respondents with a list of options to choose from.
|
17
|
+
# DB (Descriptive Text or Information): Displays text or information without requiring a response.
|
18
|
+
# Matrix: A grid-style question where respondents can evaluate multiple items using the same set of response options.
|
19
|
+
|
20
|
+
|
21
|
+
def clean_html(raw_html):
    """Return ``raw_html`` as plain text.

    Tags are removed first and entities are unescaped afterwards, so text
    that merely *contains* escaped angle brackets (``1 &lt; 2``) is kept
    instead of being mistaken for markup. Non-breaking spaces become regular
    spaces and surrounding whitespace is stripped.

    :param raw_html: The HTML fragment to clean.
    :return: The cleaned text.
    """
    # Remove HTML tags; DOTALL lets a tag that spans several lines match.
    clean_text = re.sub(r"<.*?>", "", raw_html, flags=re.DOTALL)
    # Unescape HTML entities only after the real markup is gone.
    clean_text = html.unescape(clean_text)
    # Replace non-breaking spaces with regular spaces
    clean_text = clean_text.replace("\xa0", " ")
    # Strip leading/trailing spaces
    return clean_text.strip()
|
31
|
+
|
32
|
+
|
33
|
+
class QualtricsQuestion:
    """Wrapper around one ``SQ`` entry of a QSF file's ``SurveyElements``.

    Exposes the fields needed to build EDSL questions and converts the entry
    with :meth:`to_edsl`.
    """

    def __init__(self, question_json, debug=False):
        """Store the entry and verify that it is a survey question.

        :param question_json: The decoded JSON object of the element.
        :param debug: When true, errors raised while building an EDSL
            question are re-raised instead of the question being dropped.
        :raises ValueError: If the element type is not ``"SQ"``.
        """
        self.debug = debug
        self.question_json = question_json
        if self.element != "SQ":
            raise ValueError("Invalid question element type")

    @property
    def element(self):
        """The element type code of the entry."""
        return self.question_json["Element"]

    @property
    def selector(self):
        """The top-level ``Selector`` value of the entry, or ``None``."""
        return self.question_json.get("Selector", None)

    @property
    def question_name(self):
        """The entry's ``PrimaryAttribute``, used as the question name."""
        return self.question_json["PrimaryAttribute"]

    @property
    def question_text(self):
        """The question text with HTML markup removed."""
        return clean_html(self.question_json["Payload"]["QuestionText"])

    @property
    def raw_question_type(self):
        """The question type code as stored in the payload."""
        return self.question_json["Payload"]["QuestionType"]

    @property
    def question_type(self):
        """The EDSL type mapped from the raw code, or ``None`` if unmapped.

        An unmapped code is reported on stdout.
        """
        q_type = qualtrics_codes.get(self.raw_question_type, None)
        if q_type is None:
            print(f"Unknown question type: {self.raw_question_type}")
            return None
        return q_type

    @property
    def choices(self):
        """Display texts of the payload's ``Choices``, or ``None`` if absent."""
        payload = self.question_json["Payload"]
        if "Choices" in payload:
            return [choice["Display"] for choice in payload["Choices"].values()]
        return None

    @property
    def answers(self):
        """Display texts of the payload's ``Answers``, or ``None`` if absent."""
        payload = self.question_json["Payload"]
        if "Answers" in payload:
            # Read from "Answers" itself; iterating "Choices" here would
            # return the row labels instead of the answer options.
            return [answer["Display"] for answer in payload["Answers"].values()]
        return None

    def _try_question(self, make_fields, report_errors=False):
        """Build one Question, returning ``[question]`` or ``[]`` on failure.

        ``make_fields`` is called inside the guarded region so that errors
        while reading the entry are handled like construction errors.
        """
        try:
            return [Question(**make_fields())]
        except Exception as e:
            if self.debug:
                raise
            if report_errors:
                print(e)
            return []

    @staticmethod
    def _parse_linear_scale(choices):
        """Split ``label<br>number`` choices into ``(options, labels)``.

        Returns ``None`` when any choice does not follow that shape, so the
        caller can treat the question as ordinary multiple choice.
        """
        option_labels = {}
        question_options = []
        try:
            for choice in choices:
                if "<br>" in choice:
                    option_label, question_option = choice.split("<br>")
                    option_labels[int(question_option)] = option_label
                    question_options.append(int(question_option))
                else:
                    question_options.append(int(choice))
        except ValueError:
            # Non-numeric value or more than one "<br>": not a scale.
            return None
        return question_options, option_labels

    def to_edsl(self):
        """Convert the entry to a list of EDSL questions.

        Free text yields one question; multiple choice yields a checkbox,
        linear scale, or multiple-choice question; a matrix yields one
        multiple-choice question per choice. Questions that cannot be built
        are dropped unless ``debug`` is set.

        :raises ValueError: If the question type is not supported.
        """
        # Read once: the property prints a message for unknown types.
        question_type = self.question_type

        if question_type == "free_text":
            return self._try_question(
                lambda: {
                    "question_type": question_type,
                    "question_text": self.question_text,
                    "question_name": self.question_name,
                }
            )

        if question_type == "multiple_choice":
            # Let's figure out if it's actually a checkbox question
            if self.selector == "MAVR" or self.selector == "MULTIPLE":
                return self._try_question(
                    lambda: {
                        "question_type": "checkbox",
                        "question_text": self.question_text,
                        "question_name": self.question_name,
                        "question_options": self.choices,
                    }
                )

            # maybe it's a linear scale!
            choices = self.choices
            if choices and "<br>" in choices[0]:
                scale = self._parse_linear_scale(choices)
                if scale is not None:
                    question_options, option_labels = scale
                    return self._try_question(
                        lambda: {
                            "question_type": "linear_scale",
                            "question_text": self.question_text,
                            "question_name": self.question_name,
                            "question_options": question_options,
                            "option_labels": option_labels,
                        },
                        report_errors=True,
                    )

            return self._try_question(
                lambda: {
                    "question_type": question_type,
                    "question_text": self.question_text,
                    "question_name": self.question_name,
                    "question_options": self.choices,
                }
            )

        if question_type == "matrix":
            questions = []
            # One multiple-choice question per choice, sharing the answers.
            for index, choice in enumerate(self.choices or []):
                questions.extend(
                    self._try_question(
                        lambda: {
                            "question_type": "multiple_choice",
                            "question_text": self.question_text + f" ({choice})",
                            "question_name": self.question_name + f"_{index}",
                            "question_options": self.answers,
                        }
                    )
                )
            return questions

        raise ValueError(f"Invalid question type: {question_type}")
|
177
|
+
|
178
|
+
|
179
|
+
class SurveyQualtricsImport:
    """Build an EDSL Survey from a Qualtrics QSF file.

    The file is parsed on construction; :meth:`create_survey` then converts
    the extracted questions.
    """

    def __init__(self, qsf_file_name: str):
        """Read ``qsf_file_name`` and keep its survey questions.

        :param qsf_file_name: Path of the QSF (JSON) file.
        """
        self.qsf_file_name = qsf_file_name
        self.question_data = self.extract_questions_from_json()

    def create_survey(self):
        """Return a Survey containing every question that could be converted."""
        questions = []
        for qualtrics_question in self.question_data:
            # One entry may expand to zero, one or several questions.
            questions.extend(qualtrics_question.to_edsl())
        return Survey(questions)

    def extract_questions_from_json(self):
        """Return a QualtricsQuestion for each ``SQ`` element in the file."""
        # Decode explicitly as UTF-8 rather than relying on the platform's
        # default encoding.
        with open(self.qsf_file_name, "r", encoding="utf-8") as f:
            survey_data = json.load(f)

        return [
            QualtricsQuestion(element)
            for element in survey_data["SurveyElements"]
            if element["Element"] == "SQ"
        ]
|
203
|
+
|
204
|
+
|
205
|
+
if __name__ == "__main__":
|
206
|
+
survey_creator = SurveyQualtricsImport("example.qsf")
|
207
|
+
# print(survey_creator.question_data)
|
208
|
+
survey = survey_creator.create_survey()
|
209
|
+
# info = survey.push()
|
210
|
+
# print(info)
|
211
|
+
# questions = survey.extract_questions_from_json()
|
212
|
+
# for question in questions:
|
213
|
+
# print(question)
|