edsl 0.1.44__py3-none-any.whl → 0.1.46__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. edsl/Base.py +7 -3
  2. edsl/__version__.py +1 -1
  3. edsl/agents/InvigilatorBase.py +3 -1
  4. edsl/agents/PromptConstructor.py +66 -91
  5. edsl/agents/QuestionInstructionPromptBuilder.py +160 -79
  6. edsl/agents/QuestionTemplateReplacementsBuilder.py +80 -17
  7. edsl/agents/question_option_processor.py +15 -6
  8. edsl/coop/CoopFunctionsMixin.py +3 -4
  9. edsl/coop/coop.py +171 -96
  10. edsl/data/RemoteCacheSync.py +10 -9
  11. edsl/enums.py +3 -3
  12. edsl/inference_services/AnthropicService.py +11 -9
  13. edsl/inference_services/AvailableModelFetcher.py +2 -0
  14. edsl/inference_services/AwsBedrock.py +1 -2
  15. edsl/inference_services/AzureAI.py +12 -9
  16. edsl/inference_services/GoogleService.py +9 -4
  17. edsl/inference_services/InferenceServicesCollection.py +2 -2
  18. edsl/inference_services/MistralAIService.py +1 -2
  19. edsl/inference_services/OpenAIService.py +9 -4
  20. edsl/inference_services/PerplexityService.py +2 -1
  21. edsl/inference_services/{GrokService.py → XAIService.py} +2 -2
  22. edsl/inference_services/registry.py +2 -2
  23. edsl/jobs/AnswerQuestionFunctionConstructor.py +12 -1
  24. edsl/jobs/Jobs.py +24 -17
  25. edsl/jobs/JobsChecks.py +10 -13
  26. edsl/jobs/JobsPrompts.py +49 -26
  27. edsl/jobs/JobsRemoteInferenceHandler.py +4 -5
  28. edsl/jobs/async_interview_runner.py +3 -1
  29. edsl/jobs/check_survey_scenario_compatibility.py +5 -5
  30. edsl/jobs/data_structures.py +3 -0
  31. edsl/jobs/interviews/Interview.py +6 -3
  32. edsl/jobs/interviews/InterviewExceptionEntry.py +12 -0
  33. edsl/jobs/tasks/TaskHistory.py +1 -1
  34. edsl/language_models/LanguageModel.py +6 -3
  35. edsl/language_models/PriceManager.py +45 -5
  36. edsl/language_models/model.py +47 -26
  37. edsl/questions/QuestionBase.py +21 -0
  38. edsl/questions/QuestionBasePromptsMixin.py +103 -0
  39. edsl/questions/QuestionFreeText.py +22 -5
  40. edsl/questions/descriptors.py +4 -0
  41. edsl/questions/question_base_gen_mixin.py +96 -29
  42. edsl/results/Dataset.py +65 -0
  43. edsl/results/DatasetExportMixin.py +320 -32
  44. edsl/results/Result.py +27 -0
  45. edsl/results/Results.py +22 -2
  46. edsl/results/ResultsGGMixin.py +7 -3
  47. edsl/scenarios/DocumentChunker.py +2 -0
  48. edsl/scenarios/FileStore.py +10 -0
  49. edsl/scenarios/PdfExtractor.py +21 -1
  50. edsl/scenarios/Scenario.py +25 -9
  51. edsl/scenarios/ScenarioList.py +226 -24
  52. edsl/scenarios/handlers/__init__.py +1 -0
  53. edsl/scenarios/handlers/docx.py +5 -1
  54. edsl/scenarios/handlers/jpeg.py +39 -0
  55. edsl/surveys/Survey.py +5 -4
  56. edsl/surveys/SurveyFlowVisualization.py +91 -43
  57. edsl/templates/error_reporting/exceptions_table.html +7 -8
  58. edsl/templates/error_reporting/interview_details.html +1 -1
  59. edsl/templates/error_reporting/interviews.html +0 -1
  60. edsl/templates/error_reporting/overview.html +2 -7
  61. edsl/templates/error_reporting/performance_plot.html +1 -1
  62. edsl/templates/error_reporting/report.css +1 -1
  63. edsl/utilities/PrettyList.py +14 -0
  64. edsl-0.1.46.dist-info/METADATA +246 -0
  65. {edsl-0.1.44.dist-info → edsl-0.1.46.dist-info}/RECORD +67 -66
  66. edsl-0.1.44.dist-info/METADATA +0 -110
  67. {edsl-0.1.44.dist-info → edsl-0.1.46.dist-info}/LICENSE +0 -0
  68. {edsl-0.1.44.dist-info → edsl-0.1.46.dist-info}/WHEEL +0 -0
edsl/results/Result.py CHANGED
@@ -439,6 +439,33 @@ class Result(Base, UserDict):
439
439
  from edsl.results.Results import Results
440
440
 
441
441
  return Results.example()[0]
442
+
443
+ def score_with_answer_key(self, answer_key: dict) -> Union[int, float]:
444
+ """Score the result using an answer key.
445
+
446
+ :param answer_key: A dictionary that maps question_names to answers
447
+
448
+ >>> Result.example()['answer']
449
+ {'how_feeling': 'OK', 'how_feeling_yesterday': 'Great'}
450
+
451
+ >>> answer_key = {'how_feeling': 'OK', 'how_feeling_yesterday': 'Great'}
452
+ >>> Result.example().score_with_answer_key(answer_key)
453
+ {'correct': 2, 'incorrect': 0, 'missing': 0}
454
+ >>> answer_key = {'how_feeling': 'OK', 'how_feeling_yesterday': ['Great', 'Good']}
455
+ >>> Result.example().score_with_answer_key(answer_key)
456
+ {'correct': 2, 'incorrect': 0, 'missing': 0}
457
+ """
458
+ final_scores = {'correct': 0, 'incorrect': 0, 'missing': 0}
459
+ for question_name, answer in self.answer.items():
460
+ if question_name in answer_key:
461
+ if answer == answer_key[question_name] or answer in answer_key[question_name]:
462
+ final_scores['correct'] += 1
463
+ else:
464
+ final_scores['incorrect'] += 1
465
+ else:
466
+ final_scores['missing'] += 1
467
+
468
+ return final_scores
442
469
 
443
470
  def score(self, scoring_function: Callable) -> Union[int, float]:
444
471
  """Score the result using a passed-in scoring function.
edsl/results/Results.py CHANGED
@@ -34,7 +34,7 @@ if TYPE_CHECKING:
34
34
  from simpleeval import EvalWithCompoundTypes
35
35
 
36
36
  from edsl.results.ResultsExportMixin import ResultsExportMixin
37
- from edsl.results.ResultsGGMixin import ResultsGGMixin
37
+ from edsl.results.ResultsGGMixin import GGPlotMethod
38
38
  from edsl.results.results_fetch_mixin import ResultsFetchMixin
39
39
  from edsl.utilities.remove_edsl_version import remove_edsl_version
40
40
 
@@ -100,7 +100,7 @@ class NotReadyObject:
100
100
  class Mixins(
101
101
  ResultsExportMixin,
102
102
  ResultsFetchMixin,
103
- ResultsGGMixin,
103
+ # ResultsGGMixin,
104
104
  ):
105
105
  def long(self):
106
106
  return self.table().long()
@@ -151,6 +151,19 @@ class Results(UserList, Mixins, Base):
151
151
  "cache_keys",
152
152
  ]
153
153
 
154
+ def ggplot2(
155
+ self,
156
+ ggplot_code: str,
157
+ shape="wide",
158
+ sql: str = None,
159
+ remove_prefix: bool = True,
160
+ debug: bool = False,
161
+ height=4,
162
+ width=6,
163
+ factor_orders: Optional[dict] = None,
164
+ ):
165
+ return GGPlotMethod(self).ggplot2(ggplot_code, shape, sql, remove_prefix, debug, height, width, factor_orders)
166
+
154
167
  @classmethod
155
168
  def from_job_info(cls, job_info: dict) -> Results:
156
169
  """
@@ -1277,6 +1290,13 @@ class Results(UserList, Mixins, Base):
1277
1290
  """
1278
1291
  return [r.score(f) for r in self.data]
1279
1292
 
1293
+ def score_with_answer_key(self, answer_key: dict) -> list:
1294
+ """Score the results using an answer key.
1295
+
1296
+ :param answer_key: A dictionary that maps answer values to scores.
1297
+ """
1298
+ return [r.score_with_answer_key(answer_key) for r in self.data]
1299
+
1280
1300
 
1281
1301
  def fetch_remote(self, job_info: "RemoteJobInfo") -> None:
1282
1302
  """
edsl/results/ResultsGGMixin.py CHANGED
@@ -75,7 +75,11 @@ class GGPlot:
75
75
 
76
76
  return self._svg_data
77
77
 
78
- class ResultsGGMixin:
78
+ class GGPlotMethod:
79
+
80
+ def __init__(self, results: 'Results'):
81
+ self.results = results
82
+
79
83
  """Mixin class for ggplot2 plotting."""
80
84
 
81
85
  def ggplot2(
@@ -106,9 +110,9 @@ class ResultsGGMixin:
106
110
  sql = "select * from self"
107
111
 
108
112
  if shape == "long":
109
- df = self.sql(sql, shape="long")
113
+ df = self.results.sql(sql, shape="long")
110
114
  elif shape == "wide":
111
- df = self.sql(sql, remove_prefix=remove_prefix)
115
+ df = self.results.sql(sql, remove_prefix=remove_prefix)
112
116
 
113
117
  # Convert DataFrame to CSV format
114
118
  csv_data = df.to_csv().text
edsl/scenarios/DocumentChunker.py CHANGED
@@ -85,6 +85,8 @@ class DocumentChunker:
85
85
  new_scenario = copy.deepcopy(self.scenario)
86
86
  new_scenario[field] = chunk
87
87
  new_scenario[field + "_chunk"] = i
88
+ new_scenario[field + "_char_count"] = len(chunk)
89
+ new_scenario[field + "_word_count"] = len(chunk.split())
88
90
  if include_original:
89
91
  if hash_original:
90
92
  new_scenario[field + "_original"] = hashlib.md5(
edsl/scenarios/FileStore.py CHANGED
@@ -29,6 +29,12 @@ class FileStore(Scenario):
29
29
  if path is None and "filename" in kwargs:
30
30
  path = kwargs["filename"]
31
31
 
32
+ # Check if path is a URL and handle download
33
+ if path and (path.startswith('http://') or path.startswith('https://')):
34
+ temp_filestore = self.from_url(path, mime_type=mime_type)
35
+ path = temp_filestore._path
36
+ mime_type = temp_filestore.mime_type
37
+
32
38
  self._path = path # Store the original path privately
33
39
  self._temp_path = None # Track any generated temporary file
34
40
 
@@ -138,6 +144,10 @@ class FileStore(Scenario):
138
144
  base64_encoded_data = base64.b64encode(binary_data)
139
145
  self.binary = True
140
146
  # Convert the base64 bytes to a string
147
+ except FileNotFoundError:
148
+ print(f"File not found: {file_path}")
149
+ print("Current working directory:", os.getcwd())
150
+ raise
141
151
  base64_string = base64_encoded_data.decode("utf-8")
142
152
 
143
153
  return base64_string
edsl/scenarios/PdfExtractor.py CHANGED
@@ -4,10 +4,30 @@ import os
4
4
  class PdfExtractor:
5
5
  def __init__(self, pdf_path: str):
6
6
  self.pdf_path = pdf_path
7
+ self._has_pymupdf = self._check_pymupdf()
7
8
  #self.constructor = parent_object.__class__
8
9
 
10
+ def _check_pymupdf(self):
11
+ """Check if PyMuPDF is installed."""
12
+ try:
13
+ import fitz
14
+ return True
15
+ except ImportError:
16
+ return False
17
+
9
18
  def get_pdf_dict(self) -> dict:
10
- # Ensure the file exists
19
+ # First check if the file exists
20
+ if not os.path.exists(self.pdf_path):
21
+ raise FileNotFoundError(f"The file {self.pdf_path} does not exist.")
22
+
23
+ # Then check if PyMuPDF is available
24
+ if not self._has_pymupdf:
25
+ raise ImportError(
26
+ "The 'fitz' module (PyMuPDF) is required for PDF extraction. "
27
+ "Please install it with: pip install pymupdf"
28
+ )
29
+
30
+ # If we get here, we can safely import and use fitz
11
31
  import fitz
12
32
 
13
33
  if not os.path.exists(self.pdf_path):
edsl/scenarios/Scenario.py CHANGED
@@ -64,6 +64,15 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
64
64
  self.data = data if data is not None else {}
65
65
  self.name = name
66
66
 
67
+ def __mul__(self, scenario_list_or_scenario: Union["ScenarioList", "Scenario"]) -> "ScenarioList":
68
+ from edsl.scenarios.ScenarioList import ScenarioList
69
+ if isinstance(scenario_list_or_scenario, ScenarioList):
70
+ return scenario_list_or_scenario * self
71
+ elif isinstance(scenario_list_or_scenario, Scenario):
72
+ return ScenarioList([self]) * scenario_list_or_scenario
73
+ else:
74
+ raise TypeError(f"Cannot multiply Scenario with {type(scenario_list_or_scenario)}")
75
+
67
76
  def replicate(self, n: int) -> "ScenarioList":
68
77
  """Replicate a scenario n times to return a ScenarioList.
69
78
 
@@ -356,11 +365,18 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
356
365
 
357
366
  @classmethod
358
367
  def from_pdf(cls, pdf_path: str):
359
- from edsl.scenarios.PdfExtractor import PdfExtractor
360
-
361
- extractor = PdfExtractor(pdf_path)
362
- return Scenario(extractor.get_pdf_dict())
363
-
368
+ """Create a Scenario from a PDF file."""
369
+ try:
370
+ from edsl.scenarios.PdfExtractor import PdfExtractor
371
+ extractor = PdfExtractor(pdf_path)
372
+ return Scenario(extractor.get_pdf_dict())
373
+ except ImportError as e:
374
+ raise ImportError(
375
+ f"Could not extract text from PDF: {str(e)}. "
376
+ "PDF extraction requires the PyMuPDF library. "
377
+ "Install it with: pip install pymupdf"
378
+ )
379
+
364
380
  @classmethod
365
381
  def from_pdf_to_image(cls, pdf_path, image_format="jpeg"):
366
382
  """
@@ -442,18 +458,18 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
442
458
 
443
459
  >>> s = Scenario({"text": "This is a test.\\nThis is a test.\\n\\nThis is a test."})
444
460
  >>> s.chunk("text", num_lines = 1)
445
- ScenarioList([Scenario({'text': 'This is a test.', 'text_chunk': 0}), Scenario({'text': 'This is a test.', 'text_chunk': 1}), Scenario({'text': '', 'text_chunk': 2}), Scenario({'text': 'This is a test.', 'text_chunk': 3})])
461
+ ScenarioList([Scenario({'text': 'This is a test.', 'text_chunk': 0, 'text_char_count': 15, 'text_word_count': 4}), Scenario({'text': 'This is a test.', 'text_chunk': 1, 'text_char_count': 15, 'text_word_count': 4}), Scenario({'text': '', 'text_chunk': 2, 'text_char_count': 0, 'text_word_count': 0}), Scenario({'text': 'This is a test.', 'text_chunk': 3, 'text_char_count': 15, 'text_word_count': 4})])
446
462
 
447
463
  >>> s.chunk("text", num_words = 2)
448
- ScenarioList([Scenario({'text': 'This is', 'text_chunk': 0}), Scenario({'text': 'a test.', 'text_chunk': 1}), Scenario({'text': 'This is', 'text_chunk': 2}), Scenario({'text': 'a test.', 'text_chunk': 3}), Scenario({'text': 'This is', 'text_chunk': 4}), Scenario({'text': 'a test.', 'text_chunk': 5})])
464
+ ScenarioList([Scenario({'text': 'This is', 'text_chunk': 0, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'a test.', 'text_chunk': 1, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'This is', 'text_chunk': 2, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'a test.', 'text_chunk': 3, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'This is', 'text_chunk': 4, 'text_char_count': 7, 'text_word_count': 2}), Scenario({'text': 'a test.', 'text_chunk': 5, 'text_char_count': 7, 'text_word_count': 2})])
449
465
 
450
466
  >>> s = Scenario({"text": "Hello World"})
451
467
  >>> s.chunk("text", num_words = 1, include_original = True)
452
- ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_original': 'Hello World'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_original': 'Hello World'})])
468
+ ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'Hello World'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'Hello World'})])
453
469
 
454
470
  >>> s = Scenario({"text": "Hello World"})
455
471
  >>> s.chunk("text", num_words = 1, include_original = True, hash_original = True)
456
- ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'})])
472
+ ScenarioList([Scenario({'text': 'Hello', 'text_chunk': 0, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'}), Scenario({'text': 'World', 'text_chunk': 1, 'text_char_count': 5, 'text_word_count': 1, 'text_original': 'b10a8db164e0754105b7a99be72e3fe5'})])
457
473
 
458
474
  >>> s.chunk("text")
459
475
  Traceback (most recent call last):
edsl/scenarios/ScenarioList.py CHANGED
@@ -360,6 +360,11 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
360
360
  ScenarioList([Scenario({'a': 1, 'b': 3}), Scenario({'a': 1, 'b': 4}), Scenario({'a': 2, 'b': 3}), Scenario({'a': 2, 'b': 4})])
361
361
  """
362
362
  from itertools import product
363
+ from edsl import Scenario
364
+ if isinstance(other, Scenario):
365
+ other = ScenarioList([other])
366
+ elif not isinstance(other, ScenarioList):
367
+ raise TypeError(f"Cannot multiply ScenarioList with {type(other)}")
363
368
 
364
369
  new_sl = []
365
370
  for s1, s2 in list(product(self, other)):
@@ -431,35 +436,98 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
431
436
  new_scenarios.append(new_scenario)
432
437
  return ScenarioList(new_scenarios)
433
438
 
434
- def concatenate(self, fields: List[str], separator: str = ";") -> ScenarioList:
435
- """Concatenate specified fields into a single field.
436
-
439
+ def _concatenate(self, fields: List[str], output_type: str = "string", separator: str = ";") -> ScenarioList:
440
+ """Private method to handle concatenation logic for different output types.
441
+
437
442
  :param fields: The fields to concatenate.
438
- :param separator: The separator to use.
439
-
443
+ :param output_type: The type of output ("string", "list", or "set").
444
+ :param separator: The separator to use for string concatenation.
445
+
440
446
  Returns:
441
447
  ScenarioList: A new ScenarioList with concatenated fields.
442
-
443
- Example:
444
- >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
445
- >>> s.concatenate(['a', 'b', 'c'])
446
- ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
447
448
  """
449
+ # Check if fields is a string and raise an exception
450
+ if isinstance(fields, str):
451
+ raise ScenarioError(
452
+ f"The 'fields' parameter must be a list of field names, not a string. Got '{fields}'."
453
+ )
454
+
448
455
  new_scenarios = []
449
456
  for scenario in self:
450
457
  new_scenario = scenario.copy()
451
- concat_values = []
458
+ values = []
452
459
  for field in fields:
453
460
  if field in new_scenario:
454
- concat_values.append(str(new_scenario[field]))
461
+ values.append(new_scenario[field])
455
462
  del new_scenario[field]
456
463
 
457
464
  new_field_name = f"concat_{'_'.join(fields)}"
458
- new_scenario[new_field_name] = separator.join(concat_values)
465
+
466
+ if output_type == "string":
467
+ # Convert all values to strings and join with separator
468
+ new_scenario[new_field_name] = separator.join(str(v) for v in values)
469
+ elif output_type == "list":
470
+ # Keep as a list
471
+ new_scenario[new_field_name] = values
472
+ elif output_type == "set":
473
+ # Convert to a set (removes duplicates)
474
+ new_scenario[new_field_name] = set(values)
475
+ else:
476
+ raise ValueError(f"Invalid output_type: {output_type}. Must be 'string', 'list', or 'set'.")
477
+
459
478
  new_scenarios.append(new_scenario)
460
479
 
461
480
  return ScenarioList(new_scenarios)
462
481
 
482
+ def concatenate(self, fields: List[str], separator: str = ";") -> ScenarioList:
483
+ """Concatenate specified fields into a single string field.
484
+
485
+ :param fields: The fields to concatenate.
486
+ :param separator: The separator to use.
487
+
488
+ Returns:
489
+ ScenarioList: A new ScenarioList with concatenated fields.
490
+
491
+ Example:
492
+ >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
493
+ >>> s.concatenate(['a', 'b', 'c'])
494
+ ScenarioList([Scenario({'concat_a_b_c': '1;2;3'}), Scenario({'concat_a_b_c': '4;5;6'})])
495
+ """
496
+ return self._concatenate(fields, output_type="string", separator=separator)
497
+
498
+ def concatenate_to_list(self, fields: List[str]) -> ScenarioList:
499
+ """Concatenate specified fields into a single list field.
500
+
501
+ :param fields: The fields to concatenate.
502
+
503
+ Returns:
504
+ ScenarioList: A new ScenarioList with fields concatenated into a list.
505
+
506
+ Example:
507
+ >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
508
+ >>> s.concatenate_to_list(['a', 'b', 'c'])
509
+ ScenarioList([Scenario({'concat_a_b_c': [1, 2, 3]}), Scenario({'concat_a_b_c': [4, 5, 6]})])
510
+ """
511
+ return self._concatenate(fields, output_type="list")
512
+
513
+ def concatenate_to_set(self, fields: List[str]) -> ScenarioList:
514
+ """Concatenate specified fields into a single set field.
515
+
516
+ :param fields: The fields to concatenate.
517
+
518
+ Returns:
519
+ ScenarioList: A new ScenarioList with fields concatenated into a set.
520
+
521
+ Example:
522
+ >>> s = ScenarioList([Scenario({'a': 1, 'b': 2, 'c': 3}), Scenario({'a': 4, 'b': 5, 'c': 6})])
523
+ >>> s.concatenate_to_set(['a', 'b', 'c'])
524
+ ScenarioList([Scenario({'concat_a_b_c': {1, 2, 3}}), Scenario({'concat_a_b_c': {4, 5, 6}})])
525
+ >>> s = ScenarioList([Scenario({'a': 1, 'b': 1, 'c': 3})])
526
+ >>> s.concatenate_to_set(['a', 'b', 'c'])
527
+ ScenarioList([Scenario({'concat_a_b_c': {1, 3}})])
528
+ """
529
+ return self._concatenate(fields, output_type="set")
530
+
463
531
  def unpack_dict(
464
532
  self, field: str, prefix: Optional[str] = None, drop_field: bool = False
465
533
  ) -> ScenarioList:
@@ -872,7 +940,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
872
940
  for scenario in sl:
873
941
  scenario[name] = value
874
942
  return sl
875
-
943
+
876
944
  def rename(self, replacement_dict: dict) -> ScenarioList:
877
945
  """Rename the fields in the scenarios.
878
946
 
@@ -885,13 +953,35 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
885
953
  ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])
886
954
 
887
955
  """
888
-
889
956
  new_list = ScenarioList([])
890
957
  for obj in self:
891
958
  new_obj = obj.rename(replacement_dict)
892
959
  new_list.append(new_obj)
893
960
  return new_list
894
961
 
962
+ def replace_names(self, new_names: list) -> ScenarioList:
963
+ """Replace the field names in the scenarios with a new list of names.
964
+
965
+ :param new_names: A list of new field names to use.
966
+
967
+ Example:
968
+
969
+ >>> s = ScenarioList([Scenario({'name': 'Alice', 'age': 30}), Scenario({'name': 'Bob', 'age': 25})])
970
+ >>> s.replace_names(['first_name', 'years'])
971
+ ScenarioList([Scenario({'first_name': 'Alice', 'years': 30}), Scenario({'first_name': 'Bob', 'years': 25})])
972
+ """
973
+ if not self:
974
+ return ScenarioList([])
975
+
976
+ if len(new_names) != len(self[0].keys()):
977
+ raise ScenarioError(
978
+ f"Length of new names ({len(new_names)}) does not match number of fields ({len(self[0].keys())})"
979
+ )
980
+
981
+ old_names = list(self[0].keys())
982
+ replacement_dict = dict(zip(old_names, new_names))
983
+ return self.rename(replacement_dict)
984
+
895
985
  ## NEEDS TO BE FIXED
896
986
  # def new_column_names(self, new_names: List[str]) -> ScenarioList:
897
987
  # """Rename the fields in the scenarios.
@@ -910,16 +1000,42 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
910
1000
  # return new_list
911
1001
 
912
1002
  @classmethod
913
- def from_sqlite(cls, filepath: str, table: str):
914
- """Create a ScenarioList from a SQLite database."""
1003
+ def from_sqlite(cls, filepath: str, table: Optional[str] = None, sql_query: Optional[str] = None):
1004
+ """Create a ScenarioList from a SQLite database.
1005
+
1006
+ Args:
1007
+ filepath (str): Path to the SQLite database file
1008
+ table (Optional[str]): Name of table to query. If None, sql_query must be provided.
1009
+ sql_query (Optional[str]): SQL query to execute. Used if table is None.
1010
+
1011
+ Returns:
1012
+ ScenarioList: List of scenarios created from database rows
1013
+
1014
+ Raises:
1015
+ ValueError: If both table and sql_query are None
1016
+ sqlite3.Error: If there is an error executing the database query
1017
+ """
915
1018
  import sqlite3
916
1019
 
917
- with sqlite3.connect(filepath) as conn:
918
- cursor = conn.cursor()
919
- cursor.execute(f"SELECT * FROM {table}")
920
- columns = [description[0] for description in cursor.description]
921
- data = cursor.fetchall()
922
- return cls([Scenario(dict(zip(columns, row))) for row in data])
1020
+ if table is None and sql_query is None:
1021
+ raise ValueError("Either table or sql_query must be provided")
1022
+
1023
+ try:
1024
+ with sqlite3.connect(filepath) as conn:
1025
+ cursor = conn.cursor()
1026
+
1027
+ if table is not None:
1028
+ cursor.execute(f"SELECT * FROM {table}")
1029
+ else:
1030
+ cursor.execute(sql_query)
1031
+
1032
+ columns = [description[0] for description in cursor.description]
1033
+ data = cursor.fetchall()
1034
+
1035
+ return cls([Scenario(dict(zip(columns, row))) for row in data])
1036
+
1037
+ except sqlite3.Error as e:
1038
+ raise sqlite3.Error(f"Database error occurred: {str(e)}")
923
1039
 
924
1040
  @classmethod
925
1041
  def from_latex(cls, tex_file_path: str):
@@ -935,6 +1051,8 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
935
1051
  entry = {
936
1052
  "line_no": line_no + 1, # Using 1-based index for line numbers
937
1053
  "text": text,
1054
+ "num_words": len(text.split()),
1055
+ "num_chars": len(text),
938
1056
  "line_before": non_blank_lines[index - 1][1] if index > 0 else None,
939
1057
  "line_after": (
940
1058
  non_blank_lines[index + 1][1]
@@ -995,8 +1113,49 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
995
1113
  ScenarioList([Scenario({'name': 'Alice', 'age': 30, 'location': 'New York'}), Scenario({'name': 'Bob', 'age': 25, 'location': 'Los Angeles'})])
996
1114
  """
997
1115
  return cls([Scenario(row) for row in df.to_dict(orient="records")])
1116
+
998
1117
 
999
1118
  @classmethod
1119
+ def from_dta(cls, filepath: str, include_metadata: bool = True) -> ScenarioList:
1120
+ """Create a ScenarioList from a Stata file.
1121
+
1122
+ Args:
1123
+ filepath (str): Path to the Stata (.dta) file
1124
+ include_metadata (bool): If True, extract and preserve variable labels and value labels
1125
+ as additional metadata in the ScenarioList
1126
+
1127
+ Returns:
1128
+ ScenarioList: A ScenarioList containing the data from the Stata file
1129
+ """
1130
+ import pandas as pd
1131
+
1132
+ # Read the Stata file with pandas
1133
+ df = pd.read_stata(filepath)
1134
+
1135
+ # Create the basic ScenarioList
1136
+ scenario_list = cls.from_pandas(df)
1137
+
1138
+ # Extract and preserve metadata if requested
1139
+ if include_metadata:
1140
+ # Get variable labels (if any)
1141
+ variable_labels = {}
1142
+ if hasattr(df, 'variable_labels') and df.variable_labels:
1143
+ variable_labels = df.variable_labels
1144
+
1145
+ # Get value labels (if any)
1146
+ value_labels = {}
1147
+ if hasattr(df, 'value_labels') and df.value_labels:
1148
+ value_labels = df.value_labels
1149
+
1150
+ # Store the metadata in the ScenarioList's codebook
1151
+ if variable_labels or value_labels:
1152
+ scenario_list.codebook = {
1153
+ 'variable_labels': variable_labels,
1154
+ 'value_labels': value_labels
1155
+ }
1156
+
1157
+ return scenario_list
1158
+
1000
1159
  def from_wikipedia(cls, url: str, table_index: int = 0):
1001
1160
  """
1002
1161
  Extracts a table from a Wikipedia page.
@@ -1456,7 +1615,7 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1456
1615
 
1457
1616
  >>> s = ScenarioList([Scenario({'text': 'The quick brown fox jumps over the lazy dog.'})])
1458
1617
  >>> s.chunk('text', num_words=3)
1459
- ScenarioList([Scenario({'text': 'The quick brown', 'text_chunk': 0}), Scenario({'text': 'fox jumps over', 'text_chunk': 1}), Scenario({'text': 'the lazy dog.', 'text_chunk': 2})])
1618
+ ScenarioList([Scenario({'text': 'The quick brown', 'text_chunk': 0, 'text_char_count': 15, 'text_word_count': 3}), Scenario({'text': 'fox jumps over', 'text_chunk': 1, 'text_char_count': 14, 'text_word_count': 3}), Scenario({'text': 'the lazy dog.', 'text_chunk': 2, 'text_char_count': 13, 'text_word_count': 3})])
1460
1619
  """
1461
1620
  new_scenarios = []
1462
1621
  for scenario in self:
@@ -1470,6 +1629,49 @@ class ScenarioList(Base, UserList, ScenarioListMixin):
1470
1629
  new_scenarios.extend(replacement_scenarios)
1471
1630
  return ScenarioList(new_scenarios)
1472
1631
 
1632
+ def collapse(self, field: str) -> ScenarioList:
1633
+ """Collapse a ScenarioList by grouping on all fields except the specified one,
1634
+ collecting the values of the specified field into a list.
1635
+
1636
+ Args:
1637
+ field: The field to collapse (whose values will be collected into lists)
1638
+
1639
+ Returns:
1640
+ ScenarioList: A new ScenarioList with the specified field collapsed into lists
1641
+
1642
+ Example:
1643
+ >>> s = ScenarioList([
1644
+ ... Scenario({'category': 'fruit', 'color': 'red', 'item': 'apple'}),
1645
+ ... Scenario({'category': 'fruit', 'color': 'yellow', 'item': 'banana'}),
1646
+ ... Scenario({'category': 'fruit', 'color': 'red', 'item': 'cherry'}),
1647
+ ... Scenario({'category': 'vegetable', 'color': 'green', 'item': 'spinach'})
1648
+ ... ])
1649
+ >>> s.collapse('item')
1650
+ ScenarioList([Scenario({'category': 'fruit', 'color': 'red', 'item': ['apple', 'cherry']}), Scenario({'category': 'fruit', 'color': 'yellow', 'item': ['banana']}), Scenario({'category': 'vegetable', 'color': 'green', 'item': ['spinach']})])
1651
+ """
1652
+ if not self:
1653
+ return ScenarioList([])
1654
+
1655
+ # Determine all fields except the one to collapse
1656
+ id_vars = [key for key in self[0].keys() if key != field]
1657
+
1658
+ # Group the scenarios
1659
+ grouped = defaultdict(list)
1660
+ for scenario in self:
1661
+ # Create a tuple of the values of all fields except the one to collapse
1662
+ key = tuple(scenario[id_var] for id_var in id_vars)
1663
+ # Add the value of the field to collapse to the list for this key
1664
+ grouped[key].append(scenario[field])
1665
+
1666
+ # Create a new ScenarioList with the collapsed field
1667
+ result = []
1668
+ for key, values in grouped.items():
1669
+ new_scenario = dict(zip(id_vars, key))
1670
+ new_scenario[field] = values
1671
+ result.append(Scenario(new_scenario))
1672
+
1673
+ return ScenarioList(result)
1674
+
1473
1675
 
1474
1676
  if __name__ == "__main__":
1475
1677
  import doctest
edsl/scenarios/handlers/__init__.py CHANGED
@@ -11,3 +11,4 @@ from .pptx import PptxMethods
11
11
  from .latex import LaTeXMethods
12
12
  from .py import PyMethods
13
13
  from .sqlite import SQLiteMethods
14
+ from .jpeg import JpegMethods
edsl/scenarios/handlers/docx.py CHANGED
@@ -37,7 +37,11 @@ class DocxMethods(FileMethods):
37
37
  print("DOCX file was not found.")
38
38
 
39
39
  def view_notebook(self):
40
- import mammoth
40
+ try:
41
+ import mammoth
42
+ except ImportError:
43
+ print("mammoth is not installed. Please install it using 'pip install mammoth'.")
44
+ return
41
45
  from IPython.display import HTML, display
42
46
 
43
47
  with open(self.path, "rb") as docx_file:
edsl/scenarios/handlers/jpeg.py ADDED
@@ -0,0 +1,39 @@
1
+ import tempfile
2
+ from edsl.scenarios.file_methods import FileMethods
3
+
4
+
5
+ class JpegMethods(FileMethods):
6
+ suffix = "jpeg"
7
+
8
+ def view_system(self):
9
+ import os
10
+ import subprocess
11
+
12
+ if os.path.exists(self.path):
13
+ try:
14
+ if (os_name := os.name) == "posix":
15
+ subprocess.run(["open", self.path], check=True) # macOS
16
+ elif os_name == "nt":
17
+ os.startfile(self.path) # Windows
18
+ else:
19
+ subprocess.run(["xdg-open", self.path], check=True) # Linux
20
+ except Exception as e:
21
+ print(f"Error opening JPEG: {e}")
22
+ else:
23
+ print("JPEG file was not found.")
24
+
25
+ def view_notebook(self):
26
+ from IPython.display import Image, display
27
+
28
+ display(Image(filename=self.path))
29
+
30
+ def example(self):
31
+ import matplotlib.pyplot as plt
32
+ import numpy as np
33
+
34
+ x = np.linspace(0, 10, 100)
35
+ y = np.sin(x)
36
+ plt.plot(x, y)
37
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".jpeg") as f:
38
+ plt.savefig(f.name)
39
+ return f.name
edsl/surveys/Survey.py CHANGED
@@ -1248,14 +1248,15 @@ class Survey(SurveyExportMixin, Base):
1248
1248
  ###################
1249
1249
  def humanize(
1250
1250
  self,
1251
- project_name: str,
1251
+ project_name: str = "Project",
1252
1252
  survey_description: Optional[str] = None,
1253
1253
  survey_alias: Optional[str] = None,
1254
1254
  survey_visibility: Optional["VisibilityType"] = "unlisted",
1255
- ):
1255
+ ) -> dict:
1256
1256
  """
1257
- Create a survey object on Coop.
1258
- Then, create a project on Coop so you can share the survey with humans.
1257
+ Send the survey to Coop.
1258
+
1259
+ Then, create a project on Coop so you can share the survey with human respondents.
1259
1260
  """
1260
1261
  from edsl.coop import Coop
1261
1262