edsl 0.1.61__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +66 -0
- edsl/__version__.py +1 -1
- edsl/base/base_class.py +53 -0
- edsl/cli.py +93 -27
- edsl/config/config_class.py +4 -0
- edsl/coop/coop.py +403 -28
- edsl/coop/coop_jobs_objects.py +2 -2
- edsl/coop/coop_regular_objects.py +3 -1
- edsl/dataset/dataset.py +47 -41
- edsl/dataset/dataset_operations_mixin.py +138 -15
- edsl/dataset/report_from_template.py +509 -0
- edsl/inference_services/services/azure_ai.py +8 -2
- edsl/inference_services/services/open_ai_service.py +7 -5
- edsl/jobs/jobs.py +5 -4
- edsl/jobs/jobs_checks.py +11 -6
- edsl/jobs/remote_inference.py +17 -10
- edsl/prompts/prompt.py +7 -2
- edsl/questions/question_registry.py +4 -1
- edsl/results/result.py +93 -38
- edsl/results/results.py +24 -15
- edsl/scenarios/file_store.py +69 -0
- edsl/scenarios/scenario.py +233 -0
- edsl/scenarios/scenario_list.py +294 -130
- edsl/scenarios/scenario_source.py +1 -2
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/METADATA +1 -1
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/RECORD +29 -28
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/LICENSE +0 -0
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/WHEEL +0 -0
- {edsl-0.1.61.dist-info → edsl-1.0.0.dist-info}/entry_points.txt +0 -0
@@ -339,6 +339,37 @@ class DataOperationsBase:
|
|
339
339
|
)
|
340
340
|
return exporter.export()
|
341
341
|
|
342
|
+
def clipboard_data(self) -> str:
    """Return a TSV representation of this object for clipboard operations.

    This method is called by the clipboard() method in the base class to provide
    a custom format for copying objects to the system clipboard.

    The object is exported with ``self.to_csv()``, the ``text`` of the returned
    file store is parsed as CSV, and the rows are re-emitted with tab delimiters.
    Fields that themselves contain a tab, a line break or a double quote are
    quoted using the standard CSV quoting rules, so they stay in a single cell
    instead of shifting columns or splitting rows. Fields without such
    characters are written verbatim.

    Returns:
        str: Tab-separated values, one row per line, without a trailing newline.
    """
    import csv
    import io

    # Export through the existing CSV path, then re-parse so quoted CSV fields
    # (embedded commas, quotes, line breaks) are decoded correctly.
    csv_content = self.to_csv().text
    parsed_rows = csv.reader(io.StringIO(csv_content))

    # Let the csv module produce the TSV: a plain '\t'.join(row) would corrupt
    # any field that contains a tab or a line break.
    buffer = io.StringIO()
    tsv_writer = csv.writer(buffer, delimiter="\t", lineterminator="\n")
    tsv_writer.writerows(parsed_rows)

    # The writer terminates every row; drop only the final terminator so the
    # result has no trailing newline.
    tsv_text = buffer.getvalue()
    return tsv_text[:-1] if tsv_text.endswith("\n") else tsv_text
|
372
|
+
|
342
373
|
def _db(self, remove_prefix: bool = True, shape: str = "wide"):
|
343
374
|
"""Create a SQLite database in memory and return the connection.
|
344
375
|
|
@@ -357,7 +388,7 @@ class DataOperationsBase:
|
|
357
388
|
4
|
358
389
|
>>> engine = Results.example()._db(shape = "long")
|
359
390
|
>>> len(engine.execute(text("SELECT * FROM self")).fetchall())
|
360
|
-
|
391
|
+
220
|
361
392
|
"""
|
362
393
|
# Import needed for database connection
|
363
394
|
from sqlalchemy import create_engine
|
@@ -442,7 +473,7 @@ class DataOperationsBase:
|
|
442
473
|
|
443
474
|
# Using long format
|
444
475
|
>>> len(r.sql("SELECT * FROM self", shape="long"))
|
445
|
-
|
476
|
+
220
|
446
477
|
"""
|
447
478
|
import pandas as pd
|
448
479
|
|
@@ -1085,7 +1116,7 @@ class DataOperationsBase:
|
|
1085
1116
|
>>> d = Dataset([{'a': [{'a': 1, 'b': 2}]}, {'c': [5]}])
|
1086
1117
|
>>> d.flatten('a', keep_original=True)
|
1087
1118
|
Dataset([{'a': [{'a': 1, 'b': 2}]}, {'c': [5]}, {'a.a': [1]}, {'a.b': [2]}])
|
1088
|
-
|
1119
|
+
|
1089
1120
|
# Can also use unambiguous unprefixed field name
|
1090
1121
|
>>> result = Dataset([{'answer.pros_cons': [{'pros': ['Safety'], 'cons': ['Cost']}]}]).flatten('pros_cons')
|
1091
1122
|
>>> sorted(result.keys()) == ['answer.pros_cons.cons', 'answer.pros_cons.pros']
|
@@ -1098,7 +1129,7 @@ class DataOperationsBase:
|
|
1098
1129
|
# Ensure the dataset isn't empty
|
1099
1130
|
if not self.data:
|
1100
1131
|
return self.copy()
|
1101
|
-
|
1132
|
+
|
1102
1133
|
# First try direct match with the exact field name
|
1103
1134
|
field_entry = None
|
1104
1135
|
for entry in self.data:
|
@@ -1106,18 +1137,18 @@ class DataOperationsBase:
|
|
1106
1137
|
if field == col_name:
|
1107
1138
|
field_entry = entry
|
1108
1139
|
break
|
1109
|
-
|
1140
|
+
|
1110
1141
|
# If not found, try to match by unprefixed name
|
1111
1142
|
if field_entry is None:
|
1112
1143
|
# Find any columns that have field as their unprefixed name
|
1113
1144
|
candidates = []
|
1114
1145
|
for entry in self.data:
|
1115
1146
|
col_name = next(iter(entry.keys()))
|
1116
|
-
if
|
1117
|
-
prefix, col_field = col_name.split(
|
1147
|
+
if "." in col_name:
|
1148
|
+
prefix, col_field = col_name.split(".", 1)
|
1118
1149
|
if col_field == field:
|
1119
1150
|
candidates.append(entry)
|
1120
|
-
|
1151
|
+
|
1121
1152
|
# If we found exactly one match by unprefixed name, use it
|
1122
1153
|
if len(candidates) == 1:
|
1123
1154
|
field_entry = candidates[0]
|
@@ -1125,6 +1156,7 @@ class DataOperationsBase:
|
|
1125
1156
|
elif len(candidates) > 1:
|
1126
1157
|
matching_cols = [next(iter(entry.keys())) for entry in candidates]
|
1127
1158
|
from .exceptions import DatasetValueError
|
1159
|
+
|
1128
1160
|
raise DatasetValueError(
|
1129
1161
|
f"Ambiguous field name '{field}'. It matches multiple columns: {matching_cols}. "
|
1130
1162
|
f"Please specify the full column name to flatten."
|
@@ -1134,24 +1166,27 @@ class DataOperationsBase:
|
|
1134
1166
|
partial_matches = []
|
1135
1167
|
for entry in self.data:
|
1136
1168
|
col_name = next(iter(entry.keys()))
|
1137
|
-
if
|
1138
|
-
col_name.endswith(
|
1139
|
-
col_name.startswith(field +
|
1169
|
+
if "." in col_name and (
|
1170
|
+
col_name.endswith("." + field)
|
1171
|
+
or col_name.startswith(field + ".")
|
1140
1172
|
):
|
1141
1173
|
partial_matches.append(entry)
|
1142
|
-
|
1174
|
+
|
1143
1175
|
# If we found exactly one partial match, use it
|
1144
1176
|
if len(partial_matches) == 1:
|
1145
1177
|
field_entry = partial_matches[0]
|
1146
1178
|
# If we found multiple partial matches, it's ambiguous
|
1147
1179
|
elif len(partial_matches) > 1:
|
1148
|
-
matching_cols = [
|
1180
|
+
matching_cols = [
|
1181
|
+
next(iter(entry.keys())) for entry in partial_matches
|
1182
|
+
]
|
1149
1183
|
from .exceptions import DatasetValueError
|
1184
|
+
|
1150
1185
|
raise DatasetValueError(
|
1151
1186
|
f"Ambiguous field name '{field}'. It matches multiple columns: {matching_cols}. "
|
1152
1187
|
f"Please specify the full column name to flatten."
|
1153
1188
|
)
|
1154
|
-
|
1189
|
+
|
1155
1190
|
# Get the number of observations
|
1156
1191
|
num_observations = self.num_observations()
|
1157
1192
|
|
@@ -1161,7 +1196,7 @@ class DataOperationsBase:
|
|
1161
1196
|
f"Field '{field}' not found in dataset, returning original dataset"
|
1162
1197
|
)
|
1163
1198
|
return self.copy()
|
1164
|
-
|
1199
|
+
|
1165
1200
|
# Get the actual field name as it appears in the data
|
1166
1201
|
actual_field = next(iter(field_entry.keys()))
|
1167
1202
|
|
@@ -1343,6 +1378,94 @@ class DataOperationsBase:
|
|
1343
1378
|
|
1344
1379
|
return Dataset(new_data)
|
1345
1380
|
|
1381
|
+
def report_from_template(
    self,
    template: str,
    *fields: Optional[str],
    top_n: Optional[int] = None,
    remove_prefix: bool = True,
    return_string: bool = False,
    format: str = "text",
    filename: Optional[str] = None,
    separator: str = "\n\n",
    observation_title_template: Optional[str] = None,
    explode: bool = False,
    markdown_to_docx: bool = True,
    use_pandoc: bool = True,
) -> Optional[Union[str, "Document", List]]:
    """Render a Jinja2 template once per row of the dataset to build a report.

    All of the work is delegated to ``TemplateReportGenerator.generate_report``;
    this method only forwards its arguments unchanged.

    Args:
        template: Jinja2 template string rendered for each row.
        *fields: Fields made available to the template. When omitted, all fields are used.
        top_n: Optional cap on the number of observations included.
        remove_prefix: Whether type prefixes (e.g., "answer.") are stripped from
            field names in the template context.
        return_string: If True, the rendered string is returned. If False, the
            content is only displayed and nothing is returned.
        format: Output format, either "text" or "docx".
        filename: If given, the rendered content is saved to this file. With
            explode=True it is treated as a template (e.g., "report_{index}.docx").
        separator: String placed between the per-row renderings (ignored when explode=True).
        observation_title_template: Optional Jinja2 template for observation titles.
            Defaults to "Observation {index}" with a 1-based index. The title template
            can use all row data plus the 'index' and 'index0' variables.
        explode: If True, one file per observation is produced instead of a single combined file.
        markdown_to_docx: If True (default), template output is treated as Markdown and
            converted to DOCX formatting. If False, plain text formatting is used.
        use_pandoc: If True (default) and markdown_to_docx=True, pandoc performs the conversion.
            If False, a Python-based Markdown parser is used (requires the markdown and
            python-docx libraries).

    Returns:
        Depends on explode, format and return_string:
        - explode=True: list of created filenames (when filename is provided) or a
          list of documents/strings
        - explode=False: a string, a Document, or None

    Examples:
        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> template = "Person feels: {{ how_feeling }}"
        >>> report = r.select('how_feeling').report_from_template(template, return_string=True)
        >>> "Person feels: OK" in report
        True
        >>> "Person feels: Great" in report
        True

        # Custom observation titles
        >>> custom_title = "Response {{ index }}: {{ how_feeling }}"
        >>> report = r.select('how_feeling').report_from_template(
        ...     template, observation_title_template=custom_title, return_string=True)
        >>> "Response 1: OK" in report
        True

        # Basic template functionality
        >>> template2 = "Feeling: {{ how_feeling }}, Index: {{ index }}"
        >>> report2 = r.select('how_feeling').report_from_template(
        ...     template2, return_string=True, top_n=2)
        >>> "Feeling: OK, Index: 1" in report2
        True
    """
    from .report_from_template import TemplateReportGenerator

    # Gather every keyword option once so the delegation below stays a one-liner.
    report_options = {
        "top_n": top_n,
        "remove_prefix": remove_prefix,
        "return_string": return_string,
        "format": format,
        "filename": filename,
        "separator": separator,
        "observation_title_template": observation_title_template,
        "explode": explode,
        "markdown_to_docx": markdown_to_docx,
        "use_pandoc": use_pandoc,
    }
    return TemplateReportGenerator(self).generate_report(
        template, *fields, **report_options
    )
|
1468
|
+
|
1346
1469
|
|
1347
1470
|
def to_dataset(func):
|
1348
1471
|
"""
|