edsl 0.1.61__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff shows the changes between publicly available package versions released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -339,6 +339,37 @@ class DataOperationsBase:
339
339
  )
340
340
  return exporter.export()
341
341
 
342
def clipboard_data(self) -> str:
    """Return a TSV representation of this object for clipboard operations.

    This method is called by the clipboard() method in the base class to provide
    a custom format for copying objects to the system clipboard (spreadsheet
    applications paste TSV cleanly into cells).

    Returns:
        str: Tab-separated values representation of the object, rows joined
            by newlines with no trailing newline.
    """
    import csv
    import io

    # Reuse the existing CSV export as the data source.
    csv_filestore = self.to_csv()
    csv_content = csv_filestore.text

    # Parse the CSV properly so quoted fields (e.g. embedded commas or
    # newlines) are not split incorrectly.
    rows = list(csv.reader(io.StringIO(csv_content)))

    # Re-serialize with csv.writer using a tab delimiter rather than naively
    # joining fields with '\t': the writer quotes any field that itself
    # contains a tab or newline, keeping the TSV well-formed.
    buffer = io.StringIO()
    writer = csv.writer(buffer, delimiter="\t", lineterminator="\n")
    writer.writerows(rows)

    # Match the previous contract of no trailing newline.
    return buffer.getvalue().rstrip("\n")
372
+
342
373
  def _db(self, remove_prefix: bool = True, shape: str = "wide"):
343
374
  """Create a SQLite database in memory and return the connection.
344
375
 
@@ -357,7 +388,7 @@ class DataOperationsBase:
357
388
  4
358
389
  >>> engine = Results.example()._db(shape = "long")
359
390
  >>> len(engine.execute(text("SELECT * FROM self")).fetchall())
360
- 212
391
+ 220
361
392
  """
362
393
  # Import needed for database connection
363
394
  from sqlalchemy import create_engine
@@ -442,7 +473,7 @@ class DataOperationsBase:
442
473
 
443
474
  # Using long format
444
475
  >>> len(r.sql("SELECT * FROM self", shape="long"))
445
- 212
476
+ 220
446
477
  """
447
478
  import pandas as pd
448
479
 
@@ -1085,7 +1116,7 @@ class DataOperationsBase:
1085
1116
  >>> d = Dataset([{'a': [{'a': 1, 'b': 2}]}, {'c': [5]}])
1086
1117
  >>> d.flatten('a', keep_original=True)
1087
1118
  Dataset([{'a': [{'a': 1, 'b': 2}]}, {'c': [5]}, {'a.a': [1]}, {'a.b': [2]}])
1088
-
1119
+
1089
1120
  # Can also use unambiguous unprefixed field name
1090
1121
  >>> result = Dataset([{'answer.pros_cons': [{'pros': ['Safety'], 'cons': ['Cost']}]}]).flatten('pros_cons')
1091
1122
  >>> sorted(result.keys()) == ['answer.pros_cons.cons', 'answer.pros_cons.pros']
@@ -1098,7 +1129,7 @@ class DataOperationsBase:
1098
1129
  # Ensure the dataset isn't empty
1099
1130
  if not self.data:
1100
1131
  return self.copy()
1101
-
1132
+
1102
1133
  # First try direct match with the exact field name
1103
1134
  field_entry = None
1104
1135
  for entry in self.data:
@@ -1106,18 +1137,18 @@ class DataOperationsBase:
1106
1137
  if field == col_name:
1107
1138
  field_entry = entry
1108
1139
  break
1109
-
1140
+
1110
1141
  # If not found, try to match by unprefixed name
1111
1142
  if field_entry is None:
1112
1143
  # Find any columns that have field as their unprefixed name
1113
1144
  candidates = []
1114
1145
  for entry in self.data:
1115
1146
  col_name = next(iter(entry.keys()))
1116
- if '.' in col_name:
1117
- prefix, col_field = col_name.split('.', 1)
1147
+ if "." in col_name:
1148
+ prefix, col_field = col_name.split(".", 1)
1118
1149
  if col_field == field:
1119
1150
  candidates.append(entry)
1120
-
1151
+
1121
1152
  # If we found exactly one match by unprefixed name, use it
1122
1153
  if len(candidates) == 1:
1123
1154
  field_entry = candidates[0]
@@ -1125,6 +1156,7 @@ class DataOperationsBase:
1125
1156
  elif len(candidates) > 1:
1126
1157
  matching_cols = [next(iter(entry.keys())) for entry in candidates]
1127
1158
  from .exceptions import DatasetValueError
1159
+
1128
1160
  raise DatasetValueError(
1129
1161
  f"Ambiguous field name '{field}'. It matches multiple columns: {matching_cols}. "
1130
1162
  f"Please specify the full column name to flatten."
@@ -1134,24 +1166,27 @@ class DataOperationsBase:
1134
1166
  partial_matches = []
1135
1167
  for entry in self.data:
1136
1168
  col_name = next(iter(entry.keys()))
1137
- if '.' in col_name and (
1138
- col_name.endswith('.' + field) or
1139
- col_name.startswith(field + '.')
1169
+ if "." in col_name and (
1170
+ col_name.endswith("." + field)
1171
+ or col_name.startswith(field + ".")
1140
1172
  ):
1141
1173
  partial_matches.append(entry)
1142
-
1174
+
1143
1175
  # If we found exactly one partial match, use it
1144
1176
  if len(partial_matches) == 1:
1145
1177
  field_entry = partial_matches[0]
1146
1178
  # If we found multiple partial matches, it's ambiguous
1147
1179
  elif len(partial_matches) > 1:
1148
- matching_cols = [next(iter(entry.keys())) for entry in partial_matches]
1180
+ matching_cols = [
1181
+ next(iter(entry.keys())) for entry in partial_matches
1182
+ ]
1149
1183
  from .exceptions import DatasetValueError
1184
+
1150
1185
  raise DatasetValueError(
1151
1186
  f"Ambiguous field name '{field}'. It matches multiple columns: {matching_cols}. "
1152
1187
  f"Please specify the full column name to flatten."
1153
1188
  )
1154
-
1189
+
1155
1190
  # Get the number of observations
1156
1191
  num_observations = self.num_observations()
1157
1192
 
@@ -1161,7 +1196,7 @@ class DataOperationsBase:
1161
1196
  f"Field '{field}' not found in dataset, returning original dataset"
1162
1197
  )
1163
1198
  return self.copy()
1164
-
1199
+
1165
1200
  # Get the actual field name as it appears in the data
1166
1201
  actual_field = next(iter(field_entry.keys()))
1167
1202
 
@@ -1343,6 +1378,94 @@ class DataOperationsBase:
1343
1378
 
1344
1379
  return Dataset(new_data)
1345
1380
 
1381
def report_from_template(
    self,
    template: str,
    *fields: Optional[str],
    top_n: Optional[int] = None,
    remove_prefix: bool = True,
    return_string: bool = False,
    format: str = "text",
    filename: Optional[str] = None,
    separator: str = "\n\n",
    observation_title_template: Optional[str] = None,
    explode: bool = False,
    markdown_to_docx: bool = True,
    use_pandoc: bool = True,
) -> Optional[Union[str, "Document", List]]:
    """Render a Jinja2 template once per row of the dataset and assemble a report.

    Each observation's data populates the template context, allowing fully
    customized report formatting.

    Args:
        template: Jinja2 template string rendered for each row.
        *fields: Fields to expose in the template context; all fields when omitted.
        top_n: Optional cap on the number of observations included.
        remove_prefix: Strip type prefixes (e.g. "answer.") from context names.
        return_string: If True, return the rendered string; if False (default in
            notebooks), only display the content.
        format: Output format — "text" or "docx".
        filename: Save target; with explode=True it acts as a per-observation
            template (e.g. "report_{index}.docx").
        separator: Text placed between per-row renderings (ignored when explode=True).
        observation_title_template: Optional Jinja2 template for observation
            titles; defaults to "Observation {index}" (1-based). Receives the row
            data plus 'index' and 'index0'.
        explode: If True, emit one file/document per observation instead of one
            combined output.
        markdown_to_docx: Treat template output as Markdown and convert it to
            proper DOCX formatting (default); False keeps plain-text behavior.
        use_pandoc: With markdown_to_docx=True, use pandoc (default) or, when
            False, a Python-based Markdown parser (needs markdown + python-docx).

    Returns:
        With explode=True, a list of filenames (when filename is given) or of
        documents/strings; otherwise a string, a Document, or None, depending
        on format and return_string.

    Examples:
        >>> from edsl.results import Results
        >>> r = Results.example()
        >>> template = "Person feels: {{ how_feeling }}"
        >>> report = r.select('how_feeling').report_from_template(template, return_string=True)
        >>> "Person feels: OK" in report
        True
        >>> "Person feels: Great" in report
        True

        # Custom observation titles
        >>> custom_title = "Response {{ index }}: {{ how_feeling }}"
        >>> report = r.select('how_feeling').report_from_template(
        ...     template, observation_title_template=custom_title, return_string=True)
        >>> "Response 1: OK" in report
        True

        # Basic template functionality
        >>> template2 = "Feeling: {{ how_feeling }}, Index: {{ index }}"
        >>> report2 = r.select('how_feeling').report_from_template(
        ...     template2, return_string=True, top_n=2)
        >>> "Feeling: OK, Index: 1" in report2
        True
    """
    from .report_from_template import TemplateReportGenerator

    # Collect keyword options once, then delegate the heavy lifting.
    options = dict(
        top_n=top_n,
        remove_prefix=remove_prefix,
        return_string=return_string,
        format=format,
        filename=filename,
        separator=separator,
        observation_title_template=observation_title_template,
        explode=explode,
        markdown_to_docx=markdown_to_docx,
        use_pandoc=use_pandoc,
    )
    return TemplateReportGenerator(self).generate_report(template, *fields, **options)
1468
+
1346
1469
 
1347
1470
  def to_dataset(func):
1348
1471
  """