079project 8.0.0 → 9.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,645 @@
1
+ import csv
2
+ import random
3
+ import re
4
+ from collections import Counter
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from typing import Callable, List, Tuple
8
+
9
+
10
# Directory that contains this script; both CSV outputs are placed next to it.
BASE_DIR = Path(__file__).resolve().parent
# One-column CSV holding the generated questions.
QUESTION_CSV = BASE_DIR.joinpath("question.csv")
# One-column CSV holding the generated answers (row-aligned with the questions).
ANSWER_CSV = BASE_DIR.joinpath("answer.csv")
13
+
14
+
15
# Low-information words that info_tokens() drops before counting a question's content.
# Written as whitespace-separated text and split once at import time.
STOPWORDS = set(
    (
        "a an the and or but so to of in on at for from with without "
        "is are was were be been being do does did can could would should "
        "i me my mine you your yours we our ours they their theirs he his she her "
        "it its this that these those there here as if then than also just really very "
        "what how when where why who which"
    ).split()
)
22
+
23
+
24
def word_tokens(text: str) -> List[str]:
    """Return the lowercase word tokens of ``text``.

    A token is a run of ASCII letters and apostrophes. Apostrophes inside a
    word are kept (``don't``), but surrounding quote marks are stripped so that
    ``'forget'`` yields ``forget`` and a lone ``'`` is not counted as a word.

    Args:
        text: Arbitrary input text.

    Returns:
        The tokens in order of appearance; empty list when there are none.
    """
    candidates = re.findall(r"[A-Za-z']+", text.lower())
    # Strip quote marks used as punctuation; tokens that were only apostrophes
    # become empty and are dropped.
    stripped = (token.strip("'") for token in candidates)
    return [token for token in stripped if token]
26
+
27
+
28
def info_tokens(text: str) -> List[str]:
    """Return the word tokens of ``text`` that are not in ``STOPWORDS``."""
    informative: List[str] = []
    for token in word_tokens(text):
        if token in STOPWORDS:
            continue
        informative.append(token)
    return informative
31
+
32
+
33
@dataclass(frozen=True)
class QA:
    """One immutable question/answer pair plus the category label of its generator."""

    # Prompt text, phrased as a request from the user.
    question: str
    # Sample reply for the prompt.
    answer: str
    # Short topic label such as "social" or "planning".
    category: str
38
+
39
+
40
def pick(rng: random.Random, items: List[str]) -> str:
    """Return one element of ``items`` chosen with ``rng``.

    Exactly one ``rng.randrange`` call is made per invocation, so the sequence
    of picks is reproducible for a given seed.
    """
    index = rng.randrange(len(items))
    return items[index]
42
+
43
+
44
def fmt_list(items: List[str], start: int = 1) -> str:
    """Render ``items`` as a numbered list on a single physical line.

    Each entry becomes ``"<n>. <text>."``: the text is stripped of surrounding
    whitespace and trailing periods before exactly one period is appended.
    Entries are joined with a single space.

    Args:
        items: Entries to number.
        start: Number given to the first entry.

    Returns:
        The one-line list, or an empty string when ``items`` is empty.
    """
    # Important: test_automatic.cjs parses CSV line-by-line, so cells must not
    # contain embedded newlines. Keep lists on a single physical line.
    numbered = []
    for number, item in enumerate(items, start=start):
        body = item.strip().rstrip(".")
        numbered.append(f"{number}. {body}.")
    return " ".join(numbered)
48
+
49
+
50
def message(text: str) -> str:
    """Collapse ``text`` onto one line with single spaces and no outer whitespace.

    Line breaks are replaced by spaces first, then every run of whitespace is
    reduced to one space, and the result is stripped.
    """
    for pattern in (r"[\r\n]+", r"\s+"):
        text = re.sub(pattern, " ", text)
    return text.strip()
55
+
56
+
57
def sanitize_cell(text: str) -> str:
    """Return ``text`` as a single-line CSV cell; falsy input becomes ``""``."""
    raw = text if text else ""
    return message(str(raw))
59
+
60
+
61
def qa_check_in(rng: random.Random) -> QA:
    """Build a "friendly check-in text" request and a sample reply (category "social").

    Values are drawn from ``rng`` in a fixed order (context, detail, opener);
    that order determines the seeded output.
    """
    context_options = [
        "before my morning shift",
        "after a stressful day at work",
        "while I'm waiting for an appointment",
        "on a quiet Sunday evening",
        "during a rainy afternoon",
    ]
    detail_options = [
        "I slept poorly and feel a bit off",
        "I'm excited but slightly nervous",
        "I'm drained and need a reset",
        "I'm doing okay, just distracted",
    ]
    opener_options = [
        "Hey! Just checking in",
        "Hi—quick check-in",
        "Hey, thinking of you",
        "Hi there—hope you're doing okay",
    ]

    context = pick(rng, context_options)
    detail = pick(rng, detail_options)
    question = "".join((
        f"Write a friendly check-in text I can send to a friend {context}. ",
        f"Include one specific detail ({detail}) and end with one simple question that invites a reply.",
    ))

    opener = pick(rng, opener_options)
    reply = message(f"{opener}. {detail}. How are you doing today?")
    return QA(question=question, answer=reply, category="social")
89
+
90
+
91
def qa_decline_invite(rng: random.Random) -> QA:
    """Build a "politely decline an invitation" request and a sample reply (category "social").

    Draw order from ``rng``: inviter, event, reason, alternative date, thanks line.
    """
    inviter = pick(rng, ["coworker", "close friend", "neighbor", "cousin"])
    event = pick(rng, ["dinner", "a birthday get-together", "drinks", "a weekend trip", "a movie night"])
    reason_options = [
        "I already have plans",
        "I need a quiet night to rest",
        "I'm trying to keep my budget tight this week",
        "I'm not feeling 100%",
    ]
    reason = pick(rng, reason_options)
    alternative = pick(rng, ["next Tuesday", "next weekend", "Thursday evening", "sometime next week"])

    question = "".join((
        f"Draft a polite text to decline a {inviter}'s invitation to {event}. ",
        f"Be warm, give a short reason ({reason}) without oversharing, and suggest {alternative} instead.",
    ))

    thanks_options = [
        "Thanks for inviting me!",
        "That sounds fun—thanks for asking!",
        "I really appreciate the invite!",
    ]
    gratitude = pick(rng, thanks_options)
    decline_sentence = f"{gratitude} I can't make {event} this time because {reason}. "
    offer_sentence = f"If you're free, I'd love to try {alternative} instead."
    reply = message(decline_sentence + offer_sentence)
    return QA(question=question, answer=reply, category="social")
115
+
116
+
117
def qa_apology(rng: random.Random) -> QA:
    """Build a "sincere apology" request and a sample apology (category "communication").

    Draw order from ``rng``: situation, repair step (used in the question), closing line.
    """
    situation_options = [
        "I snapped in a group chat",
        "I interrupted someone repeatedly",
        "I made a joke that landed badly",
        "I sounded dismissive in a meeting",
    ]
    repair_options = [
        "offer to talk and listen",
        "ask how I can make it right",
        "promise a concrete change",
        "acknowledge impact clearly",
    ]
    closing_options = [
        "If you're open to it, I'd like to listen and make it right.",
        "I'd like to understand how it landed and do better.",
        "If you'd be willing, I'd like to talk and repair this.",
    ]

    situation = pick(rng, situation_options)
    repair = pick(rng, repair_options)
    question = (
        f"{situation}. Write a sincere apology message that takes responsibility, "
        f"does not make excuses, and includes a repair step ({repair})."
    )

    closing = pick(rng, closing_options)
    apology = message(
        f"I'm sorry—{situation}. That was on me, and I can see how it could have felt disrespectful. {closing}"
    )
    return QA(question=question, answer=apology, category="communication")
143
+
144
+
145
def qa_late_reply(rng: random.Random) -> QA:
    """Build a "sorry for the late reply" request and a one-sentence sample reply.

    Draw order from ``rng``: delay, reason. Category is "communication".

    The delay phrases carry their own preposition and the question says
    "because I was <reason>", so every combination reads grammatically
    (previously e.g. "for since yesterday because traveling"). The answer is a
    single sentence because the question explicitly asks for one.
    """
    delay = pick(rng, ["for two days", "for a few hours", "since yesterday", "all morning"])
    reason = pick(rng, [
        "traveling",
        "in back-to-back meetings",
        "away from my phone",
        "handling something urgent",
        "thinking it through before replying",
        "offline for a bit",
    ])
    q = (
        f"I forgot to reply {delay} because I was {reason}. "
        "Give me a natural one-sentence message that apologizes briefly and moves the conversation forward."
    )
    a = message(
        f"Sorry for the slow reply—I was {reason} and just saw this; "
        "what would you like to do next?"
    )
    return QA(q, a, "communication")
164
+
165
+
166
def qa_day_plan(rng: random.Random) -> QA:
    """Build a "plan my weekday" request and a five-item schedule (category "planning").

    Draw order from ``rng``: day, fixed commitments, top goal.
    """
    weekday = pick(rng, ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"])
    commitments = pick(rng, [
        "a 9:30 AM meeting, a 1:00 PM dentist appointment, and a 4:00 PM deadline",
        "two 30-minute meetings and one 2-hour deep work block",
        "school pickup at 3:15 PM and a gym session in the evening",
        "a 10:00 AM presentation and errands that take about 90 minutes",
    ])
    goal = pick(rng, [
        "finish a draft report",
        "clean up my inbox",
        "study for an exam",
        "ship a small feature",
        "organize my finances",
    ])

    question = (
        f"Help me plan my {weekday}. I have {commitments}. My top goal is to {goal}. "
        "Give me a realistic schedule with time blocks, one short break, and one buffer block for surprises."
    )

    blocks = [
        f"Fixed blocks: {commitments} (put these on the calendar first).",
        f"Focus block: 60–90 minutes on '{goal}' when you feel most alert.",
        "Break: 10 minutes (water + short walk + no phone).",
        "Admin/messages: 30 minutes in one batch, not all day.",
        "Buffer: 20 minutes near the end for surprises.",
    ]
    answer = f"A realistic {weekday} plan: " + fmt_list(blocks)
    return QA(question=question, answer=answer, category="planning")
196
+
197
+
198
def qa_focus_setup(rng: random.Random) -> QA:
    """Build a "help me focus" request and a six-step setup checklist (category "productivity").

    Draw order from ``rng``: distraction, task, duration.
    """
    distraction = pick(rng, ["Slack pings", "phone scrolling", "too many browser tabs", "a noisy room"])
    task = pick(rng, ["writing a report", "studying", "coding", "doing budgeting"])
    duration = pick(rng, ["20 minutes", "30 minutes", "45 minutes"])

    question = (
        f"I keep losing focus because of {distraction}. I need {duration} of focused work for {task}. "
        "Give me a setup checklist with 6 concrete actions I can do in 3 minutes before I start."
    )

    checklist = [
        f"Mute {distraction} (or set 'Do Not Disturb').",
        "Close unrelated tabs/apps; keep only what you need.",
        "Put phone out of reach (drawer or another room).",
        f"Set a timer for {duration}.",
        f"Write one outcome: 'After {duration}, I will have ____ for {task}.'",
        "Start with a 2-minute easy step (outline, rename file, open notes).",
    ]
    answer = f"3-minute setup for {duration} of {task}: " + fmt_list(checklist)
    return QA(question=question, answer=answer, category="productivity")
218
+
219
+
220
def qa_anxiety(rng: random.Random) -> QA:
    """Build a "calm down before an event" request and a three-step routine (category "wellbeing").

    Draw order from ``rng``: event, minutes available, grounding phrase.
    """
    event = pick(rng, ["a meeting", "a phone call", "a presentation", "an interview"])
    minutes = pick(rng, ["5", "8", "10", "12"])

    question = (
        f"I feel anxious before {event}. I have {minutes} minutes right now. "
        "Give me a short routine: one breathing pattern, one body posture tip, and one grounding phrase I can say to myself."
    )

    grounding = pick(rng, [
        "I can handle the first minute calmly.",
        "Slow is smooth; smooth is fast.",
        "One step at a time—start with hello.",
    ])
    steps = [
        "Breathing: inhale 4, exhale 6, repeat for 2 minutes.",
        "Posture: feet flat, shoulders down, relax your jaw.",
        f"Grounding phrase: '{grounding}'",
    ]
    answer = f"Quick routine for {event} (about {minutes} minutes): " + fmt_list(steps)
    return QA(question=question, answer=answer, category="wellbeing")
241
+
242
+
243
def qa_sleep(rng: random.Random) -> QA:
    """Build a "fix my sleep" request and a four-step bedtime routine (category "wellbeing").

    Draw order from ``rng``: cause, then one of two routine variants. The
    closing rule is looked up by cause, with a generic fallback.
    """
    cause = pick(rng, ["scrolling my phone", "late caffeine", "worrying about work", "irregular bedtimes"])
    question = (
        f"My sleep has been inconsistent because of {cause}. "
        "Give me a realistic 4-step bedtime routine I can follow for 7 days, plus one rule to make it easier to stick to."
    )

    wind_down_routine = [
        "Set a wind-down alarm 45 minutes before bed.",
        "Dim lights; prep tomorrow basics (clothes, bag, water).",
        "Do 5 minutes stretching or light reading.",
        "Lights out at the same time; cool, dark room.",
    ]
    wash_up_routine = [
        "Finish caffeine/food earlier in the evening.",
        "Take a warm shower or wash up.",
        "Write a 3-line brain-dump (worries + next steps).",
        "Go to bed on schedule; no snooze scrolling.",
    ]
    rules = {
        "scrolling my phone": "Rule: keep the phone outside the bedroom.",
        "late caffeine": "Rule: no caffeine after 2 PM.",
        "worrying about work": "Rule: write tomorrow's top 3 tasks before bed.",
        "irregular bedtimes": "Rule: keep bedtime within a 30-minute window.",
    }

    routine = pick(rng, [wind_down_routine, wash_up_routine])
    rule = rules.get(cause, "Rule: no screens for the last 30 minutes.")
    answer = f"Bedtime routine (for '{cause}'): " + fmt_list(routine) + " " + rule
    return QA(question=question, answer=answer, category="wellbeing")
276
+
277
+
278
def qa_food_recipe(rng: random.Random) -> QA:
    """Build a "dinner from what I have" request and a five-step plan (category "food").

    Draw order from ``rng``: pantry triple, constraint, flavor tip.

    The flavor tip is appended with a space rather than a newline so the
    answer stays on one physical line, like every other generator's output.
    """
    pantry = pick(rng, [
        ("chicken", "rice", "frozen vegetables"),
        ("pasta", "canned tomatoes", "garlic"),
        ("eggs", "spinach", "toast"),
        ("beans", "rice", "salsa"),
        ("oats", "milk", "berries"),
    ])
    constraint = pick(rng, ["under 25 minutes", "one pan", "low-sugar", "high-protein", "budget-friendly"])
    q = (
        f"I have {pantry[0]}, {pantry[1]}, and {pantry[2]}. "
        f"Give me a simple dinner plan that is {constraint}. "
        "Include 5 steps and one flavor tip (salt/acid/spice/herbs)."
    )
    tip = pick(rng, [
        "Finish with lemon or vinegar for brightness.",
        "Add garlic/chili at the end for aroma.",
        "Use a pinch of sugar to balance acidity (tiny amount).",
        "Toast spices briefly in oil to deepen flavor.",
    ])
    a = (
        f"Dinner using {pantry[0]}, {pantry[1]}, {pantry[2]} ({constraint}): "
        + fmt_list([
            "Prep: rinse/cut ingredients; preheat pan.",
            f"Cook {pantry[1]} (or warm it if already cooked).",
            f"Cook {pantry[0]} first, then add {pantry[2]}.",
            "Season with salt and pepper; add sauce if you have one.",
            "Combine, heat through, and taste-adjust.",
        ])
        # Single space, not "\n": cells must not contain embedded newlines.
        + f" Flavor tip: {tip}"
    )
    return QA(q, a, "food")
310
+
311
+
312
def qa_budget(rng: random.Random) -> QA:
    """Build a "simple monthly budget" request and a six-category example (category "finance").

    Draw order from ``rng``: income, rent, goal. Whatever is left after rent is
    split by fixed shares; the rounding remainder is folded into the last category.
    """
    income = pick(rng, [1900, 2600, 3200, 4500])
    rent = pick(rng, [700, 900, 1200, 1600])
    goal = pick(rng, ["build an emergency fund", "pay down debt", "save $200", "stop overspending on food"])

    question = (
        f"Help me create a simple monthly budget. Take-home pay: ${income}. Rent: ${rent}. Goal: {goal}. "
        "Give 6 categories with suggested amounts and one rule for tracking that won't take more than 2 minutes per day."
    )

    # Rough categories (not financial advice; just practical buckets)
    after_rent = max(income - rent, 0)
    food, transport, utilities, savings, personal = (
        int(after_rent * share) for share in (0.22, 0.12, 0.10, 0.18, 0.18)
    )
    allocated = food + transport + utilities + savings + personal
    buffer = max(after_rent - allocated, 0)

    categories = [
        f"Rent: ${rent}",
        f"Food: ${food}",
        f"Transport: ${transport}",
        f"Utilities/internet: ${utilities}",
        f"Savings/goal: ${savings}",
        f"Personal/misc + buffer: ${personal + buffer}",
    ]
    answer = (
        "Example categories: "
        + fmt_list(categories)
        + " Tracking rule: once per day, log only today's total spending into 3 buckets (food / transport / misc)."
    )
    return QA(question=question, answer=answer, category="finance")
341
+
342
+
343
def qa_device_security(rng: random.Random) -> QA:
    """Build a "device security checklist" request and a seven-item list (category "tech").

    Draw order from ``rng``: device, risk, then one shuffle of the base actions.

    The OS-update action is moved to the front after shuffling instead of
    being assigned over index 0. Assigning over index 0 discarded whichever
    action the shuffle had placed first and could leave "Update the operating
    system." in the list twice.
    """
    device = pick(rng, ["phone", "laptop"])
    risk = pick(rng, ["phishing emails", "stolen device", "weak passwords", "untrusted downloads"])
    q = (
        f"Give me a simple security checklist for my {device}. My biggest concern is {risk}. "
        "List 7 actions in priority order, and keep each action under 10 words."
    )
    always_first = "Update the operating system."
    base = [
        always_first,
        "Enable a strong screen lock.",
        "Turn on two-factor authentication.",
        "Use a password manager.",
        "Remove unused apps/extensions.",
        "Review app permissions.",
        "Back up important data automatically.",
    ]
    risk_specific = {
        "phishing emails": "Verify senders; never type passwords from links.",
        "stolen device": "Enable Find My Device and remote wipe.",
        "weak passwords": "Replace reused passwords with long unique ones.",
        "untrusted downloads": "Install only from trusted stores; scan files.",
    }
    # Make the list slightly different each time but still sensible.
    rng.shuffle(base)
    # Relocate (rather than overwrite) so no action is lost or duplicated.
    base.remove(always_first)
    base.insert(0, always_first)
    base.insert(2, risk_specific.get(risk, "Be cautious with links and attachments."))
    # Cap to 7 items after insertion (the question asks for exactly 7).
    a = fmt_list(base[:7])
    return QA(q, a, "tech")
372
+
373
+
374
def qa_relationship(rng: random.Random) -> QA:
    """Build a "communicate better with my partner" request and a fixed answer (category "relationships").

    Only the situation in the question is drawn from ``rng``; the answer text
    is the same for every situation.
    """
    situation = pick(rng, [
        "we argue about chores",
        "we misunderstand tone over text",
        "we don't schedule time together",
        "I feel unheard in conversations",
    ])
    question = (
        f"In my relationship, {situation}. I want to communicate better without blaming. "
        "Give me 3 exact phrases I can say, and one weekly habit we can try for 15 minutes."
    )

    phrases = [
        "'When X happens, I feel Y, and I need Z.'",
        "'Can we solve this as a team—what's your view?'",
        "'What would make this easier for you this week?'",
    ]
    habit = " Weekly habit: a 15-minute check-in (no phones) to plan chores, one fun thing, and one stress point."
    answer = "Phrases you can use: " + fmt_list(phrases) + habit
    return QA(question=question, answer=answer, category="relationships")
395
+
396
+
397
def qa_interview(rng: random.Random) -> QA:
    """Build an "interview practice" request and five sample Q&As (category "work").

    Only the role is drawn from ``rng``.

    The answer is assembled with ``fmt_list`` so it stays on one physical line
    like the other generators' output. The numbering and wording are unchanged
    from the earlier newline-separated form.
    """
    role = pick(rng, ["data analyst", "software engineer", "project manager", "teacher", "designer"])
    q = (
        f"I'm interviewing for a {role} role. I want answers that sound human, not scripted. "
        "Give me 5 common questions and a short example answer for each (2–3 sentences)."
    )
    a = (
        "Common questions + example answers: "
        + fmt_list([
            f"Tell me about yourself — I'm a {role} who enjoys solving practical problems and working with others; recently I focused on improving a process and measuring results.",
            "Why this role — I like the mix of ownership and collaboration, and I want to do work that has clear impact.",
            "Strength — I'm good at breaking messy problems into steps and communicating progress.",
            "Challenge — I used to take on too much; now I clarify priorities early and set realistic deadlines.",
            "Conflict — I ask for specifics, align on the goal, and propose a small next step everyone can agree on.",
        ])
    )
    return QA(q, a, "work")
412
+
413
+
414
def qa_customer_support(rng: random.Random) -> QA:
    """Build a "message to customer support" request and a template reply (category "admin").

    Draw order from ``rng``: issue, tone. The tone appears only in the question.
    """
    issue = pick(rng, [
        "a package is late",
        "I was charged twice",
        "an item arrived damaged",
        "I need to cancel a subscription",
    ])
    tone = pick(rng, ["polite", "firm but calm", "short and direct"])

    question = "".join((
        f"Write a {tone} message to customer support: {issue}. ",
        "Include order/account placeholders, what I want, and a reasonable deadline for response.",
    ))
    answer = "".join((
        "Hello Support Team. I'm contacting you about [Order #] / [Account Email]. ",
        f"Issue: {issue}. Please [refund/replace/cancel] and confirm the resolution. ",
        "If possible, I'd appreciate an update within 2 business days. Thank you, [Name].",
    ))
    return QA(question=question, answer=answer, category="admin")
432
+
433
+
434
def qa_exercise_plan(rng: random.Random) -> QA:
    """Build a "7-day exercise plan" request and one of two sample plans (category "fitness").

    Draw order from ``rng``: goal, constraint, level, plan variant, recovery tip.
    """
    goal = pick(rng, ["build stamina", "reduce back stiffness", "lose a little fat", "get stronger"])
    constraint = pick(rng, ["no equipment", "a small apartment", "10 minutes per day", "3 days per week"])
    level = pick(rng, ["beginner", "returning after a break", "not very fit"])

    question = (
        f"I want to {goal} and I'm {level}. Constraint: {constraint}. "
        "Give me a simple plan for the next 7 days with exact exercises, reps/time, and one recovery tip."
    )

    strength_led_week = [
        "Day 1: 3 rounds — 8 squats, 6 push-ups (knees ok), 20s plank.",
        "Day 2: 20–30 min walk + 5 min stretching.",
        "Day 3: 3 rounds — 10 lunges each side, 8 towel rows, 20s side plank.",
        "Day 4: Easy walk + gentle mobility (hips/shoulders).",
        "Day 5: Repeat Day 1; add 1–2 reps if easy.",
        "Day 6: Intervals — 1 min brisk / 1 min easy for 12 minutes.",
        "Day 7: Rest + 10 minutes stretching.",
    ]
    walk_led_week = [
        "Day 1: 10 minutes — brisk walk + 2 minutes stretching.",
        "Day 2: 3 rounds — 8 glute bridges, 8 bird-dogs each side, 20s plank.",
        "Day 3: Easy walk + shoulder/hip mobility.",
        "Day 4: 3 rounds — 10 squats, 8 incline push-ups, 20s side plank.",
        "Day 5: Easy walk + 5 minutes stretching.",
        "Day 6: Repeat the strength day you liked most.",
        "Day 7: Rest day + short gentle walk.",
    ]
    week = pick(rng, [strength_led_week, walk_led_week])
    recovery_tip = pick(rng, [
        "Stop 1–2 reps before failure and keep form clean.",
        "If pain (sharp) shows up, stop and scale down.",
        "Sleep and hydration matter more than perfect workouts.",
    ])

    heading = f"7-day plan for '{goal}' ({level}, constraint: {constraint}): "
    answer = heading + fmt_list(week) + f" Recovery tip: {recovery_tip}"
    return QA(question=question, answer=answer, category="fitness")
472
+
473
+
474
def qa_study_plan(rng: random.Random) -> QA:
    """Build a "help me study" request and a daily-structure answer (category "study").

    Draw order from ``rng``: subject, daily time, horizon, weakness, structure
    variant, weekly metric.
    """
    subject = pick(rng, ["English vocabulary", "math basics", "a certification exam", "typing", "public speaking"])
    daily_time = pick(rng, ["45 minutes", "30 minutes", "60 minutes", "25 minutes"])
    horizon = pick(rng, ["2 weeks", "10 days", "one month"])
    weakness = pick(rng, ["I forget quickly", "I get distracted", "I don't know what to practice", "I cram at the end"])

    question = (
        f"Help me study {subject} for {horizon}. I can do {daily_time} per day. Weakness: {weakness}. "
        "Give a daily structure, a review method, and one way to measure progress weekly."
    )

    practice_first = [
        "5 min setup: pick one topic and open materials.",
        f"{daily_time} practice: do problems/drills first, then check.",
        "10 min review: write mistakes + fixes in a list.",
    ]
    review_first = [
        "10 min review: spaced repetition or yesterday's errors.",
        f"{daily_time} focus: 2 x 15 minutes practice with a 2-minute break.",
        "5 min summary: write 3 bullet takeaways.",
    ]
    structure = pick(rng, [practice_first, review_first])
    metric = pick(rng, [
        "a weekly 10-minute timed quiz",
        "a list of your top 10 recurring mistakes",
        "words/minute or accuracy (for typing)",
        "one recorded 2-minute practice (for speaking)",
    ])

    parts = [
        f"Daily structure for {subject} ({horizon}): ",
        fmt_list(structure),
        " Review method: revisit the same items after 1 day, 3 days, and 7 days.",
        f" Weekly measure: track {metric} and note what improved.",
    ]
    return QA(question=question, answer="".join(parts), category="study")
509
+
510
+
511
def qa_travel_packing(rng: random.Random) -> QA:
    """Build a "packing checklist" request and a grouped checklist (category "travel").

    Draw order from ``rng``: trip length, weather, packing style. The closing
    "Don't forget" line is looked up by weather, with a generic fallback.
    """
    trip = pick(rng, ["3-day trip", "5-day trip", "weekend trip"])
    weather = pick(rng, ["cold and rainy", "hot and humid", "mild with wind", "snowy"])
    packing_style = pick(rng, ["carry-on only", "one backpack", "minimal packing"])

    question = (
        f"I'm going on a {trip} and the weather will be {weather}. I want {packing_style}. "
        "Give me a packing checklist grouped by clothes, toiletries, tech, and documents, plus 2 'don't forget' items."
    )

    reminders = {
        "cold and rainy": "Don't forget: a compact umbrella and warm socks.",
        "hot and humid": "Don't forget: sunscreen and a light hat.",
        "mild with wind": "Don't forget: a windbreaker and lip balm.",
        "snowy": "Don't forget: gloves and a warm beanie.",
    }
    reminder = reminders.get(weather, "Don't forget: water bottle and a zip bag for liquids.")

    sections = [
        f"Packing checklist ({trip}, {weather}, {packing_style}): ",
        "Clothes: 2–3 tops, 1–2 bottoms, underwear/socks, outer layer, comfortable shoes. ",
        "Toiletries: toothbrush, deodorant, small liquids, meds, comb. ",
        "Tech: charger, power bank, headphones, adapter (if needed). ",
        "Documents: ID/passport, cards, reservations, emergency contact. ",
        reminder,
    ]
    return QA(question=question, answer="".join(sections), category="travel")
534
+
535
+
536
# Registry of every QA template function. generate_dataset() picks one entry
# per attempt, so the order of this list is part of the seeded output.
GENERATORS: List[Callable[[random.Random], QA]] = [
    qa_check_in,
    qa_decline_invite,
    qa_apology,
    qa_late_reply,
    qa_day_plan,
    qa_focus_setup,
    qa_anxiety,
    qa_sleep,
    qa_food_recipe,
    qa_budget,
    qa_device_security,
    qa_relationship,
    qa_interview,
    qa_customer_support,
    qa_exercise_plan,
    qa_study_plan,
    qa_travel_packing,
]
555
+
556
+
557
def generate_dataset(
    count: int,
    seed: int,
    *,
    max_same_answer: int = 10,
    min_question_words: int = 16,
    min_info_tokens: int = 9,
    max_attempts_per_item: int = 50,
) -> List[QA]:
    """Generate ``count`` distinct QA pairs deterministically from ``seed``.

    Each attempt picks a random generator from ``GENERATORS`` and keeps its
    result only if the question is new, long enough, informative enough, and
    its answer has not already been used ``max_same_answer`` times.

    Args:
        count: Number of QA pairs to return.
        seed: Seed for the private ``random.Random`` instance.
        max_same_answer: Maximum number of kept QAs that may share one answer.
        min_question_words: Minimum ``word_tokens`` count for a question.
        min_info_tokens: Minimum ``info_tokens`` count for a question.
        max_attempts_per_item: Attempt budget is ``count * max_attempts_per_item``.

    Returns:
        A list of ``count`` QA objects whose question and answer are
        whitespace-normalized single-line strings.

    Raises:
        RuntimeError: If the attempt budget runs out before ``count`` QAs are
            accepted; relax the keyword constraints in that case.
    """
    rng = random.Random(seed)

    qas: List[QA] = []
    seen_questions = set()
    answer_counts: Counter[str] = Counter()

    attempts = 0
    max_attempts = count * max_attempts_per_item
    while len(qas) < count:
        attempts += 1
        if attempts > max_attempts:
            raise RuntimeError("Unable to generate enough diverse QAs; relax constraints.")

        # Randomize generator choice so we don't get stuck on one template.
        gen = pick(rng, GENERATORS)
        qa = gen(rng)

        # Normalize both fields the same way the CSV writer will, so the
        # duplicate checks operate on exactly the text that gets written.
        q = re.sub(r"\s+", " ", qa.question).strip()
        a = re.sub(r"\s+", " ", qa.answer).strip()

        if q in seen_questions:
            continue

        if len(word_tokens(q)) < min_question_words:
            continue

        if len(info_tokens(q)) < min_info_tokens:
            continue

        if answer_counts[a] >= max_same_answer:
            continue

        seen_questions.add(q)
        answer_counts[a] += 1
        qas.append(QA(q, a, qa.category))

    return qas
600
+
601
+
602
def write_csv(qas: List[QA], question_path: Path, answer_path: Path) -> None:
    """Write questions and answers to two row-aligned, one-column CSV files.

    Each file gets a header row (``question`` / ``answer``) followed by one
    fully quoted, sanitized cell per QA. Parent directories of both paths are
    created if missing (previously only the question file's parent was).

    Args:
        qas: QA pairs to write, in output order.
        question_path: Destination of the question column.
        answer_path: Destination of the answer column.
    """
    columns = (
        (question_path, "question", [qa.question for qa in qas]),
        (answer_path, "answer", [qa.answer for qa in qas]),
    )
    for path, header, cells in columns:
        path.parent.mkdir(parents=True, exist_ok=True)
        # newline="" lets the csv module control line endings itself.
        with path.open("w", encoding="utf-8", newline="") as f:
            writer = csv.writer(f, quoting=csv.QUOTE_ALL)
            writer.writerow([header])
            writer.writerows([sanitize_cell(cell)] for cell in cells)
616
+
617
+
618
def print_stats(qas: List[QA]) -> None:
    """Print a short summary of ``qas`` to stdout.

    Reports the number of QAs, per-category counts, min/avg/max informative
    token count of the questions, the number of unique answers, and the five
    most repeated answers (first line, truncated to 90 characters).

    An empty ``qas`` list prints a placeholder instead of raising on
    ``min()``/division, and an empty answer string is shown as an empty line.
    """
    q_lens = [len(info_tokens(qa.question)) for qa in qas]
    a_counts = Counter(qa.answer for qa in qas)
    cat_counts = Counter(qa.category for qa in qas)

    print("QAs:", len(qas))
    print("Categories:", dict(cat_counts))
    if q_lens:
        print("InfoTokens: min/avg/max =",
              min(q_lens),
              round(sum(q_lens) / len(q_lens), 2),
              max(q_lens))
    else:
        print("InfoTokens: min/avg/max = n/a (no QAs)")
    print("Unique answers:", len(a_counts))
    print("Most repeated answers:")
    for ans, n in a_counts.most_common(5):
        # splitlines() is empty for "", so fall back to an empty first line.
        first_line = next(iter(ans.splitlines()), "")
        print("  ", n, "x |", first_line[:90])
634
+
635
+
636
def main() -> None:
    """Generate the 400-item dataset with the fixed seed, write both CSVs, and print a summary."""
    dataset = generate_dataset(count=400, seed=79079)
    write_csv(dataset, QUESTION_CSV, ANSWER_CSV)
    print_stats(dataset)
    for written_path in (QUESTION_CSV, ANSWER_CSV):
        print("Wrote:", written_path)
642
+
643
+
644
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()