@tidyjs/tidy 2.5.2 → 2.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. package/dist/es/addRows.js.map +1 -1
  2. package/dist/es/arrange.js +9 -5
  3. package/dist/es/arrange.js.map +1 -1
  4. package/dist/es/complete.js.map +1 -1
  5. package/dist/es/count.js +6 -2
  6. package/dist/es/count.js.map +1 -1
  7. package/dist/es/debug.js +4 -2
  8. package/dist/es/debug.js.map +1 -1
  9. package/dist/es/distinct.js +3 -3
  10. package/dist/es/distinct.js.map +1 -1
  11. package/dist/es/expand.js +18 -13
  12. package/dist/es/expand.js.map +1 -1
  13. package/dist/es/fill.js +10 -3
  14. package/dist/es/fill.js.map +1 -1
  15. package/dist/es/filter.js.map +1 -1
  16. package/dist/es/fullJoin.js +23 -20
  17. package/dist/es/fullJoin.js.map +1 -1
  18. package/dist/es/groupBy.js +56 -28
  19. package/dist/es/groupBy.js.map +1 -1
  20. package/dist/es/helpers/assignGroupKeys.js +4 -4
  21. package/dist/es/helpers/assignGroupKeys.js.map +1 -1
  22. package/dist/es/helpers/groupMap.js +2 -2
  23. package/dist/es/helpers/groupMap.js.map +1 -1
  24. package/dist/es/helpers/groupTraversal.js +5 -4
  25. package/dist/es/helpers/groupTraversal.js.map +1 -1
  26. package/dist/es/helpers/identity.js.map +1 -1
  27. package/dist/es/helpers/isObject.js.map +1 -1
  28. package/dist/es/helpers/keysFromItems.js +1 -2
  29. package/dist/es/helpers/keysFromItems.js.map +1 -1
  30. package/dist/es/helpers/singleOrArray.js.map +1 -1
  31. package/dist/es/helpers/summation.js +11 -3
  32. package/dist/es/helpers/summation.js.map +1 -1
  33. package/dist/es/innerJoin.js +37 -12
  34. package/dist/es/innerJoin.js.map +1 -1
  35. package/dist/es/item/rate.js +2 -3
  36. package/dist/es/item/rate.js.map +1 -1
  37. package/dist/es/leftJoin.js +13 -8
  38. package/dist/es/leftJoin.js.map +1 -1
  39. package/dist/es/map.js.map +1 -1
  40. package/dist/es/math/math.js.map +1 -1
  41. package/dist/es/mutate.js +1 -1
  42. package/dist/es/mutate.js.map +1 -1
  43. package/dist/es/mutateWithSummary.js +1 -1
  44. package/dist/es/mutateWithSummary.js.map +1 -1
  45. package/dist/es/pivotLonger.js +31 -7
  46. package/dist/es/pivotLonger.js.map +1 -1
  47. package/dist/es/pivotWider.js +24 -19
  48. package/dist/es/pivotWider.js.map +1 -1
  49. package/dist/es/rename.js.map +1 -1
  50. package/dist/es/replaceNully.js +1 -1
  51. package/dist/es/replaceNully.js.map +1 -1
  52. package/dist/es/select.js +3 -3
  53. package/dist/es/select.js.map +1 -1
  54. package/dist/es/selectors/contains.js.map +1 -1
  55. package/dist/es/selectors/endsWith.js.map +1 -1
  56. package/dist/es/selectors/everything.js.map +1 -1
  57. package/dist/es/selectors/matches.js.map +1 -1
  58. package/dist/es/selectors/negate.js +2 -2
  59. package/dist/es/selectors/negate.js.map +1 -1
  60. package/dist/es/selectors/numRange.js.map +1 -1
  61. package/dist/es/selectors/startsWith.js.map +1 -1
  62. package/dist/es/sequences/fullSeq.js +5 -1
  63. package/dist/es/sequences/fullSeq.js.map +1 -1
  64. package/dist/es/slice.js +2 -3
  65. package/dist/es/slice.js.map +1 -1
  66. package/dist/es/summarize.js +6 -4
  67. package/dist/es/summarize.js.map +1 -1
  68. package/dist/es/summary/deviation.js.map +1 -1
  69. package/dist/es/summary/first.js.map +1 -1
  70. package/dist/es/summary/last.js.map +1 -1
  71. package/dist/es/summary/max.js.map +1 -1
  72. package/dist/es/summary/mean.js.map +1 -1
  73. package/dist/es/summary/meanRate.js.map +1 -1
  74. package/dist/es/summary/median.js.map +1 -1
  75. package/dist/es/summary/min.js.map +1 -1
  76. package/dist/es/summary/n.js.map +1 -1
  77. package/dist/es/summary/nDistinct.js +2 -2
  78. package/dist/es/summary/nDistinct.js.map +1 -1
  79. package/dist/es/summary/sum.js.map +1 -1
  80. package/dist/es/summary/variance.js.map +1 -1
  81. package/dist/es/tally.js +4 -2
  82. package/dist/es/tally.js.map +1 -1
  83. package/dist/es/tidy.js.map +1 -1
  84. package/dist/es/total.js.map +1 -1
  85. package/dist/es/transmute.js.map +1 -1
  86. package/dist/es/vector/cumsum.js.map +1 -1
  87. package/dist/es/vector/lag.js +1 -1
  88. package/dist/es/vector/lag.js.map +1 -1
  89. package/dist/es/vector/lead.js +1 -1
  90. package/dist/es/vector/lead.js.map +1 -1
  91. package/dist/es/vector/roll.js +1 -1
  92. package/dist/es/vector/roll.js.map +1 -1
  93. package/dist/es/vector/rowNumber.js.map +1 -1
  94. package/dist/es/when.js +1 -2
  95. package/dist/es/when.js.map +1 -1
  96. package/dist/lib/addRows.js +0 -2
  97. package/dist/lib/addRows.js.map +1 -1
  98. package/dist/lib/arrange.js +9 -7
  99. package/dist/lib/arrange.js.map +1 -1
  100. package/dist/lib/complete.js +0 -2
  101. package/dist/lib/complete.js.map +1 -1
  102. package/dist/lib/count.js +6 -4
  103. package/dist/lib/count.js.map +1 -1
  104. package/dist/lib/debug.js +4 -4
  105. package/dist/lib/debug.js.map +1 -1
  106. package/dist/lib/distinct.js +3 -5
  107. package/dist/lib/distinct.js.map +1 -1
  108. package/dist/lib/expand.js +18 -15
  109. package/dist/lib/expand.js.map +1 -1
  110. package/dist/lib/fill.js +10 -5
  111. package/dist/lib/fill.js.map +1 -1
  112. package/dist/lib/filter.js +0 -2
  113. package/dist/lib/filter.js.map +1 -1
  114. package/dist/lib/fullJoin.js +22 -21
  115. package/dist/lib/fullJoin.js.map +1 -1
  116. package/dist/lib/groupBy.js +56 -30
  117. package/dist/lib/groupBy.js.map +1 -1
  118. package/dist/lib/helpers/assignGroupKeys.js +4 -6
  119. package/dist/lib/helpers/assignGroupKeys.js.map +1 -1
  120. package/dist/lib/helpers/groupMap.js +2 -4
  121. package/dist/lib/helpers/groupMap.js.map +1 -1
  122. package/dist/lib/helpers/groupTraversal.js +5 -6
  123. package/dist/lib/helpers/groupTraversal.js.map +1 -1
  124. package/dist/lib/helpers/identity.js +0 -2
  125. package/dist/lib/helpers/identity.js.map +1 -1
  126. package/dist/lib/helpers/isObject.js +0 -2
  127. package/dist/lib/helpers/isObject.js.map +1 -1
  128. package/dist/lib/helpers/keysFromItems.js +1 -4
  129. package/dist/lib/helpers/keysFromItems.js.map +1 -1
  130. package/dist/lib/helpers/singleOrArray.js +0 -2
  131. package/dist/lib/helpers/singleOrArray.js.map +1 -1
  132. package/dist/lib/helpers/summation.js +10 -4
  133. package/dist/lib/helpers/summation.js.map +1 -1
  134. package/dist/lib/index.js +0 -2
  135. package/dist/lib/index.js.map +1 -1
  136. package/dist/lib/innerJoin.js +38 -14
  137. package/dist/lib/innerJoin.js.map +1 -1
  138. package/dist/lib/item/rate.js +2 -5
  139. package/dist/lib/item/rate.js.map +1 -1
  140. package/dist/lib/leftJoin.js +12 -9
  141. package/dist/lib/leftJoin.js.map +1 -1
  142. package/dist/lib/map.js +0 -2
  143. package/dist/lib/map.js.map +1 -1
  144. package/dist/lib/math/math.js +0 -2
  145. package/dist/lib/math/math.js.map +1 -1
  146. package/dist/lib/mutate.js +1 -3
  147. package/dist/lib/mutate.js.map +1 -1
  148. package/dist/lib/mutateWithSummary.js +1 -3
  149. package/dist/lib/mutateWithSummary.js.map +1 -1
  150. package/dist/lib/pivotLonger.js +31 -9
  151. package/dist/lib/pivotLonger.js.map +1 -1
  152. package/dist/lib/pivotWider.js +24 -21
  153. package/dist/lib/pivotWider.js.map +1 -1
  154. package/dist/lib/rename.js +0 -2
  155. package/dist/lib/rename.js.map +1 -1
  156. package/dist/lib/replaceNully.js +1 -3
  157. package/dist/lib/replaceNully.js.map +1 -1
  158. package/dist/lib/select.js +3 -5
  159. package/dist/lib/select.js.map +1 -1
  160. package/dist/lib/selectors/contains.js +0 -2
  161. package/dist/lib/selectors/contains.js.map +1 -1
  162. package/dist/lib/selectors/endsWith.js +0 -2
  163. package/dist/lib/selectors/endsWith.js.map +1 -1
  164. package/dist/lib/selectors/everything.js +0 -2
  165. package/dist/lib/selectors/everything.js.map +1 -1
  166. package/dist/lib/selectors/matches.js +0 -2
  167. package/dist/lib/selectors/matches.js.map +1 -1
  168. package/dist/lib/selectors/negate.js +2 -4
  169. package/dist/lib/selectors/negate.js.map +1 -1
  170. package/dist/lib/selectors/numRange.js +0 -2
  171. package/dist/lib/selectors/numRange.js.map +1 -1
  172. package/dist/lib/selectors/startsWith.js +0 -2
  173. package/dist/lib/selectors/startsWith.js.map +1 -1
  174. package/dist/lib/sequences/fullSeq.js +5 -3
  175. package/dist/lib/sequences/fullSeq.js.map +1 -1
  176. package/dist/lib/slice.js +2 -5
  177. package/dist/lib/slice.js.map +1 -1
  178. package/dist/lib/summarize.js +6 -6
  179. package/dist/lib/summarize.js.map +1 -1
  180. package/dist/lib/summary/deviation.js +0 -2
  181. package/dist/lib/summary/deviation.js.map +1 -1
  182. package/dist/lib/summary/first.js +0 -2
  183. package/dist/lib/summary/first.js.map +1 -1
  184. package/dist/lib/summary/last.js +0 -2
  185. package/dist/lib/summary/last.js.map +1 -1
  186. package/dist/lib/summary/max.js +0 -2
  187. package/dist/lib/summary/max.js.map +1 -1
  188. package/dist/lib/summary/mean.js +0 -2
  189. package/dist/lib/summary/mean.js.map +1 -1
  190. package/dist/lib/summary/meanRate.js +0 -2
  191. package/dist/lib/summary/meanRate.js.map +1 -1
  192. package/dist/lib/summary/median.js +0 -2
  193. package/dist/lib/summary/median.js.map +1 -1
  194. package/dist/lib/summary/min.js +0 -2
  195. package/dist/lib/summary/min.js.map +1 -1
  196. package/dist/lib/summary/n.js +0 -2
  197. package/dist/lib/summary/n.js.map +1 -1
  198. package/dist/lib/summary/nDistinct.js +2 -4
  199. package/dist/lib/summary/nDistinct.js.map +1 -1
  200. package/dist/lib/summary/sum.js +0 -2
  201. package/dist/lib/summary/sum.js.map +1 -1
  202. package/dist/lib/summary/variance.js +0 -2
  203. package/dist/lib/summary/variance.js.map +1 -1
  204. package/dist/lib/tally.js +4 -4
  205. package/dist/lib/tally.js.map +1 -1
  206. package/dist/lib/tidy.js +0 -2
  207. package/dist/lib/tidy.js.map +1 -1
  208. package/dist/lib/total.js +0 -2
  209. package/dist/lib/total.js.map +1 -1
  210. package/dist/lib/transmute.js +0 -2
  211. package/dist/lib/transmute.js.map +1 -1
  212. package/dist/lib/vector/cumsum.js +0 -2
  213. package/dist/lib/vector/cumsum.js.map +1 -1
  214. package/dist/lib/vector/lag.js +1 -3
  215. package/dist/lib/vector/lag.js.map +1 -1
  216. package/dist/lib/vector/lead.js +1 -3
  217. package/dist/lib/vector/lead.js.map +1 -1
  218. package/dist/lib/vector/roll.js +1 -3
  219. package/dist/lib/vector/roll.js.map +1 -1
  220. package/dist/lib/vector/rowNumber.js +0 -2
  221. package/dist/lib/vector/rowNumber.js.map +1 -1
  222. package/dist/lib/when.js +1 -4
  223. package/dist/lib/when.js.map +1 -1
  224. package/dist/tidy.d.ts +217 -1775
  225. package/dist/umd/tidy.js +307 -184
  226. package/dist/umd/tidy.js.map +1 -1
  227. package/dist/umd/tidy.min.js +1 -1
  228. package/dist/umd/tidy.min.js.map +1 -1
  229. package/genai-docs/api-core.md +357 -0
  230. package/genai-docs/api-grouping.md +400 -0
  231. package/genai-docs/api-joins.md +118 -0
  232. package/genai-docs/api-other.md +238 -0
  233. package/genai-docs/api-pivot.md +112 -0
  234. package/genai-docs/api-selectors.md +159 -0
  235. package/genai-docs/api-sequences.md +127 -0
  236. package/genai-docs/api-slice.md +137 -0
  237. package/genai-docs/api-summarize.md +528 -0
  238. package/genai-docs/api-vector.md +239 -0
  239. package/genai-docs/gotchas.md +193 -0
  240. package/genai-docs/index.md +44 -0
  241. package/genai-docs/mental-model.md +270 -0
  242. package/genai-docs/patterns.md +384 -0
  243. package/genai-docs/quick-reference.md +125 -0
  244. package/package.json +16 -10
  245. package/LICENSE +0 -21
@@ -0,0 +1,270 @@
1
+ # Mental Model for tidyjs
2
+
3
+ ## What is tidyjs?
4
+
5
+ tidyjs is a JavaScript/TypeScript library for data wrangling that works with **plain arrays of objects** — no special DataFrame wrapper. It is inspired by R's dplyr and the tidyverse. Think of it as a functional pipeline for transforming `{key: value}[]` data, similar to how you might chain SQL operations or pandas methods, but using composable JavaScript functions.
6
+
7
+ ## The Pipeline Pattern
8
+
9
+ Everything in tidyjs flows through `tidy()`:
10
+
11
+ ```js
12
+ import { tidy, filter, mutate, arrange, desc } from '@tidyjs/tidy';
13
+
14
+ const result = tidy(
15
+ data, // 1st arg: array of objects
16
+ filter(...), // 2nd+ args: transformation functions (verbs)
17
+ mutate(...),
18
+ arrange(desc('value'))
19
+ );
20
+ // result is a new array of objects
21
+ ```
22
+
23
+ **Key rules:**
24
+ - First argument is always the data array — `tidy(data, ...fns)`
25
+ - Each subsequent argument is a **verb** — a function that returns a `TidyFn`
26
+ - Verbs are **curried**: `filter(predicate)` returns a function `(items[]) => items[]`
27
+ - The output of each verb feeds into the next
28
+ - `tidy()` returns a new array (never mutates the input)
29
+ - You can pass up to **10 pipeline steps** with full TypeScript type inference
30
+
31
+ **Common mistake — don't call verbs directly:**
32
+
33
+ ```js
34
+ // WRONG: calling filter directly without tidy()
35
+ const result = filter((d) => d.value > 10)(data);
36
+
37
+ // CORRECT: use tidy() as the pipeline
38
+ const result = tidy(data, filter((d) => d.value > 10));
39
+ ```
40
+
41
+ ## Accessor Functions
42
+
43
+ tidyjs uses **accessor functions** `(d) => d.column` to reference data fields, NOT string column names.
44
+
45
+ ```js
46
+ // CORRECT: accessor function
47
+ tidy(data, filter((d) => d.age > 30))
48
+ tidy(data, mutate({ fullName: (d) => `${d.first} ${d.last}` }))
49
+
50
+ // WRONG: string column names (this is NOT pandas or SQL)
51
+ tidy(data, filter('age > 30')) // won't work
52
+ ```
53
+
54
+ **Exception:** Some summary functions accept either a key string or accessor for convenience:
55
+ ```js
56
+ sum('value') // shorthand — string key
57
+ sum((d) => d.value) // equivalent — accessor function
58
+ mean('score') // string key shorthand
59
+ ```
60
+
61
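+ String keys work as field references only where a function takes a column name rather than a per-row computation: summary functions like `sum`, `mean`, `min`, `max`, `median`, `first`, `last`, `nDistinct`, `deviation`, `variance` (`n()` takes no key); vector and item functions like `cumsum('value')`, `lag('value')`, `rate('conversions', 'impressions')`; sort helpers like `asc('key')`, `desc('key')`; and column-name arguments to verbs such as `groupBy('category', ...)`, `select(['name'])`, `distinct(['category'])`, and `fill('value')`. Predicates and computed values (as in `filter` and `mutate`) always take functions, never strings.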
62
+
63
+ ## The Function Taxonomy
64
+
65
+ This is **critical** — each function type belongs in a specific context:
66
+
67
+ ### Tidy Verbs → go directly inside `tidy()`
68
+
69
+ These are pipeline steps that transform the array:
70
+
71
+ ```js
72
+ tidy(data,
73
+ filter((d) => d.active), // filter rows
74
+ mutate({ tax: (d) => d.price * 0.1 }), // add/modify columns per item
75
+ arrange(desc('price')), // sort rows
76
+ select(['name', 'price', 'tax']), // pick columns
77
+ distinct(['category']), // deduplicate
78
+ rename({ old_name: 'new_name' }) // rename columns
79
+ )
80
+ ```
81
+
82
+ Full list: `filter`, `mutate`, `transmute`, `mutateWithSummary`, `arrange` (alias: `sort`), `select` (alias: `pick`), `distinct`, `rename`, `slice`, `sliceHead`, `sliceTail`, `sliceMin`, `sliceMax`, `sliceSample`, `groupBy`, `summarize`, `summarizeAll`, `summarizeAt`, `summarizeIf`, `total`, `totalAll`, `totalAt`, `totalIf`, `count`, `tally`, `innerJoin`, `leftJoin`, `fullJoin`, `pivotWider`, `pivotLonger`, `complete`, `expand`, `fill`, `replaceNully`, `addRows` (alias: `addItems`), `when`, `map`, `debug`
83
+
84
+ ### Summary Functions → go inside `summarize()` or `total()`
85
+
86
+ These **reduce** an array of items to a single value:
87
+
88
+ ```js
89
+ tidy(data,
90
+ summarize({
91
+ totalRevenue: sum('revenue'),
92
+ avgScore: mean('score'),
93
+ count: n(),
94
+ })
95
+ )
96
+ // => [{ totalRevenue: 1500, avgScore: 85, count: 10 }]
97
+ ```
98
+
99
+ Summary functions: `sum`, `mean`, `median`, `min`, `max`, `n`, `nDistinct`, `first`, `last`, `deviation`, `variance`, `meanRate`
100
+
101
+ **They also work inside `mutateWithSummary()`** to add summary-derived columns back to every row.
102
+
103
+ ### Vector Functions → go inside `mutateWithSummary()`
104
+
105
+ These operate on the **full array** and return a new array of the same length:
106
+
107
+ ```js
108
+ tidy(data,
109
+ mutateWithSummary({
110
+ runningTotal: cumsum('value'),
111
+ prevValue: lag('value'),
112
+ nextValue: lead('value'),
113
+ rank: rowNumber(),
114
+ })
115
+ )
116
+ ```
117
+
118
+ Vector functions: `cumsum`, `lag`, `lead`, `roll`, `rowNumber`
119
+
120
+ ### Item Functions → go inside `mutate()`
121
+
122
+ These transform **one item at a time**:
123
+
124
+ ```js
125
+ tidy(data,
126
+ mutate({
127
+ conversionRate: rate('conversions', 'impressions'),
128
+ })
129
+ )
130
+ ```
131
+
132
+ Item functions: `rate`
133
+
134
+ ### Selectors → go inside `select()`, `summarizeAt()`, `pivotLonger({ cols })`
135
+
136
+ These dynamically select columns by pattern:
137
+
138
+ ```js
139
+ tidy(data,
140
+ select([startsWith('revenue_'), 'name'])
141
+ )
142
+ ```
143
+
144
+ Selectors: `everything`, `startsWith`, `endsWith`, `contains`, `matches`, `numRange`, `negate`
145
+
146
+ ## mutate vs mutateWithSummary
147
+
148
+ This is the **most important distinction** in tidyjs. Getting this wrong produces silent bugs — code that runs but returns incorrect data.
149
+
150
+ ### `mutate` — per-item transformation
151
+
152
+ The function receives `(item, index, array)` for each item individually:
153
+
154
+ ```js
155
+ tidy(data,
156
+ mutate({
157
+ doubled: (d) => d.value * 2,
158
+ label: (d) => `${d.name}: ${d.value}`,
159
+ constant: 42, // non-function values are applied to all items
160
+ })
161
+ )
162
+ ```
163
+
164
+ ### `mutateWithSummary` — cross-item transformation
165
+
166
+ The function receives the **entire array** `(items[])` and must return an array of the same length OR a single value (broadcast to all items):
167
+
168
+ ```js
169
+ tidy(data,
170
+ mutateWithSummary({
171
+ runningTotal: cumsum('value'), // returns array
172
+ pctOfTotal: (items) => // custom: returns array
173
+ items.map(d => d.value / sum('value')(items)),
174
+ totalValue: sum('value'), // returns single value → broadcast
175
+ })
176
+ )
177
+ ```
178
+
179
+ ### When to use which?
180
+
181
+ | Use `mutate` when... | Use `mutateWithSummary` when... |
182
+ |---|---|
183
+ | Each item's new value depends only on that item | New value depends on other items in the array |
184
+ | Simple calculations: `(d) => d.a + d.b` | Cumulative ops: `cumsum`, `lag`, `lead`, `roll` |
185
+ | String formatting: `(d) => d.name.toUpperCase()` | Summary-derived: adding `sum()` or `mean()` as a column |
186
+ | Setting constants: `{ status: 'active' }` | Row numbering: `rowNumber()` |
187
+
188
+ ### The dangerous mistake
189
+
190
+ ```js
191
+ // WRONG — sum() inside mutate() does NOT work correctly
192
+ // sum() expects the full array, but mutate passes one item at a time
193
+ tidy(data, mutate({ total: sum('value') }))
194
+
195
+ // CORRECT — use mutateWithSummary for cross-item operations
196
+ tidy(data, mutateWithSummary({ total: sum('value') }))
197
+ ```
198
+
199
+ ## groupBy Semantics
200
+
201
+ `groupBy` splits data into groups, runs operations per-group, then recombines:
202
+
203
+ ```js
204
+ tidy(data,
205
+ groupBy('category', [
206
+ summarize({ total: sum('value') })
207
+ ])
208
+ )
209
+ // => [{ category: 'A', total: 100 }, { category: 'B', total: 200 }]
210
+ ```
211
+
212
+ **Key behaviors:**
213
+ - Group keys are automatically merged back into results (disable with `addGroupKeys: false`)
214
+ - Operations inside the `fns` array run on each group independently
215
+ - Without an export option, results are flattened back to a single array (ungrouped)
216
+ - Group by multiple keys: `groupBy(['category', 'region'], [...])`
217
+ - Group by computed key: `groupBy((d) => d.date.getFullYear(), [...])`
218
+
219
+ ### Export Modes
220
+
221
+ By default, `groupBy` ungroups the result back into a flat array. Use export mode shortcuts to get different output shapes:
222
+
223
+ ```js
224
+ // Flat array (default — no export option)
225
+ groupBy('key', [summarize(...)])
226
+ // => [{ key: 'a', total: 10 }, { key: 'b', total: 20 }]
227
+
228
+ // Nested entries: [[key, values], ...]
229
+ groupBy('key', [summarize(...)], groupBy.entries())
230
+
231
+ // Entries as objects: [{ key, values }, ...]
232
+ groupBy('key', [summarize(...)], groupBy.entriesObject())
233
+
234
+ // Plain object: { key: values, ... }
235
+ groupBy('key', [summarize(...)], groupBy.object())
236
+
237
+ // ES Map: Map { key => values }
238
+ groupBy('key', [summarize(...)], groupBy.map())
239
+
240
+ // Grouped Map (raw internal structure)
241
+ groupBy('key', [summarize(...)], groupBy.grouped())
242
+
243
+ // Just the keys
244
+ groupBy('key', [summarize(...)], groupBy.keys())
245
+
246
+ // Just the values (arrays)
247
+ groupBy('key', [summarize(...)], groupBy.values())
248
+
249
+ // Per-level control for multi-level grouping
250
+ groupBy(['cat', 'subcat'], [summarize(...)], groupBy.levels({ levels: ['object', 'entries'] }))
251
+ ```
252
+
253
+ Export options also accept: `flat`, `single`, `mapLeaf`, `mapLeaves`, `mapEntry`, `compositeKey`.
254
+
255
+ **Important:** When using an export mode, `groupBy` becomes a `TidyGroupExportFn` — it must be the **last step** in the `tidy()` pipeline (or used inside another `groupBy`).
256
+
257
+ ## TypeScript Tips
258
+
259
+ - **Accessor typing:** `(d: MyType) => d.value` gives full type inference inside `mutate`, `filter`, etc.
260
+ - **Pipeline step limit:** `tidy()` has type overloads for up to 10 steps. For longer pipelines, split into multiple `tidy()` calls or use `as` assertions.
261
+ - **groupBy return types:** The return type changes based on the export option. `groupBy.object()` returns `ObjectOutput`, `groupBy.entries()` returns `EntriesOutput`, etc. Without an export option, it returns the flat array type.
262
+ - **Summary function keys:** The string form `sum('value')` is type-checked: the key must exist on the input type. Use accessor functions `sum((d) => d.value)` for computed values.
263
+
264
+ ## What tidyjs is NOT
265
+
266
+ - **Not a DataFrame wrapper** — works directly with `{key: value}[]` arrays, no special data structure
267
+ - **Not lazy-evaluated** — each verb executes immediately in the pipeline
268
+ - **Not a database query builder** — all data is in memory
269
+ - **Not a charting library** — it transforms data; use a separate library to visualize
270
+ - **Not a replacement for lodash/Array methods** — use it when you need multi-step data wrangling pipelines; for simple `.filter()` or `.map()`, plain JS is fine
@@ -0,0 +1,384 @@
1
+ # Patterns and Recipes
2
+
3
+ Multi-verb recipes for common data transformation tasks.
4
+
5
+ ---
6
+
7
+ ## 1. Group and Summarize
8
+
9
+ The most common tidyjs pattern — split data into groups, then aggregate each group.
10
+
11
+ ```js
12
+ const data = [
13
+ { category: 'A', region: 'east', value: 10 },
14
+ { category: 'A', region: 'west', value: 20 },
15
+ { category: 'B', region: 'east', value: 30 },
16
+ { category: 'B', region: 'west', value: 40 },
17
+ ];
18
+
19
+ tidy(data,
20
+ groupBy('category', [
21
+ summarize({
22
+ total: sum('value'),
23
+ avg: mean('value'),
24
+ count: n(),
25
+ })
26
+ ])
27
+ )
28
+ // => [
29
+ // { category: 'A', total: 30, avg: 15, count: 2 },
30
+ // { category: 'B', total: 70, avg: 35, count: 2 },
31
+ // ]
32
+ ```
33
+
34
+ **With multiple group keys:**
35
+
36
+ ```js
37
+ tidy(data,
38
+ groupBy(['category', 'region'], [
39
+ summarize({ total: sum('value') })
40
+ ])
41
+ )
42
+ // => [
43
+ // { category: 'A', region: 'east', total: 10 },
44
+ // { category: 'A', region: 'west', total: 20 },
45
+ // { category: 'B', region: 'east', total: 30 },
46
+ // { category: 'B', region: 'west', total: 40 },
47
+ // ]
48
+ ```
49
+
50
+ **Export as a keyed object:**
51
+
52
+ ```js
53
+ tidy(data,
54
+ groupBy('category', [
55
+ summarize({ total: sum('value') })
56
+ ], groupBy.object({ single: true }))
57
+ )
58
+ // => { A: { category: 'A', total: 30 }, B: { category: 'B', total: 70 } }
59
+ ```
60
+
61
+ ---
62
+
63
+ ## 2. Pivot Wider and Longer
64
+
65
+ ### Long to wide
66
+
67
+ ```js
68
+ const data = [
69
+ { name: 'Alice', metric: 'score', value: 90 },
70
+ { name: 'Alice', metric: 'rank', value: 1 },
71
+ { name: 'Bob', metric: 'score', value: 80 },
72
+ { name: 'Bob', metric: 'rank', value: 2 },
73
+ ];
74
+
75
+ tidy(data,
76
+ pivotWider({
77
+ namesFrom: 'metric',
78
+ valuesFrom: 'value',
79
+ })
80
+ )
81
+ // => [
82
+ // { name: 'Alice', score: 90, rank: 1 },
83
+ // { name: 'Bob', score: 80, rank: 2 },
84
+ // ]
85
+ ```
86
+
87
+ ### Wide to long
88
+
89
+ ```js
90
+ const data = [
91
+ { name: 'Alice', score: 90, rank: 1 },
92
+ { name: 'Bob', score: 80, rank: 2 },
93
+ ];
94
+
95
+ tidy(data,
96
+ pivotLonger({
97
+ cols: ['score', 'rank'],
98
+ namesTo: 'metric',
99
+ valuesTo: 'value',
100
+ })
101
+ )
102
+ // => [
103
+ // { name: 'Alice', metric: 'score', value: 90 },
104
+ // { name: 'Alice', metric: 'rank', value: 1 },
105
+ // { name: 'Bob', metric: 'score', value: 80 },
106
+ // { name: 'Bob', metric: 'rank', value: 2 },
107
+ // ]
108
+ ```
109
+
110
+ **Pivot longer with selectors:**
111
+
112
+ ```js
113
+ tidy(data,
114
+ pivotLonger({
115
+ cols: [startsWith('q')], // columns like q1, q2, q3, q4
116
+ namesTo: 'quarter',
117
+ valuesTo: 'revenue',
118
+ })
119
+ )
120
+ ```
121
+
122
+ ---
123
+
124
+ ## 3. Fill Missing Time Series (expand + complete + fill)
125
+
126
+ Generate missing time periods and fill forward.
127
+
128
+ ```js
129
+ const data = [
130
+ { date: '2024-01', category: 'A', value: 10 },
131
+ { date: '2024-03', category: 'A', value: 30 }, // 2024-02 missing
132
+ { date: '2024-01', category: 'B', value: 20 },
133
+ { date: '2024-02', category: 'B', value: 25 },
134
+ ];
135
+
136
+ tidy(data,
137
+ complete({
138
+ date: ['2024-01', '2024-02', '2024-03'],
139
+ category: ['A', 'B'],
140
+ }),
141
+ fill('value')
142
+ )
143
+ // => all date/category combinations exist, nulls filled forward
144
+ ```
145
+
146
+ **With numeric sequences:**
147
+
148
+ ```js
149
+ tidy(data,
150
+ complete({
151
+ year: fullSeq('year', { period: 1 }), // fills gaps in year column
152
+ category: ['A', 'B'],
153
+ }),
154
+ replaceNully({ value: 0 }) // fill missing with 0 instead of forward-fill
155
+ )
156
+ ```
157
+
158
+ ---
159
+
160
+ ## 4. Rolling Aggregation
161
+
162
+ Compute a moving average or other rolling window calculation.
163
+
164
+ ```js
165
+ const data = [
166
+ { date: '2024-01', value: 10 },
167
+ { date: '2024-02', value: 20 },
168
+ { date: '2024-03', value: 15 },
169
+ { date: '2024-04', value: 25 },
170
+ { date: '2024-05', value: 30 },
171
+ ];
172
+
173
+ tidy(data,
174
+ mutateWithSummary({
175
+ movingAvg3: roll(3, mean('value'), { partial: true }),
176
+ })
177
+ )
178
+ // => each row gets a 3-period moving average of 'value'
179
+ // partial: true means first 2 rows use windows smaller than 3
180
+ ```
181
+
182
+ **Rolling sum:**
183
+
184
+ ```js
185
+ tidy(data,
186
+ mutateWithSummary({
187
+ rollingSum: roll(3, sum('value')),
188
+ })
189
+ )
190
+ ```
191
+
192
+ ---
193
+
194
+ ## 5. Cumulative Calculations
195
+
196
+ Add a running total, cumulative count, or percentage of total.
197
+
198
+ ```js
199
+ const data = [
200
+ { month: 'Jan', revenue: 100 },
201
+ { month: 'Feb', revenue: 150 },
202
+ { month: 'Mar', revenue: 200 },
203
+ ];
204
+
205
+ tidy(data,
206
+ mutateWithSummary({
207
+ cumulativeRevenue: cumsum('revenue'),
208
+ rowNum: rowNumber(),
209
+ totalRevenue: sum('revenue'), // broadcast single value to all rows
210
+ }),
211
+ mutate({
212
+ pctOfTotal: (d) => d.revenue / d.totalRevenue,
213
+ })
214
+ )
215
+ // => [
216
+ // { month: 'Jan', revenue: 100, cumulativeRevenue: 100, rowNum: 0, totalRevenue: 450, pctOfTotal: 0.222 },
217
+ // { month: 'Feb', revenue: 150, cumulativeRevenue: 250, rowNum: 1, totalRevenue: 450, pctOfTotal: 0.333 },
218
+ // { month: 'Mar', revenue: 200, cumulativeRevenue: 450, rowNum: 2, totalRevenue: 450, pctOfTotal: 0.444 },
219
+ // ]
220
+ ```
221
+
222
+ ---
223
+
224
+ ## 6. Conditional Pipeline Branching
225
+
226
+ Apply transformations only when a condition is met.
227
+
228
+ ```js
229
+ const includeInactive = false;
230
+
231
+ tidy(data,
232
+ when(includeInactive, []), // no-op when false
233
+ when(!includeInactive, [filter((d) => d.active)]), // filter when true
234
+ arrange(desc('value'))
235
+ )
236
+ ```
237
+
238
+ **With a predicate function:**
239
+
240
+ ```js
241
+ tidy(data,
242
+ when(
243
+ (items) => items.length > 100, // only truncate if dataset is large
244
+ [sliceHead(100)]
245
+ ),
246
+ summarize({ avg: mean('score') })
247
+ )
248
+ ```
249
+
250
+ ---
251
+
252
+ ## 7. Multi-Level Grouping with Export
253
+
254
+ Nested grouping with per-level export control.
255
+
256
+ ```js
257
+ const data = [
258
+ { dept: 'Eng', team: 'Frontend', name: 'Alice', salary: 100 },
259
+ { dept: 'Eng', team: 'Frontend', name: 'Bob', salary: 110 },
260
+ { dept: 'Eng', team: 'Backend', name: 'Carol', salary: 120 },
261
+ { dept: 'Sales', team: 'Enterprise', name: 'Dave', salary: 90 },
262
+ ];
263
+
264
+ // Nested object: { dept: { team: [items] } }
265
+ tidy(data,
266
+ groupBy(['dept', 'team'], [],
267
+ groupBy.levels({ levels: ['object', 'object'] })
268
+ )
269
+ )
270
+ // => {
271
+ // Eng: { Frontend: [Alice, Bob], Backend: [Carol] },
272
+ // Sales: { Enterprise: [Dave] }
273
+ // }
274
+ ```
275
+
276
+ **Flat export with composite keys:**
277
+
278
+ ```js
279
+ tidy(data,
280
+ groupBy(['dept', 'team'], [summarize({ total: sum('salary') })],
281
+ groupBy.object({ flat: true, compositeKey: (keys) => keys.join(' > ') })
282
+ )
283
+ )
284
+ // => { 'Eng > Frontend': [...], 'Eng > Backend': [...], 'Sales > Enterprise': [...] }
285
+ ```
286
+
287
+ ---
288
+
289
+ ## 8. Join and Enrich
290
+
291
+ Add columns from a lookup table.
292
+
293
+ ```js
294
+ const orders = [
295
+ { orderId: 1, productId: 'A', qty: 5 },
296
+ { orderId: 2, productId: 'B', qty: 3 },
297
+ { orderId: 3, productId: 'A', qty: 2 },
298
+ ];
299
+
300
+ const products = [
301
+ { productId: 'A', name: 'Widget', price: 10 },
302
+ { productId: 'B', name: 'Gadget', price: 25 },
303
+ ];
304
+
305
+ tidy(orders,
306
+ leftJoin(products, { by: 'productId' }),
307
+ mutate({ total: (d) => d.qty * d.price }),
308
+ arrange(desc('total'))
309
+ )
310
+ // => [
311
+ // { orderId: 2, productId: 'B', qty: 3, name: 'Gadget', price: 25, total: 75 },
312
+ // { orderId: 1, productId: 'A', qty: 5, name: 'Widget', price: 10, total: 50 },
313
+ // { orderId: 3, productId: 'A', qty: 2, name: 'Widget', price: 10, total: 20 },
314
+ // ]
315
+ ```
316
+
317
+ ---
318
+
319
+ ## 9. Top-N Per Group
320
+
321
+ Get the highest/lowest items within each group.
322
+
323
+ ```js
324
+ const data = [
325
+ { category: 'A', name: 'a1', score: 90 },
326
+ { category: 'A', name: 'a2', score: 85 },
327
+ { category: 'A', name: 'a3', score: 70 },
328
+ { category: 'B', name: 'b1', score: 95 },
329
+ { category: 'B', name: 'b2', score: 60 },
330
+ ];
331
+
332
+ // Top 2 per category
333
+ tidy(data,
334
+ groupBy('category', [
335
+ arrange(desc('score')),
336
+ sliceHead(2),
337
+ ])
338
+ )
339
+ // => [
340
+ // { category: 'A', name: 'a1', score: 90 },
341
+ // { category: 'A', name: 'a2', score: 85 },
342
+ // { category: 'B', name: 'b1', score: 95 },
343
+ // { category: 'B', name: 'b2', score: 60 },
344
+ // ]
345
+ ```
346
+
347
+ **Alternative using sliceMax:**
348
+
349
+ ```js
350
+ tidy(data,
351
+ groupBy('category', [
352
+ sliceMax(2, 'score'),
353
+ ])
354
+ )
355
+ ```
356
+
357
+ ---
358
+
359
+ ## 10. Lag/Lead for Period-Over-Period Comparison
360
+
361
+ Calculate change from previous period.
362
+
363
+ ```js
364
+ const data = [
365
+ { month: 'Jan', revenue: 100 },
366
+ { month: 'Feb', revenue: 120 },
367
+ { month: 'Mar', revenue: 110 },
368
+ ];
369
+
370
+ tidy(data,
371
+ mutateWithSummary({
372
+ prevRevenue: lag('revenue', { default: 0 }),
373
+ }),
374
+ mutate({
375
+ change: (d) => d.revenue - d.prevRevenue,
376
+ pctChange: (d) => d.prevRevenue ? (d.revenue - d.prevRevenue) / d.prevRevenue : null,
377
+ })
378
+ )
379
+ // => [
380
+ // { month: 'Jan', revenue: 100, prevRevenue: 0, change: 100, pctChange: null },
381
+ // { month: 'Feb', revenue: 120, prevRevenue: 100, change: 20, pctChange: 0.2 },
382
+ // { month: 'Mar', revenue: 110, prevRevenue: 120, change: -10, pctChange: -0.083 },
383
+ // ]
384
+ ```