@rip-lang/csv 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +31 -0
  2. package/csv.rip +30 -39
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -235,6 +235,37 @@ CSV.writer(opts) # create reusable Writer instance
235
235
  CSV.formatRow(row, opts) # format single row -> string
236
236
  ```
237
237
 
238
+ ## Performance
239
+
240
+ The parser consistently delivers **270-415 MB/s** throughput on real-world
241
+ CSV files, scaling linearly from megabytes to gigabytes:
242
+
243
+ | File | Size | Rows | Fields/row | Time | Throughput |
244
+ |------|------|------|-----------|------|-----------|
245
+ | Medical records | 10.5 MB | 43,962 | 44 | 39ms | 269 MB/s |
246
+ | Japanese postal codes | 10.9 MB | 124,565 | 15 | 26ms | 414 MB/s |
247
+ | Geodata | 24.8 MB | 662,061 | 6 | 65ms | 382 MB/s |
248
+ | Lab results (large) | 137.3 MB | 493,962 | 44 | 466ms | 294 MB/s |
249
+ | Lab results (XL) | 315.8 MB | 997,195 | 44 | 1.1s | 287 MB/s |
250
+ | Lab results (1GB+) | 1.2 GB | 3,497,822 | 44 | 4.1s | 298 MB/s |
251
+
252
+ Quote-free files hit the fast path (~420 MB/s). Files with quoted fields
253
+ use the full path (~300 MB/s). The `each` callback mode is slightly faster
254
+ than array mode since it skips array allocation.
255
+
256
+ For context, popular JS CSV parsers typically achieve 30-120 MB/s (Papa Parse,
257
+ csv-parse, d3-dsv). This library is comfortably in the top tier of the JS
258
+ ecosystem.
259
+
260
+ ## Roadmap
261
+
262
+ - **Streaming file reader** — chunked parsing for files that don't fit in
263
+ memory, splitting at safe quote boundaries
264
+ - **`transform` callback** — per-cell value transformation during parsing
265
+ - **`dynamicTyping`** — auto-convert `"42"` to `42`, `"true"` to `true`
266
+ - **Column selection** — parse only specific columns by index or name
267
+ - **Error/warning collection** — report recovered issues in relax mode
268
+
238
269
  ## License
239
270
 
240
271
  MIT
package/csv.rip CHANGED
@@ -66,10 +66,10 @@ def probe(str, opts = {})
66
66
  if n > bestCount
67
67
  best = d
68
68
  bestCount = n
69
- sep ?= best ? ','
69
+ sep ?= best ?? ','
70
70
 
71
71
  # detect quoting
72
- quote = opts.quote ? '"'
72
+ quote = opts.quote ?? '"'
73
73
  hasQuotes = sample.indexOf(quote) >= 0
74
74
 
75
75
  # detect escape style: backslash vs doubled quote
@@ -83,17 +83,17 @@ def probe(str, opts = {})
83
83
  # merge with user options (user wins)
84
84
  {
85
85
  str
86
- sep: opts.sep ? sep
86
+ sep: opts.sep ?? sep
87
87
  quote: quote
88
88
  escape: escape
89
- row: opts.row ? row
89
+ row: opts.row ?? row
90
90
  hasQuotes: hasQuotes
91
- excel: opts.excel ? false
92
- relax: opts.relax ? false
93
- strip: opts.strip ? false
94
- headers: opts.headers ? false
95
- comments: opts.comments ? null
96
- skipBlanks: opts.skipBlanks ? true
91
+ excel: opts.excel ?? false
92
+ relax: opts.relax ?? false
93
+ strip: opts.strip ?? false
94
+ headers: opts.headers ?? false
95
+ comments: opts.comments ?? null
96
+ skipBlanks: opts.skipBlanks ?? true
97
97
  each: opts.each ?? null
98
98
  }
99
99
 
@@ -117,7 +117,7 @@ def makeEmitter(cfg)
117
117
  if ctx.keys
118
118
  obj = {}
119
119
  for key, i in ctx.keys
120
- obj[key] = row[i] ? ''
120
+ obj[key] = row[i] ?? ''
121
121
  if each
122
122
  ctx.count++
123
123
  return each(obj, ctx.count - 1) isnt false
@@ -202,7 +202,7 @@ def readFast(str, cfg)
202
202
 
203
203
  def readFull(str, cfg)
204
204
  {sep, quote, escape, excel, relax} = cfg
205
- {comments, skipBlanks} = cfg
205
+ {row: rowDelim, comments, skipBlanks} = cfg
206
206
  {emit, result} = makeEmitter(cfg)
207
207
 
208
208
  sepCode = sep.charCodeAt(0)
@@ -210,6 +210,8 @@ def readFull(str, cfg)
210
210
  sepLen = sep.length
211
211
  escSame = escape is quote
212
212
  escCode = escape.charCodeAt(0)
213
+ nlChar = rowDelim[0] # '\n' or '\r' (for \r\n)
214
+ nlCode = nlChar.charCodeAt(0)
213
215
  len = str.length
214
216
  pos = 0
215
217
 
@@ -221,14 +223,12 @@ def readFull(str, cfg)
221
223
 
222
224
  # skip empty lines at line start
223
225
  if atLineStart
224
- if skipBlanks and (c is LF or c is CR)
226
+ if skipBlanks and c is nlCode
225
227
  pos += crlfLen(str, pos)
226
228
  continue
227
229
  if comments and str[pos] is comments
228
- nl = str.indexOf('\n', pos)
229
- if nl is -1
230
- nl = str.indexOf('\r', pos)
231
- pos = nl is -1 ? len : nl + 1
230
+ nl = str.indexOf(nlChar, pos)
231
+ pos = nl is -1 ? len : nl + crlfLen(str, nl)
232
232
  continue
233
233
  atLineStart = false
234
234
 
@@ -271,7 +271,7 @@ def readFull(str, cfg)
271
271
  break if pos >= len # end of string
272
272
 
273
273
  c2 = str.charCodeAt(pos)
274
- break if c2 is sepCode or c2 is LF or c2 is CR # valid end-of-field
274
+ break if c2 is sepCode or c2 is nlCode # valid end-of-field
275
275
 
276
276
  # unexpected character after closing quote
277
277
  unless relax
@@ -288,14 +288,14 @@ def readFull(str, cfg)
288
288
  c2 = str.charCodeAt(pos)
289
289
  if c2 is sepCode
290
290
  pos += sepLen
291
- else if c2 is LF or c2 is CR
291
+ else if c2 is nlCode
292
292
  pos += crlfLen(str, pos)
293
293
  break unless emit(row)
294
294
  row = []
295
295
  atLineStart = true
296
296
 
297
297
  # === newline (end of row) ===
298
- else if c is LF or c is CR
298
+ else if c is nlCode
299
299
  pos += crlfLen(str, pos)
300
300
  break unless emit(row)
301
301
  row = []
@@ -309,17 +309,8 @@ def readFull(str, cfg)
309
309
  # === unquoted field ===
310
310
  else
311
311
  # indexOf ratchet: find nearest sep or newline
312
- s = str.indexOf(sep, pos)
313
- n = str.indexOf('\n', pos)
314
- r = str.indexOf('\r', pos)
315
-
316
- # nearest newline (\r or \n)
317
- if r >= 0 and n >= 0
318
- nl = Math.min(r, n)
319
- else if r >= 0
320
- nl = r
321
- else
322
- nl = n
312
+ s = str.indexOf(sep, pos)
313
+ nl = str.indexOf(nlChar, pos)
323
314
 
324
315
  # take the nearer boundary
325
316
  if s >= 0 and (nl is -1 or s < nl)
@@ -346,13 +337,13 @@ def readFull(str, cfg)
346
337
 
347
338
  class Writer
348
339
  constructor: (opts = {}) ->
349
- @sep = opts.sep ? ','
350
- @quote = opts.quote ? '"'
351
- @escape = opts.escape ? @quote
352
- @mode = opts.mode ? 'compact'
353
- @excel = opts.excel ? false
354
- @drop = opts.drop ? false
355
- @rowsep = opts.rowsep ? '\n'
340
+ @sep = opts.sep ?? ','
341
+ @quote = opts.quote ?? '"'
342
+ @escape = opts.escape ?? @quote
343
+ @mode = opts.mode ?? 'compact'
344
+ @excel = opts.excel ?? false
345
+ @drop = opts.drop ?? false
346
+ @rowsep = opts.rowsep ?? '\n'
356
347
 
357
348
  # pre-compute escaped quote
358
349
  @esc = @escape + @quote
@@ -367,7 +358,7 @@ class Writer
367
358
 
368
359
  # format a single row as a CSV line (no trailing row separator)
369
360
  row: (data) ->
370
- cells = (String(v ? '') for v in data)
361
+ cells = (String(v ?? '') for v in data)
371
362
 
372
363
  # drop trailing empty columns
373
364
  if @drop
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rip-lang/csv",
3
- "version": "1.0.2",
3
+ "version": "1.0.4",
4
4
  "description": "Fast, flexible CSV parser and writer for Rip — indexOf ratchet engine, auto-detection, zero dependencies",
5
5
  "type": "module",
6
6
  "main": "csv.rip",