npm - @rip-lang/csv - Versions diffs - 1.0.2 → 1.0.3 - Mend

@rip-lang/csv 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/README.md CHANGED Viewed

@@ -235,6 +235,37 @@ CSV.writer(opts)               # create reusable Writer instance
 CSV.formatRow(row, opts)       # format single row -> string
 ```
+## Performance
+The parser delivers roughly **270-415 MB/s** throughput on real-world
+CSV files (see the table below), scaling linearly from megabytes to gigabytes:
+| File | Size | Rows | Fields/row | Time | Throughput |
+|------|------|------|-----------|------|-----------|
+| Medical records | 10.5 MB | 43,962 | 44 | 39ms | 269 MB/s |
+| Japanese postal codes | 10.9 MB | 124,565 | 15 | 26ms | 414 MB/s |
+| Geodata | 24.8 MB | 662,061 | 6 | 65ms | 382 MB/s |
+| Lab results (large) | 137.3 MB | 493,962 | 44 | 466ms | 294 MB/s |
+| Lab results (XL) | 315.8 MB | 997,195 | 44 | 1.1s | 287 MB/s |
+| Lab results (1GB+) | 1.2 GB | 3,497,822 | 44 | 4.1s | 298 MB/s |
+Quote-free files hit the fast path (~420 MB/s). Files with quoted fields
+use the full path (~300 MB/s). The `each` callback mode is slightly faster
+than array mode since it skips array allocation.
+For context, popular JS CSV parsers typically achieve 30-120 MB/s (Papa Parse,
+csv-parse, d3-dsv). This library is comfortably in the top tier of the JS
+ecosystem.
+## Roadmap
+- **Streaming file reader** — chunked parsing for files that don't fit in
+  memory, splitting at safe quote boundaries
+- **`transform` callback** — per-cell value transformation during parsing
+- **`dynamicTyping`** — auto-convert `"42"` to `42`, `"true"` to `true`
+- **Column selection** — parse only specific columns by index or name
+- **Error/warning collection** — report recovered issues in relax mode
 ## License
 MIT

package/csv.rip CHANGED Viewed

@@ -66,10 +66,10 @@ def probe(str, opts = {})
       if n > bestCount
         best = d
         bestCount = n
-    sep ?= best ? ','
+    sep ?= best ?? ','
   # detect quoting
-  quote = opts.quote ? '"'
+  quote = opts.quote ?? '"'
   hasQuotes = sample.indexOf(quote) >= 0
   # detect escape style: backslash vs doubled quote
@@ -83,17 +83,17 @@ def probe(str, opts = {})
   # merge with user options (user wins)
   {
     str
-    sep:            opts.sep ? sep
+    sep:            opts.sep ?? sep
     quote:          quote
     escape:         escape
-    row:            opts.row ? row
+    row:            opts.row ?? row
     hasQuotes:      hasQuotes
-    excel:          opts.excel ? false
-    relax:          opts.relax ? false
-    strip:          opts.strip ? false
-    headers:        opts.headers ? false
-    comments:       opts.comments ? null
-    skipBlanks:     opts.skipBlanks ? true
+    excel:          opts.excel ?? false
+    relax:          opts.relax ?? false
+    strip:          opts.strip ?? false
+    headers:        opts.headers ?? false
+    comments:       opts.comments ?? null
+    skipBlanks:     opts.skipBlanks ?? true
     each:           opts.each ?? null
   }
@@ -117,7 +117,7 @@ def makeEmitter(cfg)
     if ctx.keys
       obj = {}
       for key, i in ctx.keys
-        obj[key] = row[i] ? ''
+        obj[key] = row[i] ?? ''
       if each
         ctx.count++
         return each(obj, ctx.count - 1) isnt false
@@ -202,7 +202,7 @@ def readFast(str, cfg)
 def readFull(str, cfg)
   {sep, quote, escape, excel, relax} = cfg
-  {comments, skipBlanks} = cfg
+  {row: rowDelim, comments, skipBlanks} = cfg
   {emit, result} = makeEmitter(cfg)
   sepCode   = sep.charCodeAt(0)
@@ -210,6 +210,8 @@ def readFull(str, cfg)
   sepLen    = sep.length
   escSame   = escape is quote
   escCode   = escape.charCodeAt(0)
+  nlChar    = rowDelim[0]               # '\n' or '\r' (for \r\n)
+  nlCode    = nlChar.charCodeAt(0)
   len       = str.length
   pos       = 0
@@ -221,14 +223,12 @@ def readFull(str, cfg)
     # skip empty lines at line start
     if atLineStart
-      if skipBlanks and (c is LF or c is CR)
+      if skipBlanks and c is nlCode
         pos += crlfLen(str, pos)
         continue
       if comments and str[pos] is comments
-        nl = str.indexOf('\n', pos)
-        if nl is -1
-          nl = str.indexOf('\r', pos)
-        pos = nl is -1 ? len : nl + 1
+        nl = str.indexOf(nlChar, pos)
+        pos = nl is -1 ? len : nl + crlfLen(str, nl)
         continue
       atLineStart = false
@@ -271,7 +271,7 @@ def readFull(str, cfg)
         break if pos >= len  # end of string
         c2 = str.charCodeAt(pos)
-        break if c2 is sepCode or c2 is LF or c2 is CR  # valid end-of-field
+        break if c2 is sepCode or c2 is nlCode  # valid end-of-field
         # unexpected character after closing quote
         unless relax
@@ -288,14 +288,14 @@ def readFull(str, cfg)
         c2 = str.charCodeAt(pos)
         if c2 is sepCode
           pos += sepLen
-        else if c2 is LF or c2 is CR
+        else if c2 is nlCode
           pos += crlfLen(str, pos)
           break unless emit(row)
           row = []
           atLineStart = true
     # === newline (end of row) ===
-    else if c is LF or c is CR
+    else if c is nlCode
       pos += crlfLen(str, pos)
       break unless emit(row)
       row = []
@@ -309,17 +309,8 @@ def readFull(str, cfg)
     # === unquoted field ===
     else
       # indexOf ratchet: find nearest sep or newline
-      s = str.indexOf(sep, pos)
-      n = str.indexOf('\n', pos)
-      r = str.indexOf('\r', pos)
-      # nearest newline (\r or \n)
-      if r >= 0 and n >= 0
-        nl = Math.min(r, n)
-      else if r >= 0
-        nl = r
-      else
-        nl = n
+      s  = str.indexOf(sep, pos)
+      nl = str.indexOf(nlChar, pos)
       # take the nearer boundary
       if s >= 0 and (nl is -1 or s < nl)
@@ -346,13 +337,13 @@ def readFull(str, cfg)
 class Writer
   constructor: (opts = {}) ->
-    @sep    = opts.sep    ? ','
-    @quote  = opts.quote  ? '"'
-    @escape = opts.escape ? @quote
-    @mode   = opts.mode   ? 'compact'
-    @excel  = opts.excel  ? false
-    @drop   = opts.drop   ? false
-    @rowsep = opts.rowsep ? '\n'
+    @sep    = opts.sep    ?? ','
+    @quote  = opts.quote  ?? '"'
+    @escape = opts.escape ?? @quote
+    @mode   = opts.mode   ?? 'compact'
+    @excel  = opts.excel  ?? false
+    @drop   = opts.drop   ?? false
+    @rowsep = opts.rowsep ?? '\n'
     # pre-compute escaped quote
     @esc      = @escape + @quote
@@ -367,7 +358,7 @@ class Writer
   # format a single row as a CSV line (no trailing row separator)
   row: (data) ->
-    cells = (String(v ? '') for v in data)
+    cells = (String(v ?? '') for v in data)
     # drop trailing empty columns
     if @drop

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@rip-lang/csv",
-  "version": "1.0.2",
+  "version": "1.0.3",
   "description": "Fast, flexible CSV parser and writer for Rip — indexOf ratchet engine, auto-detection, zero dependencies",
   "type": "module",
   "main": "csv.rip",