npm - @rip-lang/csv - Versions diffs - 1.0.1 - Mend

@rip-lang/csv 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/README.md ADDED Viewed

@@ -0,0 +1,240 @@
+<img src="https://raw.githubusercontent.com/shreeve/rip-lang/main/docs/rip.svg" style="width:50px" /> <br>
+# Rip CSV - @rip-lang/csv
+> **Fast, flexible CSV parser and writer — indexOf ratchet engine, auto-detection, zero dependencies**
+A high-performance CSV library for Rip that uses the JavaScript engine's
+SIMD-accelerated `indexOf` to skip over content in bulk. Auto-detects
+delimiters, quoting, escaping, BOM, and line endings. Supports excel mode,
+relax mode, headers, comments, streaming via row callback, and reusable
+writer instances. ~300 lines of Rip, zero dependencies.
+## Quick Start
+```bash
+bun add @rip-lang/csv
+```
+```coffee
+import { CSV } from '@rip-lang/csv'
+# Parse a string
+rows = CSV.read "name,age\nAlice,30\nBob,25\n"
+# [['name','age'], ['Alice','30'], ['Bob','25']]
+# Parse with headers (returns objects)
+users = CSV.read "name,age\nAlice,30\nBob,25\n", headers: true
+# [{name: 'Alice', age: '30'}, {name: 'Bob', age: '25'}]
+# Parse a file
+data = CSV.load! 'data.csv'
+data = CSV.load! 'data.csv', headers: true
+# Write CSV
+str = CSV.write [['a','b'], ['1','2']]
+# "a,b\n1,2\n"
+# Write to file
+CSV.save! 'out.csv', rows
+```
+## How It Works
+The parser uses an **indexOf ratchet** — a technique where the JavaScript
+engine's native `indexOf` (backed by SIMD instructions in V8 and JSC) does
+the heavy lifting. Instead of inspecting every character, the parser calls
+`indexOf` to jump directly to the next delimiter, newline, or quote. Each
+call can skip hundreds of bytes in a single native operation.
+```
+Source string:  "Alice,30,New York\nBob,25,Chicago\n..."
+                 ↑     ↑  ↑         ↑
+                 │     │  │         └── indexOf('\n') jumps here
+                 │     │  └── indexOf(',') jumps here
+                 │     └── indexOf(',') jumps here
+                 └── start
+Each indexOf call skips bulk content via SIMD — no per-byte scanning in JS.
+```
+The parser has two code paths, selected at startup by probing the first ~8KB:
+- **Fast path** — no quotes detected: pure indexOf for separators and newlines
+- **Full path** — quotes present: indexOf ratchet with quote/escape handling
+## Reading
+### Basic Parsing
+```coffee
+# Auto-detects delimiter, quoting, line endings
+rows = CSV.read str
+# Tab-separated, pipe-separated — auto-detected
+rows = CSV.read "a\tb\tc\n1\t2\t3\n"
+rows = CSV.read "a|b|c\n1|2|3\n"
+# Explicit separator
+rows = CSV.read str, sep: ';'
+```
+### Headers Mode
+```coffee
+# First row becomes object keys
+users = CSV.read str, headers: true
+# [{name: 'Alice', age: '30'}, ...]
+console.log users[0].name  # "Alice"
+```
+### Row-by-Row Processing
+```coffee
+# Process rows one at a time without building an array
+count = CSV.read str, each: (row, index) ->
+  console.log "Row #{index}: #{row}"
+# Early halt by returning false
+CSV.read str, each: (row) ->
+  if row[0] is 'STOP'
+    return false
+  process(row)
+```
+### File I/O
+```coffee
+# Read a file (async)
+rows = CSV.load! 'data.csv'
+rows = CSV.load! 'data.csv', headers: true, strip: true
+# Row-by-row file processing
+CSV.load! 'huge.csv', each: (row) -> db.insert!(row)
+```
+### Excel Mode
+```coffee
+# Handles ="01" literals (preserves leading zeros)
+rows = CSV.read '="01",hello\n', excel: true
+# [['01', 'hello']]
+```
+### Relax Mode
+```coffee
+# Recovers from stray/unmatched quotes instead of throwing
+rows = CSV.read str, relax: true
+```
+## Writing
+### Basic Writing
+```coffee
+str = CSV.write [['name','age'], ['Alice','30']]
+# "name,age\nAlice,30\n"
+# Write to file (async)
+CSV.save! 'out.csv', rows
+```
+### Format a Single Row
+```coffee
+line = CSV.formatRow ['Alice', 'New York, NY', '30']
+# 'Alice,"New York, NY",30'
+```
+### Reusable Writer
+```coffee
+w = CSV.writer(sep: '\t', excel: true)
+for record in records
+  line = w.row(record)
+  stream.write "#{line}\n"
+# Or format all at once
+output = w.rows(records)
+```
+### Writer Modes
+```coffee
+# Compact (default): quote only when necessary
+CSV.write rows, mode: 'compact'
+# Full: quote every field
+CSV.write rows, mode: 'full'
+# Excel: emit ="0123" for leading-zero numbers
+CSV.write rows, excel: true
+# Drop trailing empty columns
+CSV.write rows, drop: true
+```
+## Options Reference
+### Reader Options
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `sep` | string | auto | Field delimiter (`,` `\t` `\|` `;` or any string) |
+| `quote` | string | `"` | Quote/enclosure character |
+| `escape` | string | same as `quote` | Escape character (`"` for doubled, `\` for backslash) |
+| `headers` | boolean | `false` | First row as keys — return objects |
+| `excel` | boolean | `false` | Handle `="01"` literals |
+| `relax` | boolean | `false` | Recover from stray quotes |
+| `strip` | boolean | `false` | Trim whitespace from fields |
+| `comments` | string | `null` | Skip lines starting with this character |
+| `skipBlanks` | boolean | `true` | Skip blank lines |
+| `row` | string | auto | Line ending override (`\n`, `\r\n`, `\r`) |
+| `each` | function | `null` | `(row, index) ->` callback per row |
+### Writer Options
+| Option | Type | Default | Description |
+|--------|------|---------|-------------|
+| `sep` | string | `','` | Field delimiter |
+| `quote` | string | `'"'` | Quote character |
+| `escape` | string | same as `quote` | Escape character |
+| `mode` | string | `'compact'` | `'compact'` or `'full'` |
+| `excel` | boolean | `false` | Emit `="0123"` for leading zeros |
+| `drop` | boolean | `false` | Drop trailing empty columns |
+| `rowsep` | string | `'\n'` | Row separator |
+> **Note:** The writer defaults to doubled-quote escaping (`""`). Pass
+> `escape: '\\'` for backslash style.
+## Auto-Detection
+When you call `CSV.read(str)` with no options, the probe function scans the
+first ~8KB to automatically detect:
+- **BOM** — strips UTF-8 BOM if present
+- **`sep=` header** — Excel convention for declaring delimiter
+- **Delimiter** — counts `,` `\t` `|` `;` on the first line and picks the most frequent (falls back to `,` when none occur)
+- **Quote character** — detects if `"` appears in the sample
+- **Escape style** — `\"` (backslash) vs `""` (doubled quote)
+- **Line endings** — `\r\n`, `\n`, or `\r`
+User options override any probed value.
+## API Summary
+```coffee
+CSV.read(str, opts)            # parse string -> rows or objects
+CSV.load!(path, opts)          # parse file (async)
+CSV.write(rows, opts)          # format rows -> CSV string
+CSV.save!(path, rows, opts)    # write to file (async)
+CSV.writer(opts)               # create reusable Writer instance
+CSV.formatRow(row, opts)       # format single row -> string
+```
+## License
+MIT

package/csv.rip ADDED Viewed

@@ -0,0 +1,437 @@
+# ==============================================================================
+# csv — Fast, flexible CSV parser and writer for Rip
+#
+# Author: Steve Shreeve (steve.shreeve@gmail.com)
+#   Date: February 6, 2026
+#
+# Engine: indexOf ratchet — SIMD-accelerated scanning via the JS engine's
+#         native indexOf, skipping bulk content in a single call. No regex
+#         in the hot loop. Auto-detects delimiter, quoting, escaping, BOM,
+#         and line endings. Supports excel mode, relax mode, headers, comments,
+#         streaming via row callback, and reusable writer instances.
+# ==============================================================================
+# ==[ Constants ]==
+# Candidate field delimiters that probe() counts on the first line when the
+# caller supplies no separator.
+DELIMITERS =! [',', '\t', '|', ';']
+# Two-character row terminator used by probe() when sniffing line endings.
+CRLF       =! '\r\n'
+# Character codes compared against charCodeAt() results.
+CR         =! 13      # \r (carriage return)
+LF         =! 10      # \n (line feed)
+EQ         =! 61      # =  (first character of an excel-mode ="..." literal)
+# ==============================================================================
+# Probe — auto-detect CSV dialect from the first few KB
+# ==============================================================================
+def probe(str, opts = {})
+  # Sniff the CSV dialect of `str` and merge it with the caller's options.
+  #
+  #   str  — raw CSV text
+  #   opts — user options; a value supplied here wins over the sniffed one
+  #
+  # Returns the reader config: `str` (with BOM and any `sep=` header removed),
+  # sep, quote, escape, row, hasQuotes, and the pass-through settings excel,
+  # relax, strip, headers, comments, skipBlanks and each.
+  # strip BOM
+  if str.charCodeAt(0) is 0xFEFF
+    str = str.slice(1)
+  # detect sep= header (Excel convention)
+  sep = null
+  if str[0..3] is "sep="
+    end = str.indexOf('\n')
+    end = str.indexOf('\r') if end is -1
+    if end is -1
+      # the header is the entire input: take the rest of the string as the
+      # separator (slicing up to -1 would chop its last character) and leave
+      # no data behind, so the header itself is not parsed as a row
+      sep = str.slice(4)
+      str = ''
+    else
+      stop = (end > 0 and str.charCodeAt(end - 1) is CR) ? end - 1 : end
+      sep = str.slice(4, stop)
+      str = str.slice(end + 1)
+    # an empty declaration counts as no declaration: indexOf('') matches at
+    # every position without advancing, so the readers would never terminate
+    sep = null unless sep.length
+  # sample first ~8KB for sniffing
+  sample = str.slice(0, 8192)
+  # detect line ending style
+  cr = sample.indexOf('\r')
+  lf = sample.indexOf('\n')
+  if cr >= 0 and lf is cr + 1
+    row = CRLF
+  else if lf >= 0
+    row = '\n'
+  else if cr >= 0
+    row = '\r'
+  else
+    row = '\n'
+  # detect delimiter from first line
+  lineEnd = sample.indexOf(row is CRLF ? '\r' : row)
+  lineEnd = sample.length if lineEnd is -1
+  firstLine = sample.slice(0, lineEnd)
+  unless opts.sep
+    best = null
+    bestCount = 0
+    for d in DELIMITERS
+      n = 0
+      i = -1
+      n++ while (i = firstLine.indexOf(d, i + 1)) isnt -1
+      if n > bestCount
+        best = d
+        bestCount = n
+    # a sep= header wins over counting; ',' is the last resort
+    sep ?= best ? ','
+  # detect quoting — search the whole input, not just the sample: a file whose
+  # first quoted field lies beyond the sample must not be sent down the
+  # quote-unaware fast path. indexOf stops at the first hit, so this costs
+  # nothing extra when a quote appears early.
+  quote = opts.quote ? '"'
+  hasQuotes = str.indexOf(quote) >= 0
+  # detect escape style: backslash vs doubled quote
+  escape = opts.escape
+  unless escape
+    if hasQuotes
+      escape = sample.indexOf("\\#{quote}") >= 0 ? '\\' : quote
+    else
+      escape = quote
+  # merge with user options (user wins); an empty opts.sep is treated as unset
+  {
+    str
+    sep:            opts.sep or sep
+    quote:          quote
+    escape:         escape
+    row:            opts.row ? row
+    hasQuotes:      hasQuotes
+    excel:          opts.excel ? false
+    relax:          opts.relax ? false
+    strip:          opts.strip ? false
+    headers:        opts.headers ? false
+    comments:       opts.comments ? null
+    skipBlanks:     opts.skipBlanks ? true
+    each:           opts.each ?? null
+  }
+# ==============================================================================
+# Helpers — emit rows with headers/callback support
+# ==============================================================================
+def makeEmitter(cfg)
+  # Build the row sink shared by both readers.
+  #
+  #   cfg.headers — when truthy, the first row is kept as keys and later rows
+  #                 are turned into objects
+  #   cfg.strip   — when truthy, every cell is trimmed
+  #   cfg.each    — optional (item, index) callback; when present nothing is
+  #                 collected and a strict `false` return requests a halt
+  #
+  # Returns {emit, result}: emit(row) answers false only when the callback
+  # asked to stop; result() yields the delivered count (callback mode) or the
+  # collected rows.
+  {headers, strip, each} = cfg
+  state = {keys: null, rows: (each ? null : []), count: 0}
+  # hand one finished item (array or object) to the callback or the collector
+  deliver = (item) ->
+    if each
+      state.count++
+      return each(item, state.count - 1) isnt false
+    state.rows.push item
+    true
+  emit = (row) ->
+    if strip
+      row = row.map((cell) -> cell.trim())
+    # headers mode: the very first row is captured as keys, never delivered
+    if headers and not state.keys
+      state.keys = row
+      return true
+    # no keys captured -> plain array output
+    return deliver(row) unless state.keys
+    # keyed output: missing trailing cells become empty strings
+    record = {}
+    for name, idx in state.keys
+      record[name] = row[idx] ? ''
+    deliver(record)
+  result = -> each ? state.count : state.rows
+  {emit, result}
+# ==============================================================================
+# Helper — advance past \r\n or single \r or \n
+# ==============================================================================
+def crlfLen(str, pos)
+  # Width of the line break at `pos`: 2 for a \r\n pair, 1 for anything else.
+  return 1 unless str.charCodeAt(pos) is CR
+  return 1 unless str.charCodeAt(pos + 1) is LF
+  2
+# ==============================================================================
+# Reader — Fast path (no quotes detected)
+# ==============================================================================
+def readFast(str, cfg)
+  # Parse `str` when no quote character was found: rows are split on cfg.row
+  # and fields on cfg.sep using indexOf only.
+  #
+  #   cfg.sep        — field separator (any length)
+  #   cfg.row        — row delimiter
+  #   cfg.comments   — lines whose first character equals this are skipped
+  #   cfg.skipBlanks — when truthy, empty lines are skipped; otherwise an empty
+  #                    line is emitted as ['']
+  #
+  # Returns whatever the emitter's result() yields (rows, or a count when an
+  # `each` callback is configured). Stops early when emit() answers false.
+  {sep, row: rowDelim, comments, skipBlanks} = cfg
+  {emit, result} = makeEmitter(cfg)
+  sepLen = sep.length
+  rowLen = rowDelim.length
+  len    = str.length
+  pos    = 0
+  while pos < len
+    # find end of current line
+    rowEnd = str.indexOf(rowDelim, pos)
+    rowEnd = len if rowEnd is -1
+    # skip empty lines (with skipBlanks off, fall through and emit [''])
+    blank = pos is rowEnd
+    if blank and skipBlanks
+      pos = rowEnd + rowLen
+      continue
+    # skip comment lines — never test a blank line here: str[pos] would be the
+    # row delimiter (or, had pos already been advanced, the next line's first
+    # character, which used to swallow a blank line preceding a comment)
+    if comments and not blank and str[pos] is comments
+      pos = rowEnd + rowLen
+      continue
+    # extract fields with indexOf ratchet for separator
+    row = []
+    p = pos
+    loop
+      s = str.indexOf(sep, p)
+      if s >= 0 and s < rowEnd
+        row.push str.slice(p, s)
+        p = s + sepLen
+        if p >= rowEnd
+          row.push ''  # trailing separator -> empty final field
+          break
+      else
+        row.push str.slice(p, rowEnd)
+        break
+    pos = rowEnd + rowLen
+    # trim trailing \r for mixed line endings
+    last = row.length - 1
+    if last >= 0 and row[last].endsWith('\r')
+      row[last] = row[last].slice(0, -1)
+    break unless emit(row)
+  result()
+# ==============================================================================
+# Reader — Full path (quotes present)
+# ==============================================================================
+def readFull(str, cfg)
+  # Parse `str` when quote characters are present.
+  #
+  #   cfg.sep / cfg.quote / cfg.escape — dialect characters
+  #   cfg.excel      — also accept ="..." as a quoted field
+  #   cfg.relax      — recover from stray/unclosed quotes instead of throwing
+  #   cfg.comments   — lines whose first character equals this are skipped
+  #   cfg.skipBlanks — skip line breaks found at the start of a line
+  #
+  # Returns whatever the emitter's result() yields (rows, or a count when an
+  # `each` callback is configured). Stops early when emit() answers false.
+  # Throws Error on an unclosed quote, or on an unexpected character after a
+  # closing quote, unless relax is set.
+  {sep, quote, escape, excel, relax} = cfg
+  {comments, skipBlanks} = cfg
+  {emit, result} = makeEmitter(cfg)
+  sepCode   = sep.charCodeAt(0)
+  quoteCode = quote.charCodeAt(0)
+  sepLen    = sep.length
+  escSame   = escape is quote
+  escCode   = escape.charCodeAt(0)
+  len       = str.length
+  pos       = 0
+  # true when the whole separator (not merely its first character) starts at `at`
+  isSep = (at) -> str.charCodeAt(at) is sepCode and (sepLen is 1 or str.startsWith(sep, at))
+  row = []
+  atLineStart = true
+  afterSep    = false  # a separator was just consumed -> one more field is owed
+  while pos < len
+    c = str.charCodeAt(pos)
+    # skip empty lines at line start
+    if atLineStart
+      if skipBlanks and (c is LF or c is CR)
+        pos += crlfLen(str, pos)
+        continue
+      if comments and str[pos] is comments
+        nl = str.indexOf('\n', pos)
+        if nl is -1
+          nl = str.indexOf('\r', pos)
+        pos = nl is -1 ? len : nl + 1
+        continue
+      atLineStart = false
+    # === quoted field ===
+    if c is quoteCode or (excel and c is EQ and str.charCodeAt(pos + 1) is quoteCode)
+      if excel and c is EQ
+        pos += 2  # skip ="
+      else
+        pos += 1  # skip opening quote
+      field = ''
+      loop
+        # indexOf to jump to next quote — bulk skip over content
+        q = str.indexOf(quote, pos)
+        unless q >= 0
+          # no closing quote found
+          if relax
+            field += str.slice(pos)
+            pos = len
+            break
+          throw new Error "CSV: unclosed quote at position #{pos}"
+        field += str.slice(pos, q)
+        pos = q + quote.length
+        # doubled-quote escape: "" -> "
+        if escSame
+          if pos < len and str.charCodeAt(pos) is quoteCode
+            field += quote
+            pos += quote.length
+            continue
+        else
+          # backslash escape: \" -> "
+          if q > 0 and str.charCodeAt(q - 1) is escCode
+            field = field.slice(0, -1) + quote
+            continue
+        # check what follows the closing quote
+        break if pos >= len  # end of string
+        c2 = str.charCodeAt(pos)
+        break if isSep(pos) or c2 is LF or c2 is CR  # valid end-of-field
+        # unexpected character after closing quote
+        unless relax
+          throw new Error "CSV: unexpected character after quote at position #{pos}"
+        # relax mode: treat the quote as literal, keep scanning
+        field += quote
+        continue
+      # push field and consume trailing delimiter
+      row.push field
+      afterSep = false
+      if pos < len
+        c2 = str.charCodeAt(pos)
+        if isSep(pos)
+          pos += sepLen
+          afterSep = true
+        else if c2 is LF or c2 is CR
+          pos += crlfLen(str, pos)
+          # clear the row before a possible halt so the post-loop flush cannot
+          # deliver the same row a second time
+          more = emit(row)
+          row = []
+          atLineStart = true
+          break unless more
+    # === newline (end of row) ===
+    else if c is LF or c is CR
+      pos += crlfLen(str, pos)
+      # trailing separator -> empty final field (same as the fast path)
+      row.push '' if afterSep
+      afterSep = false
+      more = emit(row)
+      row = []
+      atLineStart = true
+      break unless more
+    # === separator (empty field) ===
+    else if isSep(pos)
+      row.push ''
+      pos += sepLen
+      afterSep = true
+    # === unquoted field ===
+    else
+      # indexOf ratchet: find nearest sep or newline
+      s = str.indexOf(sep, pos)
+      n = str.indexOf('\n', pos)
+      r = str.indexOf('\r', pos)
+      # nearest newline (\r or \n)
+      if r >= 0 and n >= 0
+        nl = Math.min(r, n)
+      else if r >= 0
+        nl = r
+      else
+        nl = n
+      # take the nearer boundary
+      if s >= 0 and (nl is -1 or s < nl)
+        row.push str.slice(pos, s)
+        pos = s + sepLen
+        afterSep = true
+      else if nl >= 0
+        row.push str.slice(pos, nl)
+        afterSep = false
+        pos = nl + crlfLen(str, nl)
+        more = emit(row)
+        row = []
+        atLineStart = true
+        break unless more
+      else
+        row.push str.slice(pos)
+        afterSep = false
+        pos = len
+  # a separator right before end of input still owes its empty final field
+  row.push '' if afterSep
+  # emit final row if pending (row is already empty after an early halt)
+  emit(row) if row.length > 0
+  result()
+# ==============================================================================
+# Writer — format data as CSV strings
+# ==============================================================================
+class Writer
+  # Reusable CSV formatter. Options (all optional):
+  #   sep    — field delimiter                      (default ',')
+  #   quote  — quote character                      (default '"')
+  #   escape — prefix written before an inner quote (default: same as quote)
+  #   mode   — 'compact' quotes only when needed, 'full' quotes every cell;
+  #            any other value leaves cells untouched
+  #   excel  — write cells matching /^0\d+$/ as ="..."  (default false)
+  #   drop   — remove trailing empty cells from each row (default false)
+  #   rowsep — string placed after every row in rows()   (default '\n')
+  constructor: (opts = {}) ->
+    @sep    = opts.sep    ? ','
+    @quote  = opts.quote  ? '"'
+    @escape = opts.escape ? @quote
+    @mode   = opts.mode   ? 'compact'
+    @excel  = opts.excel  ? false
+    @drop   = opts.drop   ? false
+    @rowsep = opts.rowsep ? '\n'
+    # pre-compute escaped quote
+    @esc      = @escape + @quote
+    @leadZero = /^0\d+$/
+  # check if a cell value needs quoting: it contains the field separator, a
+  # line break, the quote character, or the configured row separator (a custom
+  # rowsep inside an unquoted cell would split the row when read back)
+  needsQuote: (cell) ->
+    cell.indexOf(@sep)   >= 0 or
+    cell.indexOf('\n')   >= 0 or
+    cell.indexOf('\r')   >= 0 or
+    cell.indexOf(@quote) >= 0 or
+    (@rowsep.length > 0 and cell.indexOf(@rowsep) >= 0)
+  # format a single row as a CSV line (no trailing row separator);
+  # null/undefined values are written as empty cells
+  row: (data) ->
+    cells = (String(v ? '') for v in data)
+    # drop trailing empty columns
+    if @drop
+      cells.pop() while cells.length > 0 and cells[cells.length - 1] is ''
+    q   = @quote
+    esc = @esc
+    formatted = switch @mode
+      when 'compact'
+        for cell in cells
+          if @excel and @leadZero.test(cell)
+            "=#{q}#{cell}#{q}"
+          else if @needsQuote(cell)
+            "#{q}#{cell.replaceAll(q, esc)}#{q}"
+          else
+            cell
+      when 'full'
+        for cell in cells
+          if @excel and @leadZero.test(cell)
+            "=#{q}#{cell}#{q}"
+          else
+            "#{q}#{cell.replaceAll(q, esc)}#{q}"
+      else
+        cells
+    formatted.join @sep
+  # format multiple rows as a complete CSV string; every row, including the
+  # last, is followed by rowsep, and empty/missing data yields ''
+  rows: (data) ->
+    return '' unless data?.length
+    ((@row(r) for r in data).join(@rowsep)) + @rowsep
+# ==============================================================================
+# Public API
+# ==============================================================================
+export CSV =
+  # parse a CSV string into rows (arrays or objects); with an `each` callback
+  # rows are handed to the callback and the number delivered is returned
+  read: (str, opts = {}) ->
+    # empty input short-circuits; keep the return type in line with the normal
+    # path — a count when an `each` callback is given, otherwise an array
+    unless str?.length
+      return (opts.each ? 0 : [])
+    cfg = probe(str, opts)
+    # quotes anywhere in the input need the quote-aware reader
+    if cfg.hasQuotes
+      readFull(cfg.str, cfg)
+    else
+      readFast(cfg.str, cfg)
+  # format row arrays into a CSV string
+  write: (rows, opts = {}) ->
+    new Writer(opts).rows(rows)
+  # read and parse a CSV file (async — uses Bun.file)
+  load: (path, opts = {}) ->
+    str = Bun.file(path).text!
+    CSV.read str, opts
+  # write rows to a CSV file (async — uses Bun.write)
+  save: (path, rows, opts = {}) ->
+    Bun.write! path, CSV.write(rows, opts)
+  # create a reusable Writer instance
+  writer: (opts = {}) ->
+    new Writer(opts)
+  # format a single row (convenience — creates a one-shot Writer)
+  formatRow: (row, opts = {}) ->
+    new Writer(opts).row(row)

package/package.json ADDED Viewed

@@ -0,0 +1,40 @@
+{
+  "name": "@rip-lang/csv",
+  "version": "1.0.1",
+  "description": "Fast, flexible CSV parser and writer for Rip — indexOf ratchet engine, auto-detection, zero dependencies",
+  "type": "module",
+  "main": "csv.rip",
+  "exports": {
+    ".": "./csv.rip"
+  },
+  "scripts": {
+    "test": "rip test/basic.rip"
+  },
+  "keywords": [
+    "csv",
+    "parser",
+    "writer",
+    "fast",
+    "indexOf",
+    "bun",
+    "rip"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/shreeve/rip-lang.git",
+    "directory": "packages/csv"
+  },
+  "homepage": "https://github.com/shreeve/rip-lang/tree/main/packages/csv#readme",
+  "bugs": {
+    "url": "https://github.com/shreeve/rip-lang/issues"
+  },
+  "author": "Steve Shreeve <steve.shreeve@gmail.com>",
+  "license": "MIT",
+  "dependencies": {
+    "rip-lang": "^2.9.0"
+  },
+  "files": [
+    "csv.rip",
+    "README.md"
+  ]
+}