web-csv-toolbox 0.0.0-next-20240421152231 → 0.0.0-next-20240422065950
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/Lexer.cjs +1 -1
- package/dist/cjs/Lexer.cjs.map +1 -1
- package/dist/cjs/_virtual/web_csv_toolbox_wasm_bg.wasm.cjs +1 -1
- package/dist/es/Lexer.js +40 -22
- package/dist/es/Lexer.js.map +1 -1
- package/dist/es/_virtual/web_csv_toolbox_wasm_bg.wasm.js +1 -1
- package/dist/types/Lexer.d.ts +19 -0
- package/dist/web-csv-toolbox.umd.cjs +1 -1
- package/dist/web-csv-toolbox.umd.cjs.map +1 -1
- package/dist/web_csv_toolbox_wasm_bg.wasm +0 -0
- package/package.json +1 -1
package/dist/cjs/Lexer.cjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const t=require("./assertCommonOptions.cjs"),e=require("./common/constants.cjs"),i=require("./constants.cjs"),s=require("./utils/escapeRegExp.cjs");exports.Lexer=class{#t;#e;#i
|
|
1
|
+
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const t=require("./assertCommonOptions.cjs"),e=require("./common/constants.cjs"),i=require("./constants.cjs"),s=require("./utils/escapeRegExp.cjs");exports.Lexer=class{#t;#e;#i="";#s=!1;#r;constructor({delimiter:e=i.COMMA,quotation:r=i.DOUBLE_QUOTE}={}){t.assertCommonOptions({delimiter:e,quotation:r}),this.#t=e,this.#e=r;const f=s.escapeRegExp(e),u=s.escapeRegExp(r);this.#r=new RegExp(`^(?:(?!${u})(?!${f})(?![\\r\\n]))([\\S\\s\\uFEFF\\xA0]+?)(?=${u}|${f}|\\r|\\n|$)`)}lex(t,e=!1){return e||(this.#s=!0),"string"==typeof t&&0!==t.length&&(this.#i+=t),this.#f()}flush(){return this.#s=!0,[...this.#f()]}*#f(){this.#s&&(this.#i.endsWith(i.CRLF)?this.#i=this.#i.slice(0,-i.CRLF.length):this.#i.endsWith(i.LF)&&(this.#i=this.#i.slice(0,-i.LF.length)));let t=null;for(let i;i=this.#u();)switch(i){case e.FieldDelimiter:case e.RecordDelimiter:t&&(yield t,t=null),yield i;break;default:t?t.value+=i.value:t=i}t&&(yield t)}#u(){if(0===this.#i.length)return null;if(!1===this.#s&&(this.#i===i.CRLF||this.#i===i.LF))return null;if(this.#i.startsWith(i.CRLF))return this.#i=this.#i.slice(2),e.RecordDelimiter;if(this.#i.startsWith(i.LF))return this.#i=this.#i.slice(1),e.RecordDelimiter;if(this.#i.startsWith(this.#t))return this.#i=this.#i.slice(1),e.FieldDelimiter;if(this.#i.startsWith(this.#e))return!1===this.#s&&this.#i.endsWith(this.#e)?null:this.#h();const t=this.#r.exec(this.#i);return t?!1===this.#s&&t[0].length===this.#i.length?null:(this.#i=this.#i.slice(t[0].length),{type:e.Field,value:t[0]}):null}#h(){let t=1,s="";for(;t<this.#i.length;)if(this.#i.slice(t,t+1)!==this.#e||this.#i.slice(t+1,t+2)!==this.#e){if(this.#i.slice(t,t+1)===this.#e)return!1===this.#s&&t+1<this.#i.length&&this.#i.slice(t+1,1)!==this.#t&&this.#i.slice(t+1,t+1+2)!==i.CRLF&&this.#i.slice(t+1,t+1+1)!==i.LF?null:(this.#i=this.#i.slice(t+1),{type:e.Field,value:s});s+=this.#i[t],t++}else s+=this.#e,t+=2;return null}};
|
|
2
2
|
//# sourceMappingURL=Lexer.cjs.map
|
package/dist/cjs/Lexer.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"Lexer.cjs","sources":["../../src/Lexer.ts"],"sourcesContent":["import { assertCommonOptions } from \"./assertCommonOptions.ts\";\nimport { Field, FieldDelimiter, RecordDelimiter } from \"./common/constants.ts\";\nimport type { CommonOptions, Token } from \"./common/types.ts\";\nimport { COMMA, CRLF, DOUBLE_QUOTE, LF } from \"./constants.ts\";\nimport { escapeRegExp } from \"./utils/escapeRegExp.ts\";\n\nexport class Lexer {\n #delimiter: string;\n #
|
|
1
|
+
{"version":3,"file":"Lexer.cjs","sources":["../../src/Lexer.ts"],"sourcesContent":["import { assertCommonOptions } from \"./assertCommonOptions.ts\";\nimport { Field, FieldDelimiter, RecordDelimiter } from \"./common/constants.ts\";\nimport type { CommonOptions, Token } from \"./common/types.ts\";\nimport { COMMA, CRLF, DOUBLE_QUOTE, LF } from \"./constants.ts\";\nimport { escapeRegExp } from \"./utils/escapeRegExp.ts\";\n\n/**\n * CSV Lexer.\n *\n * Lexter tokenizes CSV data into fields and records.\n */\nexport class Lexer {\n #delimiter: string;\n #quotation: string;\n #buffer = \"\";\n #flush = false;\n #matcher: RegExp;\n\n /**\n * Constructs a new Lexer instance.\n * @param options - The common options for the lexer.\n */\n constructor({\n delimiter = COMMA,\n quotation = DOUBLE_QUOTE,\n }: CommonOptions = {}) {\n assertCommonOptions({ delimiter, quotation });\n this.#delimiter = delimiter;\n this.#quotation = quotation;\n const d = escapeRegExp(delimiter);\n const q = escapeRegExp(quotation);\n this.#matcher = new RegExp(\n `^(?:(?!${q})(?!${d})(?![\\\\r\\\\n]))([\\\\S\\\\s\\\\uFEFF\\\\xA0]+?)(?=${q}|${d}|\\\\r|\\\\n|$)`,\n );\n }\n\n /**\n * Lexes the given chunk of CSV data.\n * @param chunk - The chunk of CSV data to be lexed.\n * @param buffering - Indicates whether the lexer is buffering or not.\n * @returns An iterable iterator of tokens.\n */\n public lex(chunk: string | null, buffering = false): IterableIterator<Token> {\n if (!buffering) {\n this.#flush = true;\n }\n if (typeof chunk === \"string\" && chunk.length !== 0) {\n this.#buffer += chunk;\n }\n\n return this.#tokens();\n }\n\n /**\n * Flushes the lexer and returns any remaining tokens.\n * @returns An array of tokens.\n */\n public flush(): Token[] {\n this.#flush = true;\n return [...this.#tokens()];\n }\n\n /**\n * Generates tokens from the buffered CSV data.\n * @yields Tokens from the buffered CSV data.\n */\n *#tokens(): Generator<Token> {\n if (this.#flush) {\n // Trim the last CRLF or LF\n if (this.#buffer.endsWith(CRLF)) {\n this.#buffer = this.#buffer.slice(0, -CRLF.length);\n } else if (this.#buffer.endsWith(LF)) {\n this.#buffer = this.#buffer.slice(0, -LF.length);\n }\n }\n let currentField: Token | null = null;\n for (let token: Token | null; (token = this.#nextToken()); ) {\n switch (token) {\n case FieldDelimiter:\n if (currentField) {\n yield currentField;\n currentField = null;\n }\n yield token;\n break;\n case RecordDelimiter:\n if (currentField) {\n yield currentField;\n currentField = null;\n }\n yield token;\n break;\n default:\n // If currentField is not null, append the new token's value to it\n if (currentField) {\n currentField.value += token.value;\n } else {\n currentField = token;\n }\n break;\n }\n }\n if (currentField) {\n yield currentField;\n }\n }\n\n /**\n * Retrieves the next token from the buffered CSV data.\n * @returns The next token or null if there are no more tokens.\n */\n #nextToken(): Token | null {\n if (this.#buffer.length === 0) {\n return null;\n }\n // Buffer is Record Delimiter, defer to the next iteration.\n if (\n this.#flush === false &&\n (this.#buffer === CRLF || this.#buffer === LF)\n ) {\n return null;\n }\n\n // Check for CRLF\n if (this.#buffer.startsWith(CRLF)) {\n this.#buffer = this.#buffer.slice(2);\n return RecordDelimiter;\n }\n\n // Check for LF\n if (this.#buffer.startsWith(LF)) {\n this.#buffer = this.#buffer.slice(1);\n return RecordDelimiter;\n }\n\n // Check for Delimiter\n if (this.#buffer.startsWith(this.#delimiter)) {\n this.#buffer = this.#buffer.slice(1);\n return FieldDelimiter;\n }\n\n // Check for Quoted String\n if (this.#buffer.startsWith(this.#quotation)) {\n // If not flushing and the buffer doesn't end with a quote, then return null.\n if (this.#flush === false && this.#buffer.endsWith(this.#quotation)) {\n return null;\n }\n return this.#extractQuotedString();\n }\n\n // Check for Unquoted String\n const match = this.#matcher.exec(this.#buffer);\n if (match) {\n // If we're flushing and the match doesn't consume the entire buffer,\n // then return null\n if (this.#flush === false && match[0].length === this.#buffer.length) {\n return null;\n }\n this.#buffer = this.#buffer.slice(match[0].length);\n return { type: Field, value: match[0] };\n }\n\n // Otherwise, return null\n return null;\n }\n\n /**\n * Extracts a quoted string token from the buffered CSV data.\n * @returns The quoted string token or null if the string is not properly quoted.\n */\n #extractQuotedString(): Token | null {\n let end = 1; // Skip the opening quote\n let value = \"\";\n\n while (end < this.#buffer.length) {\n // Escaped quote\n if (\n this.#buffer.slice(end, end + 1) === this.#quotation &&\n this.#buffer.slice(end + 1, end + 1 * 2) === this.#quotation\n ) {\n value += this.#quotation;\n end += 1 * 2;\n continue;\n }\n\n // Closing quote\n if (this.#buffer.slice(end, end + 1) === this.#quotation) {\n // If flushing and the buffer doesn't end with a quote, then return null\n if (\n this.#flush === false &&\n end + 1 < this.#buffer.length &&\n this.#buffer.slice(end + 1, 1) !== this.#delimiter &&\n this.#buffer.slice(end + 1, end + 1 + 2 /** CRLF.length */) !==\n CRLF &&\n this.#buffer.slice(end + 1, end + 1 + 1 /** LF.length */) !== LF\n ) {\n return null;\n }\n\n // Otherwise, return the quoted string\n this.#buffer = this.#buffer.slice(end + 1);\n return { type: Field, value };\n }\n\n value += this.#buffer[end];\n end++;\n }\n\n // If we get here, we've reached the end of the buffer\n return null;\n }\n}\n"],"names":["delimiter","quotation","buffer","flush","matcher","constructor","COMMA","DOUBLE_QUOTE","assertCommonOptions","this","d","escapeRegExp","q","RegExp","lex","chunk","buffering","length","tokens","endsWith","CRLF","slice","LF","currentField","token","nextToken","FieldDelimiter","RecordDelimiter","value","startsWith","extractQuotedString","match","exec","type","Field","end"],"mappings":"kPAWO,MACLA,GACAC,GACAC,GAAU,GACVC,IAAS,EACTC,GAMA,WAAAC,EAAYL,UACVA,EAAYM,EAAAA,MAAAL,UACZA,EAAYM,EAAAA,cACK,IACGC,EAAAA,oBAAA,CAAER,YAAWC,cACjCQ,MAAKT,EAAaA,EAClBS,MAAKR,EAAaA,EACZ,MAAAS,EAAIC,eAAaX,GACjBY,EAAID,eAAaV,GACvBQ,MAAKL,EAAW,IAAIS,OAClB,UAAUD,QAAQF,6CAA6CE,KAAKF,eAExE,CAQO,GAAAI,CAAIC,EAAsBC,GAAY,GAQ3C,OAPKA,IACHP,MAAKN,GAAS,GAEK,iBAAVY,GAAuC,IAAjBA,EAAME,SACrCR,MAAKP,GAAWa,GAGXN,MAAKS,GACd,CAMO,KAAAf,GAEL,OADAM,MAAKN,GAAS,EACP,IAAIM,MAAKS,IAClB,CAMA,GAACA,GACKT,MAAKN,IAEHM,MAAKP,EAAQiB,SAASC,EAAIA,MAC5BX,MAAKP,EAAUO,MAAKP,EAAQmB,MAAM,GAAID,OAAKH,QAClCR,MAAKP,EAAQiB,SAASG,EAAEA,MACjCb,MAAKP,EAAUO,MAAKP,EAAQmB,MAAM,GAAIC,KAAGL,UAG7C,IAAIM,EAA6B,KACjC,IAAA,IAASC,EAAsBA,EAAQf,MAAKgB,KAC1C,OAAQD,GACN,KAAKE,EAAAA,eAOL,KAAKC,EAAAA,gBACCJ,UACIA,EACSA,EAAA,YAEXC,EACN,MACF,QAEMD,EACFA,EAAaK,OAASJ,EAAMI,MAEbL,EAAAC,EAKnBD,UACIA,EAEV,CAMA,EAAAE,GACM,GAAwB,IAAxBhB,MAAKP,EAAQe,OACR,OAAA,KAIP,IAAgB,IAAhBR,MAAKN,IACJM,MAAKP,IAAYkB,EAAAA,MAAQX,MAAKP,IAAYoB,EAAAA,IAEpC,OAAA,KAIT,GAAIb,MAAKP,EAAQ2B,WAAWT,EAAIA,MAEvB,OADPX,MAAKP,EAAUO,MAAKP,EAAQmB,MAAM,GAC3BM,kBAIT,GAAIlB,MAAKP,EAAQ2B,WAAWP,EAAEA,IAErB,OADPb,MAAKP,EAAUO,MAAKP,EAAQmB,MAAM,GAC3BM,kBAIT,GAAIlB,MAAKP,EAAQ2B,WAAWpB,MAAKT,GAExB,OADPS,MAAKP,EAAUO,MAAKP,EAAQmB,MAAM,GAC3BK,iBAIT,GAAIjB,MAAKP,EAAQ2B,WAAWpB,MAAKR,GAE3B,OAAgB,IAAhBQ,MAAKN,GAAoBM,MAAKP,EAAQiB,SAASV,MAAKR,GAC/C,KAEFQ,MAAKqB,IAId,MAAMC,EAAQtB,MAAKL,EAAS4B,KAAKvB,MAAKP,GACtC,OAAI6B,GAGkB,IAAhBtB,MAAKN,GAAoB4B,EAAM,GAAGd,SAAWR,MAAKP,EAAQe,OACrD,MAETR,MAAKP,EAAUO,MAAKP,EAAQmB,MAAMU,EAAM,GAAGd,QACpC,CAAEgB,KAAMC,EAAAA,MAAON,MAAOG,EAAM,KAI9B,IACT,CAMA,EAAAD,GACE,IAAIK,EAAM,EACNP,EAAQ,GAEL,KAAAO,EAAM1B,MAAKP,EAAQe,QAExB,GACER,MAAKP,EAAQmB,MAAMc,EAAKA,EAAM,KAAO1B,MAAKR,GAC1CQ,MAAKP,EAAQmB,MAAMc,EAAM,EAAGA,EAAM,KAAW1B,MAAKR,EAFpD,CAUI,GAAAQ,MAAKP,EAAQmB,MAAMc,EAAKA,EAAM,KAAO1B,MAAKR,EAE5C,OACkB,IAAhBQ,MAAKN,GACLgC,EAAM,EAAI1B,MAAKP,EAAQe,QACvBR,MAAKP,EAAQmB,MAAMc,EAAM,EAAG,KAAO1B,MAAKT,GACxCS,MAAKP,EAAQmB,MAAMc,EAAM,EAAGA,EAAM,EAAI,KACpCf,EACFA,MAAAX,MAAKP,EAAQmB,MAAMc,EAAM,EAAGA,EAAM,EAAI,KAAwBb,KAEvD,MAITb,MAAKP,EAAUO,MAAKP,EAAQmB,MAAMc,EAAM,GACjC,CAAEF,KAAMC,QAAON,UAGfA,GAAAnB,MAAKP,EAAQiC,GACtBA,GAtBA,MAHEP,GAASnB,MAAKR,EACdkC,GAAO,EA4BJ,OAAA,IACT"}
|