marc-ts 0.2.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"marctxt.cjs","names":[],"sources":["../src/marctxt.ts"],"sourcesContent":["/**\n * MARCBreaker (marctxt) parser and serializer.\n *\n * Also known as MARCMaker format. Each field is one line:\n *\n * =LDR 00706cam a2200217 a 4500\n * =001 5490\n * =245 14$aThe Hobbit /$cJ.R.R. Tolkien.\n * =650 \\1$aHobbits (Fictitious characters)$vFiction.\n *\n * Blank indicators are represented as `\\`. Records are separated by blank lines.\n * Subfield delimiter is `$` followed by a single character code.\n *\n * Curly-brace escape sequences (per LC MARCMaker spec):\n * `{` → `{lcub}` (left curly brace; reserved by the format)\n * `}` → `{rcub}` (right curly brace; reserved by the format)\n * `$` → `{dollar}` (subfield delimiter)\n * `\\` → `{bsol}` (backslash; reserved as blank-indicator stand-in)\n *\n */\n\nimport type { MarcRecord, ControlField, DataField, Subfield } from './types';\nimport { isControlField } from './types';\n\n// ─── Indicator encoding ───────────────────────────────────────────────────────\n\nfunction encodeIndicator(ind: string): string {\n return ind === ' ' ? '\\\\' : ind;\n}\n\nfunction decodeIndicator(ch: string): string {\n return ch === '\\\\' ? ' ' : ch;\n}\n\n// ─── Value escape (see file header) ───────────────────────────────────────────\n\nfunction escapeValue(s: string): string {\n // Single-pass replacement avoids the problem of earlier escapes being\n // re-escaped by later passes (e.g. '{' → '{lcub}' then '}' → '{lcub{rcub}').\n return s.replace(/[{}\\\\\\n$]/g, (ch) => {\n if (ch === '{') return '{lcub}';\n if (ch === '}') return '{rcub}';\n if (ch === '$') return '{dollar}';\n if (ch === '\\\\') return '{bsol}';\n return ' ';\n });\n}\n\nfunction unescapeValue(s: string): string {\n return s.replace(/\\{(lcub|rcub|dollar|bsol)\\}/g, (_, name) => {\n if (name === 'lcub') return '{';\n if (name === 'rcub') return '}';\n if (name === 'dollar') return '$';\n return '\\\\';\n });\n}\n\n// ─── Subfield parsing ─────────────────────────────────────────────────────────\n\n/**\n * Parse a subfield string like \"$aValue$bOther\" into Subfield objects.\n * Uses split with a capturing group: \"$aFoo$bBar\" → [\"\", \"a\", \"Foo\", \"b\", \"Bar\"].\n * Any character following `$` is treated as a subfield code.\n */\nfunction parseSubfields(str: string): Subfield[] {\n const parts = str.split(/\\$(.)/);\n const subfields: Subfield[] = [];\n // parts[0] is content before the first $ — should be empty for well-formed data\n for (let i = 1; i < parts.length; i += 2) {\n subfields.push({ code: parts[i]!, value: unescapeValue(parts[i + 1] ?? '') });\n }\n return subfields;\n}\n\n// ─── Record block parser ──────────────────────────────────────────────────────\n\n/**\n * Parse a block of non-empty marctxt lines into a MarcRecord.\n * Each line has the form `=TAG content`.\n */\nfunction parseRecordLines(lines: string[]): MarcRecord {\n let leader = '';\n const fields: (ControlField | DataField)[] = [];\n\n for (const line of lines) {\n if (!line.startsWith('=')) continue;\n const tag = line.slice(1, 4);\n // positions 4-5 are the two separator spaces; content starts at 6\n const content = line.slice(6);\n\n if (tag === 'LDR' || tag === '000') {\n leader = content;\n continue;\n }\n\n if (tag < '010') {\n // Control field: content is the raw field data\n fields.push({ tag, data: unescapeValue(content) });\n continue;\n }\n\n // Data field: first two chars are indicators, rest are subfields\n const indicator1 = decodeIndicator(content[0] ?? '\\\\');\n const indicator2 = decodeIndicator(content[1] ?? '\\\\');\n const subfields = parseSubfields(content.slice(2));\n fields.push({ tag, indicator1, indicator2, subfields });\n }\n\n return { leader, fields };\n}\n\n// ─── Public parse API ─────────────────────────────────────────────────────────\n\n/**\n * Parse a marctxt string containing one or more records separated by blank lines.\n * Returns all records found.\n */\nexport function parseMarcTxt(text: string): MarcRecord[] {\n const lines = text.replace(/\\r\\n/g, '\\n').split('\\n');\n const records: MarcRecord[] = [];\n let buffer: string[] = [];\n\n for (const line of lines) {\n if (line.trim() === '') {\n if (buffer.length > 0) {\n records.push(parseRecordLines(buffer));\n buffer = [];\n }\n } else {\n buffer.push(line);\n }\n }\n\n if (buffer.length > 0) {\n records.push(parseRecordLines(buffer));\n }\n\n return records;\n}\n\n// ─── Serializer ───────────────────────────────────────────────────────────────\n\nfunction serializeMarcTxtRecord(record: MarcRecord): string {\n const lines: string[] = [];\n\n lines.push(`=LDR ${record.leader}`);\n\n for (const field of record.fields) {\n if (isControlField(field)) {\n lines.push(`=${field.tag} ${escapeValue(field.data)}`);\n } else {\n const ind1 = encodeIndicator(field.indicator1);\n const ind2 = encodeIndicator(field.indicator2);\n const subfields = field.subfields\n .map((sf) => `$${sf.code}${escapeValue(sf.value)}`)\n .join('');\n lines.push(`=${field.tag} ${ind1}${ind2}${subfields}`);\n }\n }\n\n return lines.join('\\n') + '\\n';\n}\n\n/**\n * Serialize one or more MarcRecords into a marctxt string.\n * Records are separated by blank lines.\n */\nexport function serializeMarcTxt(records: MarcRecord[]): string {\n // Each record ends with '\\n'; joining with '\\n' produces blank lines between records.\n return records.map(serializeMarcTxtRecord).join('\\n');\n}\n"],"mappings":"2GA0BA,SAAS,EAAgB,EAAqB,CAC5C,OAAO,IAAQ,IAAM,KAAO,CAC9B,CAEA,SAAS,EAAgB,EAAoB,CAC3C,OAAO,IAAO,KAAO,IAAM,CAC7B,CAIA,SAAS,EAAY,EAAmB,CAGtC,OAAO,EAAE,QAAQ,aAAe,GAC1B,IAAO,IAAY,SACnB,IAAO,IAAY,SACnB,IAAO,IAAY,WACnB,IAAO,KAAa,SACjB,GACR,CACH,CAEA,SAAS,EAAc,EAAmB,CACxC,OAAO,EAAE,QAAQ,+BAAA,CAAiC,EAAG,IAC/C,IAAS,OAAe,IACxB,IAAS,OAAe,IACxB,IAAS,SAAiB,IACvB,IACR,CACH,CASA,SAAS,EAAe,EAAyB,CAC/C,MAAM,EAAQ,EAAI,MAAM,OAAO,EACzB,EAAwB,CAAC,EAE/B,QAAS,EAAI,EAAG,EAAI,EAAM,OAAQ,GAAK,EACrC,EAAU,KAAK,CAAE,KAAM,EAAM,CAAA,EAAK,MAAO,EAAc,EAAM,EAAI,CAAA,GAAM,EAAE,CAAE,CAAC,EAE9E,OAAO,CACT,CAQA,SAAS,EAAiB,EAA6B,CACrD,IAAI,EAAS,GACb,MAAM,EAAuC,CAAC,EAE9C,UAAW,KAAQ,EAAO,CACxB,GAAI,CAAC,EAAK,WAAW,GAAG,EAAG,SAC3B,MAAM,EAAM,EAAK,MAAM,EAAG,CAAC,EAErB,EAAU,EAAK,MAAM,CAAC,EAE5B,GAAI,IAAQ,OAAS,IAAQ,MAAO,CAClC,EAAS,EACT,QACF,CAEA,GAAI,EAAM,MAAO,CAEf,EAAO,KAAK,CAAE,IAAA,EAAK,KAAM,EAAc,CAAO,CAAE,CAAC,EACjD,QACF,CAGA,MAAM,EAAa,EAAgB,EAAQ,CAAA,GAAM,IAAI,EAC/C,EAAa,EAAgB,EAAQ,CAAA,GAAM,IAAI,EAC/C,EAAY,EAAe,EAAQ,MAAM,CAAC,CAAC,EACjD,EAAO,KAAK,CAAE,IAAA,EAAK,WAAA,EAAY,WAAA,EAAY,UAAA,CAAU,CAAC,CACxD,CAEA,MAAO,CAAE,OAAA,EAAQ,OAAA,CAAO,CAC1B,CAQA,SAAgB,EAAa,EAA4B,CACvD,MAAM,EAAQ,EAAK,QAAQ,QAAS;AAAA,CAAI,EAAE,MAAM;AAAA,CAAI,EAC9C,EAAwB,CAAC,EAC/B,IAAI,EAAmB,CAAC,EAExB,UAAW,KAAQ,EACb,EAAK,KAAK,IAAM,GACd,EAAO,OAAS,IAClB,EAAQ,KAAK,EAAiB,CAAM,CAAC,EACrC,EAAS,CAAC,GAGZ,EAAO,KAAK,CAAI,EAIpB,OAAI,EAAO,OAAS,GAClB,EAAQ,KAAK,EAAiB,CAAM,CAAC,EAGhC,CACT,CAIA,SAAS,EAAuB,EAA4B,CAC1D,MAAM,EAAkB,CAAC,EAEzB,EAAM,KAAK,SAAS,EAAO,MAAA,EAAQ,EAEnC,UAAW,KAAS,EAAO,OACzB,GAAI,EAAA,eAAe,CAAK,EACtB,EAAM,KAAK,IAAI,EAAM,GAAA,KAAQ,EAAY,EAAM,IAAI,CAAA,EAAG,MACjD,CACL,MAAM,EAAO,EAAgB,EAAM,UAAU,EACvC,EAAO,EAAgB,EAAM,UAAU,EACvC,EAAY,EAAM,UACrB,IAAK,GAAO,IAAI,EAAG,IAAA,GAAO,EAAY,EAAG,KAAK,CAAA,EAAG,EACjD,KAAK,EAAE,EACV,EAAM,KAAK,IAAI,EAAM,GAAA,KAAQ,CAAA,GAAO,CAAA,GAAO,CAAA,EAAW,CACxD,CAGF,OAAO,EAAM,KAAK;AAAA,CAAI,EAAI;AAAA,CAC5B,CAMA,SAAgB,EAAiB,EAA+B,CAE9D,OAAO,EAAQ,IAAI,CAAsB,EAAE,KAAK;AAAA,CAAI,CACtD"}
1
+ {"version":3,"file":"marctxt.cjs","names":[],"sources":["../src/marctxt.ts"],"sourcesContent":["/**\n * MARCBreaker (marctxt) parser and serializer.\n *\n * Also known as MARCMaker format. Each field is one line:\n *\n * =LDR 00706cam a2200217 a 4500\n * =001 5490\n * =245 14$aThe Hobbit /$cJ.R.R. Tolkien.\n * =650 \\1$aHobbits (Fictitious characters)$vFiction.\n *\n * Blank indicators are represented as `\\`. Records are separated by blank lines.\n * Subfield delimiter is `$` followed by a single character code.\n *\n * Curly-brace escape sequences (per LC MARCMaker spec):\n * `{` → `{lcub}` (left curly brace; reserved by the format)\n * `}` → `{rcub}` (right curly brace; reserved by the format)\n * `$` → `{dollar}` (subfield delimiter)\n * `\\` → `{bsol}` (backslash; reserved as blank-indicator stand-in)\n *\n */\n\nimport type { MarcRecord, ControlField, DataField, Subfield } from './types';\nimport { isControlField } from './types';\n\n// ─── Indicator encoding ───────────────────────────────────────────────────────\n\nfunction encodeIndicator(ind: string): string {\n return ind === ' ' ? '\\\\' : ind;\n}\n\nfunction decodeIndicator(ch: string): string {\n return ch === '\\\\' ? ' ' : ch;\n}\n\n// ─── Value escape (see file header) ───────────────────────────────────────────\n\nfunction escapeValue(s: string): string {\n // Single-pass replacement avoids the problem of earlier escapes being\n // re-escaped by later passes (e.g. '{' → '{lcub}' then '}' → '{lcub{rcub}').\n return s.replace(/[{}\\\\\\n$]/g, (ch) => {\n if (ch === '{') return '{lcub}';\n if (ch === '}') return '{rcub}';\n if (ch === '$') return '{dollar}';\n if (ch === '\\\\') return '{bsol}';\n return ' ';\n });\n}\n\nfunction unescapeValue(s: string): string {\n return s.replace(/\\{(lcub|rcub|dollar|bsol)\\}/g, (_, name) => {\n if (name === 'lcub') return '{';\n if (name === 'rcub') return '}';\n if (name === 'dollar') return '$';\n return '\\\\';\n });\n}\n\n// ─── Subfield parsing ─────────────────────────────────────────────────────────\n\n/**\n * Parse a subfield string like \"$aValue$bOther\" into Subfield objects.\n * Uses split with a capturing group: \"$aFoo$bBar\" → [\"\", \"a\", \"Foo\", \"b\", \"Bar\"].\n * Any character following `$` is treated as a subfield code.\n */\nfunction parseSubfields(str: string): Subfield[] {\n const parts = str.split(/\\$(.)/);\n const subfields: Subfield[] = [];\n // parts[0] is content before the first $ — should be empty for well-formed data\n for (let i = 1; i < parts.length; i += 2) {\n subfields.push({ code: parts[i]!, value: unescapeValue(parts[i + 1] ?? '') });\n }\n return subfields;\n}\n\n// ─── Record block parser ──────────────────────────────────────────────────────\n\n/**\n * Parse a block of non-empty marctxt lines into a MarcRecord.\n * Each line has the form `=TAG content`.\n */\nfunction parseRecordLines(lines: string[]): MarcRecord {\n let leader = '';\n const fields: (ControlField | DataField)[] = [];\n\n for (const line of lines) {\n if (!line.startsWith('=')) continue;\n const tag = line.slice(1, 4);\n // positions 4-5 are the two separator spaces; content starts at 6\n const content = line.slice(6);\n\n if (tag === 'LDR' || tag === '000') {\n leader = content;\n continue;\n }\n\n if (tag < '010') {\n // Control field: content is the raw field data\n fields.push({ tag, data: unescapeValue(content) });\n continue;\n }\n\n // Data field: first two chars are indicators, rest are subfields\n const indicator1 = decodeIndicator(content[0] ?? '\\\\');\n const indicator2 = decodeIndicator(content[1] ?? '\\\\');\n const subfields = parseSubfields(content.slice(2));\n fields.push({ tag, indicator1, indicator2, subfields });\n }\n\n return { leader, fields };\n}\n\n// ─── Public parse API ─────────────────────────────────────────────────────────\n\n/**\n * Parse a marctxt string containing one or more records separated by blank lines.\n * Returns all records found.\n */\nexport function parseMarcTxt(text: string): MarcRecord[] {\n const lines = text.replace(/\\r\\n/g, '\\n').split('\\n');\n const records: MarcRecord[] = [];\n let buffer: string[] = [];\n\n for (const line of lines) {\n if (line.trim() === '') {\n if (buffer.length > 0) {\n records.push(parseRecordLines(buffer));\n buffer = [];\n }\n } else {\n buffer.push(line);\n }\n }\n\n if (buffer.length > 0) {\n records.push(parseRecordLines(buffer));\n }\n\n return records;\n}\n\n// ─── Serializer ───────────────────────────────────────────────────────────────\n\nfunction serializeMarcTxtRecord(record: MarcRecord): string {\n const lines: string[] = [];\n\n lines.push(`=LDR ${record.leader}`);\n\n for (const field of record.fields) {\n if (isControlField(field)) {\n lines.push(`=${field.tag} ${escapeValue(field.data)}`);\n } else {\n const ind1 = encodeIndicator(field.indicator1);\n const ind2 = encodeIndicator(field.indicator2);\n const subfields = field.subfields.map((sf) => `$${sf.code}${escapeValue(sf.value)}`).join('');\n lines.push(`=${field.tag} ${ind1}${ind2}${subfields}`);\n }\n }\n\n return lines.join('\\n') + '\\n';\n}\n\n/**\n * Serialize one or more MarcRecords into a marctxt string.\n * Records are separated by blank lines.\n */\nexport function serializeMarcTxt(records: MarcRecord[]): string {\n // Each record ends with '\\n'; joining with '\\n' produces blank lines between records.\n return records.map(serializeMarcTxtRecord).join('\\n');\n}\n"],"mappings":"2GA0BA,SAAS,EAAgB,EAAqB,CAC5C,OAAO,IAAQ,IAAM,KAAO,CAC9B,CAEA,SAAS,EAAgB,EAAoB,CAC3C,OAAO,IAAO,KAAO,IAAM,CAC7B,CAIA,SAAS,EAAY,EAAmB,CAGtC,OAAO,EAAE,QAAQ,aAAe,GAC1B,IAAO,IAAY,SACnB,IAAO,IAAY,SACnB,IAAO,IAAY,WACnB,IAAO,KAAa,SACjB,GACR,CACH,CAEA,SAAS,EAAc,EAAmB,CACxC,OAAO,EAAE,QAAQ,+BAAA,CAAiC,EAAG,IAC/C,IAAS,OAAe,IACxB,IAAS,OAAe,IACxB,IAAS,SAAiB,IACvB,IACR,CACH,CASA,SAAS,EAAe,EAAyB,CAC/C,MAAM,EAAQ,EAAI,MAAM,OAAO,EACzB,EAAwB,CAAC,EAE/B,QAAS,EAAI,EAAG,EAAI,EAAM,OAAQ,GAAK,EACrC,EAAU,KAAK,CAAE,KAAM,EAAM,CAAA,EAAK,MAAO,EAAc,EAAM,EAAI,CAAA,GAAM,EAAE,CAAE,CAAC,EAE9E,OAAO,CACT,CAQA,SAAS,EAAiB,EAA6B,CACrD,IAAI,EAAS,GACb,MAAM,EAAuC,CAAC,EAE9C,UAAW,KAAQ,EAAO,CACxB,GAAI,CAAC,EAAK,WAAW,GAAG,EAAG,SAC3B,MAAM,EAAM,EAAK,MAAM,EAAG,CAAC,EAErB,EAAU,EAAK,MAAM,CAAC,EAE5B,GAAI,IAAQ,OAAS,IAAQ,MAAO,CAClC,EAAS,EACT,QACF,CAEA,GAAI,EAAM,MAAO,CAEf,EAAO,KAAK,CAAE,IAAA,EAAK,KAAM,EAAc,CAAO,CAAE,CAAC,EACjD,QACF,CAGA,MAAM,EAAa,EAAgB,EAAQ,CAAA,GAAM,IAAI,EAC/C,EAAa,EAAgB,EAAQ,CAAA,GAAM,IAAI,EAC/C,EAAY,EAAe,EAAQ,MAAM,CAAC,CAAC,EACjD,EAAO,KAAK,CAAE,IAAA,EAAK,WAAA,EAAY,WAAA,EAAY,UAAA,CAAU,CAAC,CACxD,CAEA,MAAO,CAAE,OAAA,EAAQ,OAAA,CAAO,CAC1B,CAQA,SAAgB,EAAa,EAA4B,CACvD,MAAM,EAAQ,EAAK,QAAQ,QAAS;AAAA,CAAI,EAAE,MAAM;AAAA,CAAI,EAC9C,EAAwB,CAAC,EAC/B,IAAI,EAAmB,CAAC,EAExB,UAAW,KAAQ,EACb,EAAK,KAAK,IAAM,GACd,EAAO,OAAS,IAClB,EAAQ,KAAK,EAAiB,CAAM,CAAC,EACrC,EAAS,CAAC,GAGZ,EAAO,KAAK,CAAI,EAIpB,OAAI,EAAO,OAAS,GAClB,EAAQ,KAAK,EAAiB,CAAM,CAAC,EAGhC,CACT,CAIA,SAAS,EAAuB,EAA4B,CAC1D,MAAM,EAAkB,CAAC,EAEzB,EAAM,KAAK,SAAS,EAAO,MAAA,EAAQ,EAEnC,UAAW,KAAS,EAAO,OACzB,GAAI,EAAA,eAAe,CAAK,EACtB,EAAM,KAAK,IAAI,EAAM,GAAA,KAAQ,EAAY,EAAM,IAAI,CAAA,EAAG,MACjD,CACL,MAAM,EAAO,EAAgB,EAAM,UAAU,EACvC,EAAO,EAAgB,EAAM,UAAU,EACvC,EAAY,EAAM,UAAU,IAAK,GAAO,IAAI,EAAG,IAAA,GAAO,EAAY,EAAG,KAAK,CAAA,EAAG,EAAE,KAAK,EAAE,EAC5F,EAAM,KAAK,IAAI,EAAM,GAAA,KAAQ,CAAA,GAAO,CAAA,GAAO,CAAA,EAAW,CACxD,CAGF,OAAO,EAAM,KAAK;AAAA,CAAI,EAAI;AAAA,CAC5B,CAMA,SAAgB,EAAiB,EAA+B,CAE9D,OAAO,EAAQ,IAAI,CAAsB,EAAE,KAAK;AAAA,CAAI,CACtD"}
package/dist/marctxt.js CHANGED
@@ -1,4 +1,4 @@
1
- import { t as b } from "./types-c4Mo9m9u.js";
1
+ import { t as b } from "./types-BMKDHD1l.js";
2
2
  function u(e) {
3
3
  return e === " " ? "\\" : e;
4
4
  }
@@ -1 +1 @@
1
- {"version":3,"file":"marctxt.js","names":[],"sources":["../src/marctxt.ts"],"sourcesContent":["/**\n * MARCBreaker (marctxt) parser and serializer.\n *\n * Also known as MARCMaker format. Each field is one line:\n *\n * =LDR 00706cam a2200217 a 4500\n * =001 5490\n * =245 14$aThe Hobbit /$cJ.R.R. Tolkien.\n * =650 \\1$aHobbits (Fictitious characters)$vFiction.\n *\n * Blank indicators are represented as `\\`. Records are separated by blank lines.\n * Subfield delimiter is `$` followed by a single character code.\n *\n * Curly-brace escape sequences (per LC MARCMaker spec):\n * `{` → `{lcub}` (left curly brace; reserved by the format)\n * `}` → `{rcub}` (right curly brace; reserved by the format)\n * `$` → `{dollar}` (subfield delimiter)\n * `\\` → `{bsol}` (backslash; reserved as blank-indicator stand-in)\n *\n */\n\nimport type { MarcRecord, ControlField, DataField, Subfield } from './types';\nimport { isControlField } from './types';\n\n// ─── Indicator encoding ───────────────────────────────────────────────────────\n\nfunction encodeIndicator(ind: string): string {\n return ind === ' ' ? '\\\\' : ind;\n}\n\nfunction decodeIndicator(ch: string): string {\n return ch === '\\\\' ? ' ' : ch;\n}\n\n// ─── Value escape (see file header) ───────────────────────────────────────────\n\nfunction escapeValue(s: string): string {\n // Single-pass replacement avoids the problem of earlier escapes being\n // re-escaped by later passes (e.g. '{' → '{lcub}' then '}' → '{lcub{rcub}').\n return s.replace(/[{}\\\\\\n$]/g, (ch) => {\n if (ch === '{') return '{lcub}';\n if (ch === '}') return '{rcub}';\n if (ch === '$') return '{dollar}';\n if (ch === '\\\\') return '{bsol}';\n return ' ';\n });\n}\n\nfunction unescapeValue(s: string): string {\n return s.replace(/\\{(lcub|rcub|dollar|bsol)\\}/g, (_, name) => {\n if (name === 'lcub') return '{';\n if (name === 'rcub') return '}';\n if (name === 'dollar') return '$';\n return '\\\\';\n });\n}\n\n// ─── Subfield parsing ─────────────────────────────────────────────────────────\n\n/**\n * Parse a subfield string like \"$aValue$bOther\" into Subfield objects.\n * Uses split with a capturing group: \"$aFoo$bBar\" → [\"\", \"a\", \"Foo\", \"b\", \"Bar\"].\n * Any character following `$` is treated as a subfield code.\n */\nfunction parseSubfields(str: string): Subfield[] {\n const parts = str.split(/\\$(.)/);\n const subfields: Subfield[] = [];\n // parts[0] is content before the first $ — should be empty for well-formed data\n for (let i = 1; i < parts.length; i += 2) {\n subfields.push({ code: parts[i]!, value: unescapeValue(parts[i + 1] ?? '') });\n }\n return subfields;\n}\n\n// ─── Record block parser ──────────────────────────────────────────────────────\n\n/**\n * Parse a block of non-empty marctxt lines into a MarcRecord.\n * Each line has the form `=TAG content`.\n */\nfunction parseRecordLines(lines: string[]): MarcRecord {\n let leader = '';\n const fields: (ControlField | DataField)[] = [];\n\n for (const line of lines) {\n if (!line.startsWith('=')) continue;\n const tag = line.slice(1, 4);\n // positions 4-5 are the two separator spaces; content starts at 6\n const content = line.slice(6);\n\n if (tag === 'LDR' || tag === '000') {\n leader = content;\n continue;\n }\n\n if (tag < '010') {\n // Control field: content is the raw field data\n fields.push({ tag, data: unescapeValue(content) });\n continue;\n }\n\n // Data field: first two chars are indicators, rest are subfields\n const indicator1 = decodeIndicator(content[0] ?? '\\\\');\n const indicator2 = decodeIndicator(content[1] ?? '\\\\');\n const subfields = parseSubfields(content.slice(2));\n fields.push({ tag, indicator1, indicator2, subfields });\n }\n\n return { leader, fields };\n}\n\n// ─── Public parse API ─────────────────────────────────────────────────────────\n\n/**\n * Parse a marctxt string containing one or more records separated by blank lines.\n * Returns all records found.\n */\nexport function parseMarcTxt(text: string): MarcRecord[] {\n const lines = text.replace(/\\r\\n/g, '\\n').split('\\n');\n const records: MarcRecord[] = [];\n let buffer: string[] = [];\n\n for (const line of lines) {\n if (line.trim() === '') {\n if (buffer.length > 0) {\n records.push(parseRecordLines(buffer));\n buffer = [];\n }\n } else {\n buffer.push(line);\n }\n }\n\n if (buffer.length > 0) {\n records.push(parseRecordLines(buffer));\n }\n\n return records;\n}\n\n// ─── Serializer ───────────────────────────────────────────────────────────────\n\nfunction serializeMarcTxtRecord(record: MarcRecord): string {\n const lines: string[] = [];\n\n lines.push(`=LDR ${record.leader}`);\n\n for (const field of record.fields) {\n if (isControlField(field)) {\n lines.push(`=${field.tag} ${escapeValue(field.data)}`);\n } else {\n const ind1 = encodeIndicator(field.indicator1);\n const ind2 = encodeIndicator(field.indicator2);\n const subfields = field.subfields\n .map((sf) => `$${sf.code}${escapeValue(sf.value)}`)\n .join('');\n lines.push(`=${field.tag} ${ind1}${ind2}${subfields}`);\n }\n }\n\n return lines.join('\\n') + '\\n';\n}\n\n/**\n * Serialize one or more MarcRecords into a marctxt string.\n * Records are separated by blank lines.\n */\nexport function serializeMarcTxt(records: MarcRecord[]): string {\n // Each record ends with '\\n'; joining with '\\n' produces blank lines between records.\n return records.map(serializeMarcTxtRecord).join('\\n');\n}\n"],"mappings":";AA0BA,SAAS,EAAgB,GAAqB;AAC5C,SAAO,MAAQ,MAAM,OAAO;AAC9B;AAEA,SAAS,EAAgB,GAAoB;AAC3C,SAAO,MAAO,OAAO,MAAM;AAC7B;AAIA,SAAS,EAAY,GAAmB;AAGtC,SAAO,EAAE,QAAQ,cAAA,CAAe,MAC1B,MAAO,MAAY,WACnB,MAAO,MAAY,WACnB,MAAO,MAAY,aACnB,MAAO,OAAa,WACjB,GACR;AACH;AAEA,SAAS,EAAc,GAAmB;AACxC,SAAO,EAAE,QAAQ,gCAAA,CAAiC,GAAG,MAC/C,MAAS,SAAe,MACxB,MAAS,SAAe,MACxB,MAAS,WAAiB,MACvB,IACR;AACH;AASA,SAAS,EAAe,GAAyB;AAC/C,QAAM,IAAQ,EAAI,MAAM,OAAO,GACzB,IAAwB,CAAC;AAE/B,WAAS,IAAI,GAAG,IAAI,EAAM,QAAQ,KAAK,EACrC,CAAA,EAAU,KAAK;AAAA,IAAE,MAAM,EAAM,CAAA;AAAA,IAAK,OAAO,EAAc,EAAM,IAAI,CAAA,KAAM,EAAE;AAAA,EAAE,CAAC;AAE9E,SAAO;AACT;AAQA,SAAS,EAAiB,GAA6B;AACrD,MAAI,IAAS;AACb,QAAM,IAAuC,CAAC;AAE9C,aAAW,KAAQ,GAAO;AACxB,QAAI,CAAC,EAAK,WAAW,GAAG,EAAG;AAC3B,UAAM,IAAM,EAAK,MAAM,GAAG,CAAC,GAErB,IAAU,EAAK,MAAM,CAAC;AAE5B,QAAI,MAAQ,SAAS,MAAQ,OAAO;AAClC,MAAA,IAAS;AACT;AAAA,IACF;AAEA,QAAI,IAAM,OAAO;AAEf,MAAA,EAAO,KAAK;AAAA,QAAE,KAAA;AAAA,QAAK,MAAM,EAAc,CAAO;AAAA,MAAE,CAAC;AACjD;AAAA,IACF;AAGA,UAAM,IAAa,EAAgB,EAAQ,CAAA,KAAM,IAAI,GAC/C,IAAa,EAAgB,EAAQ,CAAA,KAAM,IAAI,GAC/C,IAAY,EAAe,EAAQ,MAAM,CAAC,CAAC;AACjD,IAAA,EAAO,KAAK;AAAA,MAAE,KAAA;AAAA,MAAK,YAAA;AAAA,MAAY,YAAA;AAAA,MAAY,WAAA;AAAA,IAAU,CAAC;AAAA,EACxD;AAEA,SAAO;AAAA,IAAE,QAAA;AAAA,IAAQ,QAAA;AAAA,EAAO;AAC1B;AAQA,SAAgB,EAAa,GAA4B;AACvD,QAAM,IAAQ,EAAK,QAAQ,SAAS;AAAA,CAAI,EAAE,MAAM;AAAA,CAAI,GAC9C,IAAwB,CAAC;AAC/B,MAAI,IAAmB,CAAC;AAExB,aAAW,KAAQ,EACjB,CAAI,EAAK,KAAK,MAAM,KACd,EAAO,SAAS,MAClB,EAAQ,KAAK,EAAiB,CAAM,CAAC,GACrC,IAAS,CAAC,KAGZ,EAAO,KAAK,CAAI;AAIpB,SAAI,EAAO,SAAS,KAClB,EAAQ,KAAK,EAAiB,CAAM,CAAC,GAGhC;AACT;AAIA,SAAS,EAAuB,GAA4B;AAC1D,QAAM,IAAkB,CAAC;AAEzB,EAAA,EAAM,KAAK,SAAS,EAAO,MAAA,EAAQ;AAEnC,aAAW,KAAS,EAAO,OACzB,KAAI,EAAe,CAAK,EACtB,CAAA,EAAM,KAAK,IAAI,EAAM,GAAA,KAAQ,EAAY,EAAM,IAAI,CAAA,EAAG;AAAA,OACjD;AACL,UAAM,IAAO,EAAgB,EAAM,UAAU,GACvC,IAAO,EAAgB,EAAM,UAAU,GACvC,IAAY,EAAM,UACrB,IAAA,CAAK,MAAO,IAAI,EAAG,IAAA,GAAO,EAAY,EAAG,KAAK,CAAA,EAAG,EACjD,KAAK,EAAE;AACV,IAAA,EAAM,KAAK,IAAI,EAAM,GAAA,KAAQ,CAAA,GAAO,CAAA,GAAO,CAAA,EAAW;AAAA,EACxD;AAGF,SAAO,EAAM,KAAK;AAAA,CAAI,IAAI;AAAA;AAC5B;AAMA,SAAgB,EAAiB,GAA+B;AAE9D,SAAO,EAAQ,IAAI,CAAsB,EAAE,KAAK;AAAA,CAAI;AACtD"}
1
+ {"version":3,"file":"marctxt.js","names":[],"sources":["../src/marctxt.ts"],"sourcesContent":["/**\n * MARCBreaker (marctxt) parser and serializer.\n *\n * Also known as MARCMaker format. Each field is one line:\n *\n * =LDR 00706cam a2200217 a 4500\n * =001 5490\n * =245 14$aThe Hobbit /$cJ.R.R. Tolkien.\n * =650 \\1$aHobbits (Fictitious characters)$vFiction.\n *\n * Blank indicators are represented as `\\`. Records are separated by blank lines.\n * Subfield delimiter is `$` followed by a single character code.\n *\n * Curly-brace escape sequences (per LC MARCMaker spec):\n * `{` → `{lcub}` (left curly brace; reserved by the format)\n * `}` → `{rcub}` (right curly brace; reserved by the format)\n * `$` → `{dollar}` (subfield delimiter)\n * `\\` → `{bsol}` (backslash; reserved as blank-indicator stand-in)\n *\n */\n\nimport type { MarcRecord, ControlField, DataField, Subfield } from './types';\nimport { isControlField } from './types';\n\n// ─── Indicator encoding ───────────────────────────────────────────────────────\n\nfunction encodeIndicator(ind: string): string {\n return ind === ' ' ? '\\\\' : ind;\n}\n\nfunction decodeIndicator(ch: string): string {\n return ch === '\\\\' ? ' ' : ch;\n}\n\n// ─── Value escape (see file header) ───────────────────────────────────────────\n\nfunction escapeValue(s: string): string {\n // Single-pass replacement avoids the problem of earlier escapes being\n // re-escaped by later passes (e.g. '{' → '{lcub}' then '}' → '{lcub{rcub}').\n return s.replace(/[{}\\\\\\n$]/g, (ch) => {\n if (ch === '{') return '{lcub}';\n if (ch === '}') return '{rcub}';\n if (ch === '$') return '{dollar}';\n if (ch === '\\\\') return '{bsol}';\n return ' ';\n });\n}\n\nfunction unescapeValue(s: string): string {\n return s.replace(/\\{(lcub|rcub|dollar|bsol)\\}/g, (_, name) => {\n if (name === 'lcub') return '{';\n if (name === 'rcub') return '}';\n if (name === 'dollar') return '$';\n return '\\\\';\n });\n}\n\n// ─── Subfield parsing ─────────────────────────────────────────────────────────\n\n/**\n * Parse a subfield string like \"$aValue$bOther\" into Subfield objects.\n * Uses split with a capturing group: \"$aFoo$bBar\" → [\"\", \"a\", \"Foo\", \"b\", \"Bar\"].\n * Any character following `$` is treated as a subfield code.\n */\nfunction parseSubfields(str: string): Subfield[] {\n const parts = str.split(/\\$(.)/);\n const subfields: Subfield[] = [];\n // parts[0] is content before the first $ — should be empty for well-formed data\n for (let i = 1; i < parts.length; i += 2) {\n subfields.push({ code: parts[i]!, value: unescapeValue(parts[i + 1] ?? '') });\n }\n return subfields;\n}\n\n// ─── Record block parser ──────────────────────────────────────────────────────\n\n/**\n * Parse a block of non-empty marctxt lines into a MarcRecord.\n * Each line has the form `=TAG content`.\n */\nfunction parseRecordLines(lines: string[]): MarcRecord {\n let leader = '';\n const fields: (ControlField | DataField)[] = [];\n\n for (const line of lines) {\n if (!line.startsWith('=')) continue;\n const tag = line.slice(1, 4);\n // positions 4-5 are the two separator spaces; content starts at 6\n const content = line.slice(6);\n\n if (tag === 'LDR' || tag === '000') {\n leader = content;\n continue;\n }\n\n if (tag < '010') {\n // Control field: content is the raw field data\n fields.push({ tag, data: unescapeValue(content) });\n continue;\n }\n\n // Data field: first two chars are indicators, rest are subfields\n const indicator1 = decodeIndicator(content[0] ?? '\\\\');\n const indicator2 = decodeIndicator(content[1] ?? '\\\\');\n const subfields = parseSubfields(content.slice(2));\n fields.push({ tag, indicator1, indicator2, subfields });\n }\n\n return { leader, fields };\n}\n\n// ─── Public parse API ─────────────────────────────────────────────────────────\n\n/**\n * Parse a marctxt string containing one or more records separated by blank lines.\n * Returns all records found.\n */\nexport function parseMarcTxt(text: string): MarcRecord[] {\n const lines = text.replace(/\\r\\n/g, '\\n').split('\\n');\n const records: MarcRecord[] = [];\n let buffer: string[] = [];\n\n for (const line of lines) {\n if (line.trim() === '') {\n if (buffer.length > 0) {\n records.push(parseRecordLines(buffer));\n buffer = [];\n }\n } else {\n buffer.push(line);\n }\n }\n\n if (buffer.length > 0) {\n records.push(parseRecordLines(buffer));\n }\n\n return records;\n}\n\n// ─── Serializer ───────────────────────────────────────────────────────────────\n\nfunction serializeMarcTxtRecord(record: MarcRecord): string {\n const lines: string[] = [];\n\n lines.push(`=LDR ${record.leader}`);\n\n for (const field of record.fields) {\n if (isControlField(field)) {\n lines.push(`=${field.tag} ${escapeValue(field.data)}`);\n } else {\n const ind1 = encodeIndicator(field.indicator1);\n const ind2 = encodeIndicator(field.indicator2);\n const subfields = field.subfields.map((sf) => `$${sf.code}${escapeValue(sf.value)}`).join('');\n lines.push(`=${field.tag} ${ind1}${ind2}${subfields}`);\n }\n }\n\n return lines.join('\\n') + '\\n';\n}\n\n/**\n * Serialize one or more MarcRecords into a marctxt string.\n * Records are separated by blank lines.\n */\nexport function serializeMarcTxt(records: MarcRecord[]): string {\n // Each record ends with '\\n'; joining with '\\n' produces blank lines between records.\n return records.map(serializeMarcTxtRecord).join('\\n');\n}\n"],"mappings":";AA0BA,SAAS,EAAgB,GAAqB;AAC5C,SAAO,MAAQ,MAAM,OAAO;AAC9B;AAEA,SAAS,EAAgB,GAAoB;AAC3C,SAAO,MAAO,OAAO,MAAM;AAC7B;AAIA,SAAS,EAAY,GAAmB;AAGtC,SAAO,EAAE,QAAQ,cAAA,CAAe,MAC1B,MAAO,MAAY,WACnB,MAAO,MAAY,WACnB,MAAO,MAAY,aACnB,MAAO,OAAa,WACjB,GACR;AACH;AAEA,SAAS,EAAc,GAAmB;AACxC,SAAO,EAAE,QAAQ,gCAAA,CAAiC,GAAG,MAC/C,MAAS,SAAe,MACxB,MAAS,SAAe,MACxB,MAAS,WAAiB,MACvB,IACR;AACH;AASA,SAAS,EAAe,GAAyB;AAC/C,QAAM,IAAQ,EAAI,MAAM,OAAO,GACzB,IAAwB,CAAC;AAE/B,WAAS,IAAI,GAAG,IAAI,EAAM,QAAQ,KAAK,EACrC,CAAA,EAAU,KAAK;AAAA,IAAE,MAAM,EAAM,CAAA;AAAA,IAAK,OAAO,EAAc,EAAM,IAAI,CAAA,KAAM,EAAE;AAAA,EAAE,CAAC;AAE9E,SAAO;AACT;AAQA,SAAS,EAAiB,GAA6B;AACrD,MAAI,IAAS;AACb,QAAM,IAAuC,CAAC;AAE9C,aAAW,KAAQ,GAAO;AACxB,QAAI,CAAC,EAAK,WAAW,GAAG,EAAG;AAC3B,UAAM,IAAM,EAAK,MAAM,GAAG,CAAC,GAErB,IAAU,EAAK,MAAM,CAAC;AAE5B,QAAI,MAAQ,SAAS,MAAQ,OAAO;AAClC,MAAA,IAAS;AACT;AAAA,IACF;AAEA,QAAI,IAAM,OAAO;AAEf,MAAA,EAAO,KAAK;AAAA,QAAE,KAAA;AAAA,QAAK,MAAM,EAAc,CAAO;AAAA,MAAE,CAAC;AACjD;AAAA,IACF;AAGA,UAAM,IAAa,EAAgB,EAAQ,CAAA,KAAM,IAAI,GAC/C,IAAa,EAAgB,EAAQ,CAAA,KAAM,IAAI,GAC/C,IAAY,EAAe,EAAQ,MAAM,CAAC,CAAC;AACjD,IAAA,EAAO,KAAK;AAAA,MAAE,KAAA;AAAA,MAAK,YAAA;AAAA,MAAY,YAAA;AAAA,MAAY,WAAA;AAAA,IAAU,CAAC;AAAA,EACxD;AAEA,SAAO;AAAA,IAAE,QAAA;AAAA,IAAQ,QAAA;AAAA,EAAO;AAC1B;AAQA,SAAgB,EAAa,GAA4B;AACvD,QAAM,IAAQ,EAAK,QAAQ,SAAS;AAAA,CAAI,EAAE,MAAM;AAAA,CAAI,GAC9C,IAAwB,CAAC;AAC/B,MAAI,IAAmB,CAAC;AAExB,aAAW,KAAQ,EACjB,CAAI,EAAK,KAAK,MAAM,KACd,EAAO,SAAS,MAClB,EAAQ,KAAK,EAAiB,CAAM,CAAC,GACrC,IAAS,CAAC,KAGZ,EAAO,KAAK,CAAI;AAIpB,SAAI,EAAO,SAAS,KAClB,EAAQ,KAAK,EAAiB,CAAM,CAAC,GAGhC;AACT;AAIA,SAAS,EAAuB,GAA4B;AAC1D,QAAM,IAAkB,CAAC;AAEzB,EAAA,EAAM,KAAK,SAAS,EAAO,MAAA,EAAQ;AAEnC,aAAW,KAAS,EAAO,OACzB,KAAI,EAAe,CAAK,EACtB,CAAA,EAAM,KAAK,IAAI,EAAM,GAAA,KAAQ,EAAY,EAAM,IAAI,CAAA,EAAG;AAAA,OACjD;AACL,UAAM,IAAO,EAAgB,EAAM,UAAU,GACvC,IAAO,EAAgB,EAAM,UAAU,GACvC,IAAY,EAAM,UAAU,IAAA,CAAK,MAAO,IAAI,EAAG,IAAA,GAAO,EAAY,EAAG,KAAK,CAAA,EAAG,EAAE,KAAK,EAAE;AAC5F,IAAA,EAAM,KAAK,IAAI,EAAM,GAAA,KAAQ,CAAA,GAAO,CAAA,GAAO,CAAA,EAAW;AAAA,EACxD;AAGF,SAAO,EAAM,KAAK;AAAA,CAAI,IAAI;AAAA;AAC5B;AAMA,SAAgB,EAAiB,GAA+B;AAE9D,SAAO,EAAQ,IAAI,CAAsB,EAAE,KAAK;AAAA,CAAI;AACtD"}
package/dist/marcxml.cjs CHANGED
@@ -1,8 +1,8 @@
1
- Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const $=require("./types-CJcxHJff.cjs");var b=new Map([["amp","&"],["lt","<"],["gt",">"],["quot",'"'],["apos","'"]]);function g(t){return t.replace(/&(?:#x([0-9a-fA-F]+)|#([0-9]+)|([a-zA-Z]+));/g,(i,r,s,e)=>{if(r!==void 0){const n=parseInt(r,16);return n>=0&&n<=1114111?String.fromCodePoint(n):"�"}if(s!==void 0){const n=parseInt(s,10);return n>=0&&n<=1114111?String.fromCodePoint(n):"�"}return b.get(e)??i})}function u(t){return t.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g,"�").replace(/&/g,"&amp;").replace(/</g,"&lt;").replace(/>/g,"&gt;").replace(/"/g,"&quot;").replace(/\r/g,"&#13;")}function f(t){const i=t.indexOf(":");return i===-1?t:t.slice(i+1)}function m(t){const i={},r=/([a-zA-Z_:][^\s=]*)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;let s;for(;(s=r.exec(t))!==null;){const e=f(s[1]);i[e]=g(s[2]??s[3]??"")}return i}function v(t){const i=[];let r=0;for(;r<t.length;){const s=t.indexOf("<",r);if(s===-1){t.slice(r).trim()&&i.push({type:"text",text:g(t.slice(r))});break}if(s>r){const c=t.slice(r,s);c.trim()&&i.push({type:"text",text:g(c)})}const e=t.indexOf(">",s);if(e===-1)break;const n=t.slice(s+1,e);if(n.startsWith("!")||n.startsWith("?")){r=e+1;continue}if(n.startsWith("/"))i.push({type:"close",name:f(n.slice(1).trim())});else if(n.endsWith("/")){const c=n.slice(0,-1).trim(),o=c.search(/\s/),l=o===-1?c:c.slice(0,o),p=o===-1?"":c.slice(o);i.push({type:"self-close",name:f(l),attrs:m(p)})}else{const c=n.search(/\s/),o=c===-1?n:n.slice(0,c),l=c===-1?"":n.slice(c);i.push({type:"open",name:f(o),attrs:m(l)})}r=e+1}return i}function M(t,i){let r="";const s=[];let e=i;for(;e<t.length;){const n=t[e];if(n.type==="close"&&n.name==="record")return{record:{leader:r,fields:s},end:e+1};if(n.type==="open"&&n.name==="leader"){e++,e<t.length&&t[e].type==="text"&&(r=t[e].text.trim(),e++),e<t.length&&t[e].type==="close"&&e++;continue}if(n.type==="self-close"&&n.name==="controlfield"){s.push({tag:n.attrs?.tag??"",data:""}),e++;continue}if(n.type==="open"&&n.name==="controlfield"){const c=n.attrs?.tag??"";e++;let o="";e<t.length&&t[e].type==="text"&&(o=t[e].text??"",e++),e<t.length&&t[e].type==="close"&&e++,s.push({tag:c,data:o});continue}if(n.type==="self-close"&&n.name==="datafield"){s.push({tag:n.attrs?.tag??"",indicator1:n.attrs?.ind1??" ",indicator2:n.attrs?.ind2??" ",subfields:[]}),e++;continue}if(n.type==="open"&&n.name==="datafield"){const c=n.attrs?.tag??"",o=n.attrs?.ind1??" ",l=n.attrs?.ind2??" ",p=[];for(e++;e<t.length;){const d=t[e];if(d.type==="close"&&d.name==="datafield"){e++;break}if(d.type==="open"&&d.name==="subfield"){const x=d.attrs?.code??"";e++;let h="";e<t.length&&t[e].type==="text"&&(h=t[e].text??"",e++),e<t.length&&t[e].type==="close"&&e++,p.push({code:x,value:h});continue}e++}s.push({tag:c,indicator1:o,indicator2:l,subfields:p});continue}e++}return{record:{leader:r,fields:s},end:e}}function X(t){const i=v(t),r=[];let s=0;for(;s<i.length;){const e=i[s];if(e.type==="open"&&e.name==="record"){const{record:n,end:c}=M(i,s+1);r.push(n),s=c;continue}s++}return r}var w=`<?xml version="1.0" encoding="UTF-8"?>
2
- `,y='xmlns="http://www.loc.gov/MARC21/slim"',a=" ";function A(t){const i=[`<record ${y}>`];i.push(`${a}<leader>${u(t.leader)}</leader>`);for(const r of t.fields)if($.isControlField(r))i.push(`${a}<controlfield tag="${r.tag}">${u(r.data)}</controlfield>`);else{const s=r.indicator1===" "?" ":r.indicator1,e=r.indicator2===" "?" ":r.indicator2;i.push(`${a}<datafield tag="${r.tag}" ind1="${s}" ind2="${e}">`);for(const n of r.subfields)i.push(`${a}${a}<subfield code="${n.code}">${u(n.value)}</subfield>`);i.push(`${a}</datafield>`)}return i.push("</record>"),i.join(`
3
- `)}function C(t){const i=[w,`<collection ${y}>`];for(const r of t){const s=A(r).split(`
4
- `).map(e=>a+e).join(`
5
- `);i.push(s)}return i.push("</collection>"),i.join(`
6
- `)}exports.parseMarcXml=X;exports.serializeMarcXml=C;
1
+ Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const M=require("./types-CsOhH4OF.cjs"),d=require("./warnings-6yoB06xI.cjs");var A=new Map([["amp","&"],["lt","<"],["gt",">"],["quot",'"'],["apos","'"]]);function y(e){return e.replace(/&(?:#x([0-9a-fA-F]+)|#([0-9]+)|([a-zA-Z]+));/g,(s,n,i,r)=>{if(n!==void 0){const c=parseInt(n,16);return c>=0&&c<=1114111?String.fromCodePoint(c):"�"}if(i!==void 0){const c=parseInt(i,10);return c>=0&&c<=1114111?String.fromCodePoint(c):"�"}return A.get(r)??s})}function u(e){return e.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g,"�").replace(/&/g,"&amp;").replace(/</g,"&lt;").replace(/>/g,"&gt;").replace(/"/g,"&quot;").replace(/\r/g,"&#13;")}function m(e){const s=e.indexOf(":");return s===-1?e:e.slice(s+1)}function _(e){const s={},n=/([a-zA-Z_:][^\s=]*)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;let i;for(;(i=n.exec(e))!==null;){const r=m(i[1]);s[r]=y(i[2]??i[3]??"")}return s}function T(e,s){let n=!1,i=!1;for(let r=s;r<e.length;r++){const c=e[r];if(c==='"'&&!n)i=!i;else if(c==="'"&&!i)n=!n;else if(c===">"&&!n&&!i)return r}return-1}function X(e,s){const n=[];let i=0;for(;i<e.length;){const r=e.indexOf("<",i);if(r===-1){e.slice(i).trim()&&n.push({type:"text",text:y(e.slice(i))});break}if(r>i){const t=e.slice(i,r);t.trim()&&n.push({type:"text",text:y(t)})}if(e.startsWith("<!--",r)){const t=e.indexOf("-->",r+4);t===-1?(s?.push(d.createWarning("malformed_xml","Unterminated comment")),i=e.length):i=t+3;continue}if(e.startsWith("<![CDATA[",r)){const t=e.indexOf("]]>",r+9);if(t===-1)s?.push(d.createWarning("malformed_xml","Unterminated CDATA section")),i=e.length;else{const a=e.slice(r+9,t);a&&n.push({type:"text",text:a}),i=t+3}continue}if(e.startsWith("<?",r)){const t=e.indexOf("?>",r+2);i=t===-1?e.length:t+2;continue}if(e.startsWith("<!",r)){const t=e.indexOf(">",r+2);i=t===-1?e.length:t+1;continue}const c=T(e,r+1);if(c===-1){s?.push(d.createWarning("malformed_xml","Unclosed tag at end of input"));break}const o=e.slice(r+1,c);if(o.startsWith("/"))n.push({type:"close",name:m(o.slice(1).trim())});else if(o.endsWith("/")){const t=o.slice(0,-1).trim(),a=t.search(/\s/),l=a===-1?t:t.slice(0,a),f=a===-1?"":t.slice(a);n.push({type:"self-close",name:m(l),attrs:_(f)})}else{const t=o.search(/\s/),a=t===-1?o:o.slice(0,t),l=t===-1?"":o.slice(t);n.push({type:"open",name:m(a),attrs:_(l)})}i=c+1}return n}function g(e,s,n){if(n.strict)throw new Error(s.message);const i=n.maxWarnings??100;e.length<i&&e.push(s)}function C(e,s,n,i){let r="",c=!1;const o=[];let t=s;for(;t<e.length;){const a=e[t];if(a.type==="close"&&a.name==="record")return c||g(n,d.createWarning("missing_element","Record has no <leader> element"),i),{record:{leader:r,fields:o},end:t+1};if(a.type==="open"&&a.name==="leader"){c=!0,t++,t<e.length&&e[t].type==="text"&&(r=e[t].text.trim(),t++),r.length!==24&&g(n,d.createWarning("invalid_leader",`Leader is ${r.length} characters, expected 24`),i),t<e.length&&e[t].type==="close"&&t++;continue}if(a.type==="self-close"&&a.name==="controlfield"){const l=a.attrs?.tag;l===void 0&&g(n,d.createWarning("missing_element","controlfield missing tag attribute"),i),o.push({tag:l??"",data:""}),t++;continue}if(a.type==="open"&&a.name==="controlfield"){const l=a.attrs?.tag;l===void 0&&g(n,d.createWarning("missing_element","controlfield missing tag attribute"),i),t++;let f="";t<e.length&&e[t].type==="text"&&(f=e[t].text??"",t++),t<e.length&&e[t].type==="close"&&t++,o.push({tag:l??"",data:f});continue}if(a.type==="self-close"&&a.name==="datafield"){const l=a.attrs?.tag;l===void 0&&g(n,d.createWarning("missing_element","datafield missing tag attribute"),i),o.push({tag:l??"",indicator1:a.attrs?.ind1??" ",indicator2:a.attrs?.ind2??" ",subfields:[]}),t++;continue}if(a.type==="open"&&a.name==="datafield"){const l=a.attrs?.tag;l===void 0&&g(n,d.createWarning("missing_element","datafield missing tag attribute"),i);const f=a.attrs?.ind1??" ",E=a.attrs?.ind2??" ",W=[];for(t++;t<e.length;){const h=e[t];if(h.type==="close"&&h.name==="datafield"){t++;break}if(h.type==="open"&&h.name==="subfield"){const b=h.attrs?.code;b===void 0&&g(n,d.createWarning("missing_element","subfield missing code attribute",void 0,l),i),t++;let v="";t<e.length&&e[t].type==="text"&&(v=e[t].text??"",t++),t<e.length&&e[t].type==="close"&&t++,W.push({code:b??"",value:v});continue}t++}o.push({tag:l??"",indicator1:f,indicator2:E,subfields:W});continue}t++}return c||g(n,d.createWarning("missing_element","Record has no <leader> element"),i),{record:{leader:r,fields:o},end:t}}function $(e,s){const n=s??{},i=[],r=X(e,i),c=[];let o=0;for(;o<r.length;){const t=r[o];if(t.type==="open"&&t.name==="record"){const a=[],{record:l,end:f}=C(r,o+1,a,n);c.push({record:l,warnings:a}),o=f;continue}o++}return c.length===0&&i.length>0?c.push({record:null,warnings:i}):i.length>0&&c.length>0&&(c[0]={record:c[0].record,warnings:[...i,...c[0].warnings]}),{results:c}}function O(e,s){return $(e,s).results.map(n=>n.record).filter(n=>n!==null)}var S=`<?xml version="1.0" encoding="UTF-8"?>
2
+ `,x='xmlns="http://www.loc.gov/MARC21/slim"',p=" ";function I(e){const s=[`<record ${x}>`];s.push(`${p}<leader>${u(e.leader)}</leader>`);for(const n of e.fields)if(M.isControlField(n))s.push(`${p}<controlfield tag="${u(n.tag)}">${u(n.data)}</controlfield>`);else{const i=n.indicator1===" "?" ":n.indicator1,r=n.indicator2===" "?" ":n.indicator2;s.push(`${p}<datafield tag="${u(n.tag)}" ind1="${u(i)}" ind2="${u(r)}">`);for(const c of n.subfields)s.push(`${p}${p}<subfield code="${u(c.code)}">${u(c.value)}</subfield>`);s.push(`${p}</datafield>`)}return s.push("</record>"),s.join(`
3
+ `)}function q(e){const s=[S,`<collection ${x}>`];for(const n of e){const i=I(n).split(`
4
+ `).map(r=>p+r).join(`
5
+ `);s.push(i)}return s.push("</collection>"),s.join(`
6
+ `)}exports.parseMarcXml=O;exports.parseMarcXmlWithWarnings=$;exports.serializeMarcXml=q;
7
7
 
8
8
  //# sourceMappingURL=marcxml.cjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"marcxml.cjs","names":[],"sources":["../src/marcxml.ts"],"sourcesContent":["/**\n * MARCXML parser and serializer.\n *\n * Supports the Library of Congress MARCXML schema:\n * http://www.loc.gov/MARC21/slim\n *\n * Parsing is done with a hand-rolled state machine — no XML library needed.\n * The MARCXML format is sufficiently regular (fixed element names, no arbitrary\n * nesting) that a full DOM parser is unnecessary.\n */\n\nimport type { MarcRecord, ControlField, DataField, Subfield } from './types';\nimport { isControlField } from './types';\n\n// ─── XML entity handling ─────────────────────────────────────────────────────\n\nconst ENTITY_MAP: ReadonlyMap<string, string> = new Map([\n ['amp', '&'],\n ['lt', '<'],\n ['gt', '>'],\n ['quot', '\"'],\n ['apos', \"'\"],\n]);\n\nfunction unescapeXml(text: string): string {\n return text.replace(/&(?:#x([0-9a-fA-F]+)|#([0-9]+)|([a-zA-Z]+));/g, (_, hex, dec, name) => {\n if (hex !== undefined) {\n const cp = parseInt(hex, 16);\n return cp >= 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : '�';\n }\n if (dec !== undefined) {\n const cp = parseInt(dec, 10);\n return cp >= 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : '�';\n }\n return ENTITY_MAP.get(name) ?? _;\n });\n}\n\nfunction escapeXml(text: string): string {\n return text\n // XML 1.0 forbids most C0 control characters in document text. There is no\n // valid XML 1.0 representation for them, so substitute the Unicode\n // replacement character to keep the output well-formed.\n .replace(/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]/g, '�')\n .replace(/&/g, '&amp;')\n .replace(/</g, '&lt;')\n .replace(/>/g, '&gt;')\n .replace(/\"/g, '&quot;')\n // Preserve literal CR through the XML round-trip: XML parsers normalize\n // bare \\r and \\r\\n to \\n, so we must encode CR as a numeric reference.\n .replace(/\\r/g, '&#13;');\n}\n\n// ─── Minimal tokeniser ────────────────────────────────────────────────────────\n\ninterface Token {\n type: 'open' | 'close' | 'self-close' | 'text';\n /** Local name (no namespace prefix) */\n name?: string;\n attrs?: Record<string, string>;\n text?: string;\n}\n\n/**\n * Strip namespace prefix from a tag name, e.g. \"marc:record\" → \"record\".\n */\nfunction localName(raw: string): string {\n const colon = raw.indexOf(':');\n return colon === -1 ? raw : raw.slice(colon + 1);\n}\n\n/**\n * Parse `key=\"value\"` pairs out of an attribute string.\n */\nfunction parseAttrs(attrStr: string): Record<string, string> {\n const attrs: Record<string, string> = {};\n const re = /([a-zA-Z_:][^\\s=]*)\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)')/g;\n let m: RegExpExecArray | null;\n while ((m = re.exec(attrStr)) !== null) {\n const key = localName(m[1]!);\n attrs[key] = unescapeXml(m[2] ?? m[3] ?? '');\n }\n return attrs;\n}\n\n/**\n * Tokenise an XML string into a flat stream of open/close/text tokens.\n * Skips processing instructions, comments, and DOCTYPE declarations.\n * Sufficient for the well-constrained MARCXML format.\n */\nfunction tokenise(xml: string): Token[] {\n const tokens: Token[] = [];\n let i = 0;\n\n while (i < xml.length) {\n const ltPos = xml.indexOf('<', i);\n\n // Text node before next tag\n if (ltPos === -1) {\n const text = xml.slice(i).trim();\n if (text) tokens.push({ type: 'text', text: unescapeXml(xml.slice(i)) });\n break;\n }\n\n if (ltPos > i) {\n const raw = xml.slice(i, ltPos);\n const text = raw.trim();\n if (text) tokens.push({ type: 'text', text: unescapeXml(raw) });\n }\n\n const gtPos = xml.indexOf('>', ltPos);\n if (gtPos === -1) break;\n\n const tag = xml.slice(ltPos + 1, gtPos);\n\n // Skip comments, PIs, DOCTYPE\n if (tag.startsWith('!') || tag.startsWith('?')) {\n i = gtPos + 1;\n continue;\n }\n\n if (tag.startsWith('/')) {\n tokens.push({ type: 'close', name: localName(tag.slice(1).trim()) });\n } else if (tag.endsWith('/')) {\n const inner = tag.slice(0, -1).trim();\n const spaceIdx = inner.search(/\\s/);\n const name = spaceIdx === -1 ? inner : inner.slice(0, spaceIdx);\n const attrStr = spaceIdx === -1 ? '' : inner.slice(spaceIdx);\n tokens.push({ type: 'self-close', name: localName(name), attrs: parseAttrs(attrStr) });\n } else {\n const spaceIdx = tag.search(/\\s/);\n const name = spaceIdx === -1 ? tag : tag.slice(0, spaceIdx);\n const attrStr = spaceIdx === -1 ? '' : tag.slice(spaceIdx);\n tokens.push({ type: 'open', name: localName(name), attrs: parseAttrs(attrStr) });\n }\n\n i = gtPos + 1;\n }\n\n return tokens;\n}\n\n// ─── MARCXML parser ───────────────────────────────────────────────────────────\n\n/**\n * Parse one `<record>` element's worth of tokens into a MarcRecord.\n * Mutates `pos` via the returned index.\n */\nfunction parseRecordTokens(tokens: Token[], start: number): { record: MarcRecord; end: number } {\n let leader = '';\n const fields: (ControlField | DataField)[] = [];\n let i = start;\n\n while (i < tokens.length) {\n const tok = tokens[i]!;\n\n if (tok.type === 'close' && tok.name === 'record') {\n return { record: { leader, fields }, end: i + 1 };\n }\n\n if (tok.type === 'open' && tok.name === 'leader') {\n i++;\n if (i < tokens.length && tokens[i]!.type === 'text') {\n leader = tokens[i]!.text!.trim();\n i++;\n }\n // consume </leader>\n if (i < tokens.length && tokens[i]!.type === 'close') i++;\n continue;\n }\n\n if (tok.type === 'self-close' && tok.name === 'controlfield') {\n fields.push({ tag: tok.attrs?.['tag'] ?? '', data: '' });\n i++;\n continue;\n }\n\n if (tok.type === 'open' && tok.name === 'controlfield') {\n const tag = tok.attrs?.['tag'] ?? '';\n i++;\n let data = '';\n if (i < tokens.length && tokens[i]!.type === 'text') {\n data = tokens[i]!.text ?? '';\n i++;\n }\n // consume </controlfield>\n if (i < tokens.length && tokens[i]!.type === 'close') i++;\n fields.push({ tag, data });\n continue;\n }\n\n if (tok.type === 'self-close' && tok.name === 'datafield') {\n fields.push({\n tag: tok.attrs?.['tag'] ?? '',\n indicator1: tok.attrs?.['ind1'] ?? ' ',\n indicator2: tok.attrs?.['ind2'] ?? ' ',\n subfields: [],\n });\n i++;\n continue;\n }\n\n if (tok.type === 'open' && tok.name === 'datafield') {\n const tag = tok.attrs?.['tag'] ?? '';\n const indicator1 = tok.attrs?.['ind1'] ?? ' ';\n const indicator2 = tok.attrs?.['ind2'] ?? ' ';\n const subfields: Subfield[] = [];\n i++;\n\n while (i < tokens.length) {\n const stok = tokens[i]!;\n if (stok.type === 'close' && stok.name === 'datafield') {\n i++;\n break;\n }\n if (stok.type === 'open' && stok.name === 'subfield') {\n const code = stok.attrs?.['code'] ?? '';\n i++;\n let value = '';\n if (i < tokens.length && tokens[i]!.type === 'text') {\n value = tokens[i]!.text ?? '';\n i++;\n }\n // consume </subfield>\n if (i < tokens.length && tokens[i]!.type === 'close') i++;\n subfields.push({ code, value });\n continue;\n }\n i++;\n }\n\n fields.push({ tag, indicator1, indicator2, subfields });\n continue;\n }\n\n i++;\n }\n\n return { record: { leader, fields }, end: i };\n}\n\n/**\n * Parse a MARCXML string containing one `<collection>` or one bare `<record>`.\n * Returns all records found.\n */\nexport function parseMarcXml(xml: string): MarcRecord[] {\n const tokens = tokenise(xml);\n const records: MarcRecord[] = [];\n let i = 0;\n\n while (i < tokens.length) {\n const tok = tokens[i]!;\n if (tok.type === 'open' && tok.name === 'record') {\n const { record, end } = parseRecordTokens(tokens, i + 1);\n records.push(record);\n i = end;\n continue;\n }\n i++;\n }\n\n return records;\n}\n\n// ─── MARCXML serializer ───────────────────────────────────────────────────────\n\nconst XML_HEADER = '<?xml version=\"1.0\" encoding=\"UTF-8\"?>\\n';\nconst COLLECTION_NS = 'xmlns=\"http://www.loc.gov/MARC21/slim\"';\nconst INDENT = ' ';\n\nfunction serializeMarcXmlRecord(record: MarcRecord): string {\n const lines: string[] = [`<record ${COLLECTION_NS}>`];\n lines.push(`${INDENT}<leader>${escapeXml(record.leader)}</leader>`);\n\n for (const field of record.fields) {\n if (isControlField(field)) {\n lines.push(`${INDENT}<controlfield tag=\"${field.tag}\">${escapeXml(field.data)}</controlfield>`);\n } else {\n const ind1 = field.indicator1 === ' ' ? ' ' : field.indicator1;\n const ind2 = field.indicator2 === ' ' ? ' ' : field.indicator2;\n lines.push(`${INDENT}<datafield tag=\"${field.tag}\" ind1=\"${ind1}\" ind2=\"${ind2}\">`);\n for (const sf of field.subfields) {\n lines.push(\n `${INDENT}${INDENT}<subfield code=\"${sf.code}\">${escapeXml(sf.value)}</subfield>`\n );\n }\n lines.push(`${INDENT}</datafield>`);\n }\n }\n\n lines.push('</record>');\n return lines.join('\\n');\n}\n\n/**\n * Serialize one or more MarcRecords into a MARCXML `<collection>` document.\n */\nexport function serializeMarcXml(records: MarcRecord[]): string {\n const parts: string[] = [\n XML_HEADER,\n `<collection ${COLLECTION_NS}>`,\n ];\n\n for (const record of records) {\n // Indent each record element by one level inside <collection>\n const recordXml = serializeMarcXmlRecord(record)\n .split('\\n')\n .map((line) => INDENT + line)\n .join('\\n');\n parts.push(recordXml);\n }\n\n parts.push('</collection>');\n return parts.join('\\n');\n}\n"],"mappings":"2GAgBA,IAAM,EAA0C,IAAI,IAAI,CACtD,CAAC,MAAO,GAAG,EACX,CAAC,KAAM,GAAG,EACV,CAAC,KAAM,GAAG,EACV,CAAC,OAAQ,GAAG,EACZ,CAAC,OAAQ,GAAG,CACd,CAAC,EAED,SAAS,EAAY,EAAsB,CACzC,OAAO,EAAK,QAAQ,gDAAA,CAAkD,EAAG,EAAK,EAAK,IAAS,CAC1F,GAAI,IAAQ,OAAW,CACrB,MAAM,EAAK,SAAS,EAAK,EAAE,EAC3B,OAAO,GAAM,GAAK,GAAM,QAAW,OAAO,cAAc,CAAE,EAAI,GAChE,CACA,GAAI,IAAQ,OAAW,CACrB,MAAM,EAAK,SAAS,EAAK,EAAE,EAC3B,OAAO,GAAM,GAAK,GAAM,QAAW,OAAO,cAAc,CAAE,EAAI,GAChE,CACA,OAAO,EAAW,IAAI,CAAI,GAAK,CACjC,CAAC,CACH,CAEA,SAAS,EAAU,EAAsB,CACvC,OAAO,EAIJ,QAAQ,gCAAiC,GAAG,EAC5C,QAAQ,KAAM,OAAO,EACrB,QAAQ,KAAM,MAAM,EACpB,QAAQ,KAAM,MAAM,EACpB,QAAQ,KAAM,QAAQ,EAGtB,QAAQ,MAAO,OAAO,CAC3B,CAeA,SAAS,EAAU,EAAqB,CACtC,MAAM,EAAQ,EAAI,QAAQ,GAAG,EAC7B,OAAO,IAAU,GAAK,EAAM,EAAI,MAAM,EAAQ,CAAC,CACjD,CAKA,SAAS,EAAW,EAAyC,CAC3D,MAAM,EAAgC,CAAC,EACjC,EAAK,qDACX,IAAI,EACJ,MAAQ,EAAI,EAAG,KAAK,CAAO,KAAO,MAAM,CACtC,MAAM,EAAM,EAAU,EAAE,CAAA,CAAG,EAC3B,EAAM,CAAA,EAAO,EAAY,EAAE,CAAA,GAAM,EAAE,CAAA,GAAM,EAAE,CAC7C,CACA,OAAO,CACT,CAOA,SAAS,EAAS,EAAsB,CACtC,MAAM,EAAkB,CAAC,EACzB,IAAI,EAAI,EAER,KAAO,EAAI,EAAI,QAAQ,CACrB,MAAM,EAAQ,EAAI,QAAQ,IAAK,CAAC,EAGhC,GAAI,IAAU,GAAI,CACH,EAAI,MAAM,CAAC,EAAE,KACtB,GAAM,EAAO,KAAK,CAAE,KAAM,OAAQ,KAAM,EAAY,EAAI,MAAM,CAAC,CAAC,CAAE,CAAC,EACvE,KACF,CAEA,GAAI,EAAQ,EAAG,CACb,MAAM,EAAM,EAAI,MAAM,EAAG,CAAK,EACjB,EAAI,KACb,GAAM,EAAO,KAAK,CAAE,KAAM,OAAQ,KAAM,EAAY,CAAG,CAAE,CAAC,CAChE,CAEA,MAAM,EAAQ,EAAI,QAAQ,IAAK,CAAK,EACpC,GAAI,IAAU,GAAI,MAElB,MAAM,EAAM,EAAI,MAAM,EAAQ,EAAG,CAAK,EAGtC,GAAI,EAAI,WAAW,GAAG,GAAK,EAAI,WAAW,GAAG,EAAG,CAC9C,EAAI,EAAQ,EACZ,QACF,CAEA,GAAI,EAAI,WAAW,GAAG,EACpB,EAAO,KAAK,CAAE,KAAM,QAAS,KAAM,EAAU,EAAI,MAAM,CAAC,EAAE,KAAK,CAAC,CAAE,CAAC,UAC1D,EAAI,SAAS,GAAG,EAAG,CAC5B,MAAM,EAAQ,EAAI,MAAM,EAAG,EAAE,EAAE,KAAK,EAC9B,EAAW,EAAM,OAAO,IAAI,EAC5B,EAAO,IAAa,GAAK,EAAQ,EAAM,MAAM,EAAG,CAAQ,EACxD,EAAU,IAAa,GAAK,GAAK,EAAM,MAAM,CAAQ,EAC3D,EAAO,KAAK,CAAE,KAAM,aAAc,KAAM,EAAU,CAAI,EAAG,MAAO,EAAW,CAAO,CAAE,CAAC,CACvF,KAAO,CACL,MAAM,EAAW,EAAI,OAAO,IAAI,EAC1B,EAAO,IAAa,GAAK,EAAM,EAAI,MAAM,EAAG,CAAQ,EACpD,EAAU,IAAa,GAAK,GAAK,EAAI,MAAM,CAAQ,EACzD,EAAO,KAAK,CAAE,KAAM,OAAQ,KAAM,EAAU,CAAI,EAAG,MAAO,EAAW,CAAO,CAAE,CAAC,CACjF,CAEA,EAAI,EAAQ,CACd,CAEA,OAAO,CACT,CAQA,SAAS,EAAkB,EAAiB,EAAoD,CAC9F,IAAI,EAAS,GACb,MAAM,EAAuC,CAAC,EAC9C,IAAI,EAAI,EAER,KAAO,EAAI,EAAO,QAAQ,CACxB,MAAM,EAAM,EAAO,CAAA,EAEnB,GAAI,EAAI,OAAS,SAAW,EAAI,OAAS,SACvC,MAAO,CAAE,OAAQ,CAAE,OAAA,EAAQ,OAAA,CAAO,EAAG,IAAK,EAAI,CAAE,EAGlD,GAAI,EAAI,OAAS,QAAU,EAAI,OAAS,SAAU,CAChD,IACI,EAAI,EAAO,QAAU,EAAO,CAAA,EAAI,OAAS,SAC3C,EAAS,EAAO,CAAA,EAAI,KAAM,KAAK,EAC/B,KAGE,EAAI,EAAO,QAAU,EAAO,CAAA,EAAI,OAAS,SAAS,IACtD,QACF,CAEA,GAAI,EAAI,OAAS,cAAgB,EAAI,OAAS,eAAgB,CAC5D,EAAO,KAAK,CAAE,IAAK,EAAI,OAAQ,KAAU,GAAI,KAAM,EAAG,CAAC,EACvD,IACA,QACF,CAEA,GAAI,EAAI,OAAS,QAAU,EAAI,OAAS,eAAgB,CACtD,MAAM,EAAM,EAAI,OAAQ,KAAU,GAClC,IACA,IAAI,EAAO,GACP,EAAI,EAAO,QAAU,EAAO,CAAA,EAAI,OAAS,SAC3C,EAAO,EAAO,CAAA,EAAI,MAAQ,GAC1B,KAGE,EAAI,EAAO,QAAU,EAAO,CAAA,EAAI,OAAS,SAAS,IACtD,EAAO,KAAK,CAAE,IAAA,EAAK,KAAA,CAAK,CAAC,EACzB,QACF,CAEA,GAAI,EAAI,OAAS,cAAgB,EAAI,OAAS,YAAa,CACzD,EAAO,KAAK,CACV,IAAK,EAAI,OAAQ,KAAU,GAC3B,WAAY,EAAI,OAAQ,MAAW,IACnC,WAAY,EAAI,OAAQ,MAAW,IACnC,UAAW,CAAC,CACd,CAAC,EACD,IACA,QACF,CAEA,GAAI,EAAI,OAAS,QAAU,EAAI,OAAS,YAAa,CACnD,MAAM,EAAM,EAAI,OAAQ,KAAU,GAC5B,EAAa,EAAI,OAAQ,MAAW,IACpC,EAAa,EAAI,OAAQ,MAAW,IACpC,EAAwB,CAAC,EAG/B,IAFA,IAEO,EAAI,EAAO,QAAQ,CACxB,MAAM,EAAO,EAAO,CAAA,EACpB,GAAI,EAAK,OAAS,SAAW,EAAK,OAAS,YAAa,CACtD,IACA,KACF,CACA,GAAI,EAAK,OAAS,QAAU,EAAK,OAAS,WAAY,CACpD,MAAM,EAAO,EAAK,OAAQ,MAAW,GACrC,IACA,IAAI,EAAQ,GACR,EAAI,EAAO,QAAU,EAAO,CAAA,EAAI,OAAS,SAC3C,EAAQ,EAAO,CAAA,EAAI,MAAQ,GAC3B,KAGE,EAAI,EAAO,QAAU,EAAO,CAAA,EAAI,OAAS,SAAS,IACtD,EAAU,KAAK,CAAE,KAAA,EAAM,MAAA,CAAM,CAAC,EAC9B,QACF,CACA,GACF,CAEA,EAAO,KAAK,CAAE,IAAA,EAAK,WAAA,EAAY,WAAA,EAAY,UAAA,CAAU,CAAC,EACtD,QACF,CAEA,GACF,CAEA,MAAO,CAAE,OAAQ,CAAE,OAAA,EAAQ,OAAA,CAAO,EAAG,IAAK,CAAE,CAC9C,CAMA,SAAgB,EAAa,EAA2B,CACtD,MAAM,EAAS,EAAS,CAAG,EACrB,EAAwB,CAAC,EAC/B,IAAI,EAAI,EAER,KAAO,EAAI,EAAO,QAAQ,CACxB,MAAM,EAAM,EAAO,CAAA,EACnB,GAAI,EAAI,OAAS,QAAU,EAAI,OAAS,SAAU,CAChD,KAAM,CAAE,OAAA,EAAQ,IAAA,CAAA,EAAQ,EAAkB,EAAQ,EAAI,CAAC,EACvD,EAAQ,KAAK,CAAM,EACnB,EAAI,EACJ,QACF,CACA,GACF,CAEA,OAAO,CACT,CAIA,IAAM,EAAa;AAAA,EACb,EAAgB,yCAChB,EAAS,KAEf,SAAS,EAAuB,EAA4B,CAC1D,MAAM,EAAkB,CAAC,WAAW,CAAA,GAAgB,EACpD,EAAM,KAAK,GAAG,CAAA,WAAiB,EAAU,EAAO,MAAM,CAAA,WAAY,EAElE,UAAW,KAAS,EAAO,OACzB,GAAI,EAAA,eAAe,CAAK,EACtB,EAAM,KAAK,GAAG,CAAA,sBAA4B,EAAM,GAAA,KAAQ,EAAU,EAAM,IAAI,CAAA,iBAAkB,MACzF,CACL,MAAM,EAAO,EAAM,aAAe,IAAM,IAAM,EAAM,WAC9C,EAAO,EAAM,aAAe,IAAM,IAAM,EAAM,WACpD,EAAM,KAAK,GAAG,CAAA,mBAAyB,EAAM,GAAA,WAAc,CAAA,WAAe,CAAA,IAAQ,EAClF,UAAW,KAAM,EAAM,UACrB,EAAM,KACJ,GAAG,CAAA,GAAS,CAAA,mBAAyB,EAAG,IAAA,KAAS,EAAU,EAAG,KAAK,CAAA,aACrE,EAEF,EAAM,KAAK,GAAG,CAAA,cAAoB,CACpC,CAGF,OAAA,EAAM,KAAK,WAAW,EACf,EAAM,KAAK;AAAA,CAAI,CACxB,CAKA,SAAgB,EAAiB,EAA+B,CAC9D,MAAM,EAAkB,CACtB,EACA,eAAe,CAAA,GACjB,EAEA,UAAW,KAAU,EAAS,CAE5B,MAAM,EAAY,EAAuB,CAAM,EAC5C,MAAM;AAAA,CAAI,EACV,IAAK,GAAS,EAAS,CAAI,EAC3B,KAAK;AAAA,CAAI,EACZ,EAAM,KAAK,CAAS,CACtB,CAEA,OAAA,EAAM,KAAK,eAAe,EACnB,EAAM,KAAK;AAAA,CAAI,CACxB"}
1
+ {"version":3,"file":"marcxml.cjs","names":[],"sources":["../src/marcxml.ts"],"sourcesContent":["/**\n * MARCXML parser and serializer.\n *\n * Supports the Library of Congress MARCXML schema:\n * http://www.loc.gov/MARC21/slim\n *\n * Parsing is done with a hand-rolled state machine — no XML library needed.\n * The MARCXML format is sufficiently regular (fixed element names, no arbitrary\n * nesting) that a full DOM parser is unnecessary.\n */\n\nimport type {\n MarcRecord,\n ControlField,\n DataField,\n Subfield,\n ParseOptions,\n ParseResult,\n ParseBatchResult,\n MarcWarning,\n} from './types';\nimport { isControlField } from './types';\nimport { createWarning } from './warnings';\n\n// ─── XML entity handling ─────────────────────────────────────────────────────\n\nconst ENTITY_MAP: ReadonlyMap<string, string> = new Map([\n ['amp', '&'],\n ['lt', '<'],\n ['gt', '>'],\n ['quot', '\"'],\n ['apos', \"'\"],\n]);\n\nfunction unescapeXml(text: string): string {\n return text.replace(/&(?:#x([0-9a-fA-F]+)|#([0-9]+)|([a-zA-Z]+));/g, (_, hex, dec, name) => {\n if (hex !== undefined) {\n const cp = parseInt(hex, 16);\n return cp >= 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : '�';\n }\n if (dec !== undefined) {\n const cp = parseInt(dec, 10);\n return cp >= 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : '�';\n }\n return ENTITY_MAP.get(name) ?? _;\n });\n}\n\nfunction escapeXml(text: string): string {\n return (\n text\n // XML 1.0 forbids most C0 control characters in document text. There is no\n // valid XML 1.0 representation for them, so substitute the Unicode\n // replacement character to keep the output well-formed.\n .replace(/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]/g, '�')\n .replace(/&/g, '&amp;')\n .replace(/</g, '&lt;')\n .replace(/>/g, '&gt;')\n .replace(/\"/g, '&quot;')\n // Preserve literal CR through the XML round-trip: XML parsers normalize\n // bare \\r and \\r\\n to \\n, so we must encode CR as a numeric reference.\n .replace(/\\r/g, '&#13;')\n );\n}\n\n// ─── Minimal tokeniser ────────────────────────────────────────────────────────\n\ninterface Token {\n type: 'open' | 'close' | 'self-close' | 'text';\n /** Local name (no namespace prefix) */\n name?: string;\n attrs?: Record<string, string>;\n text?: string;\n}\n\n/**\n * Strip namespace prefix from a tag name, e.g. \"marc:record\" → \"record\".\n */\nfunction localName(raw: string): string {\n const colon = raw.indexOf(':');\n return colon === -1 ? raw : raw.slice(colon + 1);\n}\n\n/**\n * Parse `key=\"value\"` pairs out of an attribute string.\n */\nfunction parseAttrs(attrStr: string): Record<string, string> {\n const attrs: Record<string, string> = {};\n const re = /([a-zA-Z_:][^\\s=]*)\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)')/g;\n let m: RegExpExecArray | null;\n while ((m = re.exec(attrStr)) !== null) {\n const key = localName(m[1]!);\n attrs[key] = unescapeXml(m[2] ?? m[3] ?? '');\n }\n return attrs;\n}\n\n/**\n * Find the closing `>` of an XML element tag, respecting quoted attribute values.\n */\nfunction findTagEnd(xml: string, start: number): number {\n let inSingle = false;\n let inDouble = false;\n for (let j = start; j < xml.length; j++) {\n const ch = xml[j];\n if (ch === '\"' && !inSingle) inDouble = !inDouble;\n else if (ch === \"'\" && !inDouble) inSingle = !inSingle;\n else if (ch === '>' && !inSingle && !inDouble) return j;\n }\n return -1;\n}\n\n/**\n * Tokenise an XML string into a flat stream of open/close/text tokens.\n * Skips processing instructions, comments, and DOCTYPE declarations.\n * Handles CDATA sections as literal text.\n */\nfunction tokenise(xml: string, warnings?: MarcWarning[]): Token[] {\n const tokens: Token[] = [];\n let i = 0;\n\n while (i < xml.length) {\n const ltPos = xml.indexOf('<', i);\n\n // Text node before next tag\n if (ltPos === -1) {\n const text = xml.slice(i).trim();\n if (text) tokens.push({ type: 'text', text: unescapeXml(xml.slice(i)) });\n break;\n }\n\n if (ltPos > i) {\n const raw = xml.slice(i, ltPos);\n const text = raw.trim();\n if (text) tokens.push({ type: 'text', text: unescapeXml(raw) });\n }\n\n // Handle special constructs before finding the regular tag end\n if (xml.startsWith('<!--', ltPos)) {\n const commentEnd = xml.indexOf('-->', ltPos + 4);\n if (commentEnd === -1) {\n warnings?.push(createWarning('malformed_xml', 'Unterminated comment'));\n i = xml.length;\n } else {\n i = commentEnd + 3;\n }\n continue;\n }\n\n if (xml.startsWith('<![CDATA[', ltPos)) {\n const cdataEnd = xml.indexOf(']]>', ltPos + 9);\n if (cdataEnd === -1) {\n warnings?.push(createWarning('malformed_xml', 'Unterminated CDATA section'));\n i = xml.length;\n } else {\n // CDATA content is literal — no entity unescaping\n const cdataText = xml.slice(ltPos + 9, cdataEnd);\n if (cdataText) tokens.push({ type: 'text', text: cdataText });\n i = cdataEnd + 3;\n }\n continue;\n }\n\n if (xml.startsWith('<?', ltPos)) {\n const piEnd = xml.indexOf('?>', ltPos + 2);\n i = piEnd === -1 ? xml.length : piEnd + 2;\n continue;\n }\n\n if (xml.startsWith('<!', ltPos)) {\n const bangEnd = xml.indexOf('>', ltPos + 2);\n i = bangEnd === -1 ? xml.length : bangEnd + 1;\n continue;\n }\n\n // Regular element tag — use quote-aware scanning\n const gtPos = findTagEnd(xml, ltPos + 1);\n if (gtPos === -1) {\n warnings?.push(createWarning('malformed_xml', 'Unclosed tag at end of input'));\n break;\n }\n\n const tag = xml.slice(ltPos + 1, gtPos);\n\n if (tag.startsWith('/')) {\n tokens.push({ type: 'close', name: localName(tag.slice(1).trim()) });\n } else if (tag.endsWith('/')) {\n const inner = tag.slice(0, -1).trim();\n const spaceIdx = inner.search(/\\s/);\n const name = spaceIdx === -1 ? inner : inner.slice(0, spaceIdx);\n const attrStr = spaceIdx === -1 ? '' : inner.slice(spaceIdx);\n tokens.push({\n type: 'self-close',\n name: localName(name),\n attrs: parseAttrs(attrStr),\n });\n } else {\n const spaceIdx = tag.search(/\\s/);\n const name = spaceIdx === -1 ? tag : tag.slice(0, spaceIdx);\n const attrStr = spaceIdx === -1 ? '' : tag.slice(spaceIdx);\n tokens.push({\n type: 'open',\n name: localName(name),\n attrs: parseAttrs(attrStr),\n });\n }\n\n i = gtPos + 1;\n }\n\n return tokens;\n}\n\n// ─── MARCXML parser ───────────────────────────────────────────────────────────\n\nfunction emitWarning(warnings: MarcWarning[], warning: MarcWarning, options: ParseOptions): void {\n if (options.strict) {\n throw new Error(warning.message);\n }\n const max = options.maxWarnings ?? 100;\n if (warnings.length < max) {\n warnings.push(warning);\n }\n}\n\n/**\n * Parse one `<record>` element's worth of tokens into a MarcRecord.\n */\nfunction parseRecordTokens(\n tokens: Token[],\n start: number,\n warnings: MarcWarning[],\n options: ParseOptions\n): { record: MarcRecord; end: number } {\n let leader = '';\n let hasLeader = false;\n const fields: (ControlField | DataField)[] = [];\n let i = start;\n\n while (i < tokens.length) {\n const tok = tokens[i]!;\n\n if (tok.type === 'close' && tok.name === 'record') {\n if (!hasLeader) {\n emitWarning(\n warnings,\n createWarning('missing_element', 'Record has no <leader> element'),\n options\n );\n }\n return { record: { leader, fields }, end: i + 1 };\n }\n\n if (tok.type === 'open' && tok.name === 'leader') {\n hasLeader = true;\n i++;\n if (i < tokens.length && tokens[i]!.type === 'text') {\n leader = tokens[i]!.text!.trim();\n i++;\n }\n if (leader.length !== 24) {\n emitWarning(\n warnings,\n createWarning('invalid_leader', `Leader is ${leader.length} characters, expected 24`),\n options\n );\n }\n // consume </leader>\n if (i < tokens.length && tokens[i]!.type === 'close') i++;\n continue;\n }\n\n if (tok.type === 'self-close' && tok.name === 'controlfield') {\n const tag = tok.attrs?.['tag'];\n if (tag === undefined) {\n emitWarning(\n warnings,\n createWarning('missing_element', 'controlfield missing tag attribute'),\n options\n );\n }\n fields.push({ tag: tag ?? '', data: '' });\n i++;\n continue;\n }\n\n if (tok.type === 'open' && tok.name === 'controlfield') {\n const tag = tok.attrs?.['tag'];\n if (tag === undefined) {\n emitWarning(\n warnings,\n createWarning('missing_element', 'controlfield missing tag attribute'),\n options\n );\n }\n i++;\n let data = '';\n if (i < tokens.length && tokens[i]!.type === 'text') {\n data = tokens[i]!.text ?? '';\n i++;\n }\n // consume </controlfield>\n if (i < tokens.length && tokens[i]!.type === 'close') i++;\n fields.push({ tag: tag ?? '', data });\n continue;\n }\n\n if (tok.type === 'self-close' && tok.name === 'datafield') {\n const tag = tok.attrs?.['tag'];\n if (tag === undefined) {\n emitWarning(\n warnings,\n createWarning('missing_element', 'datafield missing tag attribute'),\n options\n );\n }\n fields.push({\n tag: tag ?? '',\n indicator1: tok.attrs?.['ind1'] ?? ' ',\n indicator2: tok.attrs?.['ind2'] ?? ' ',\n subfields: [],\n });\n i++;\n continue;\n }\n\n if (tok.type === 'open' && tok.name === 'datafield') {\n const tag = tok.attrs?.['tag'];\n if (tag === undefined) {\n emitWarning(\n warnings,\n createWarning('missing_element', 'datafield missing tag attribute'),\n options\n );\n }\n const indicator1 = tok.attrs?.['ind1'] ?? ' ';\n const indicator2 = tok.attrs?.['ind2'] ?? ' ';\n const subfields: Subfield[] = [];\n i++;\n\n while (i < tokens.length) {\n const stok = tokens[i]!;\n if (stok.type === 'close' && stok.name === 'datafield') {\n i++;\n break;\n }\n if (stok.type === 'open' && stok.name === 'subfield') {\n const code = stok.attrs?.['code'];\n if (code === undefined) {\n emitWarning(\n warnings,\n createWarning('missing_element', 'subfield missing code attribute', undefined, tag),\n options\n );\n }\n i++;\n let value = '';\n if (i < tokens.length && tokens[i]!.type === 'text') {\n value = tokens[i]!.text ?? '';\n i++;\n }\n // consume </subfield>\n if (i < tokens.length && tokens[i]!.type === 'close') i++;\n subfields.push({ code: code ?? '', value });\n continue;\n }\n i++;\n }\n\n fields.push({ tag: tag ?? '', indicator1, indicator2, subfields });\n continue;\n }\n\n i++;\n }\n\n if (!hasLeader) {\n emitWarning(\n warnings,\n createWarning('missing_element', 'Record has no <leader> element'),\n options\n );\n }\n return { record: { leader, fields }, end: i };\n}\n\n/**\n * Parse a MARCXML string, returning per-record parse results including warnings.\n * Unlike parseMarcXml, every record attempt is included even if it produced warnings.\n */\nexport function parseMarcXmlWithWarnings(xml: string, options?: ParseOptions): ParseBatchResult {\n const opts = options ?? {};\n const tokWarnings: MarcWarning[] = [];\n const tokens = tokenise(xml, tokWarnings);\n const results: ParseResult[] = [];\n let i = 0;\n\n while (i < tokens.length) {\n const tok = tokens[i]!;\n if (tok.type === 'open' && tok.name === 'record') {\n const recordWarnings: MarcWarning[] = [];\n const { record, end } = parseRecordTokens(tokens, i + 1, recordWarnings, opts);\n results.push({ record, warnings: recordWarnings });\n i = end;\n continue;\n }\n i++;\n }\n\n if (results.length === 0 && tokWarnings.length > 0) {\n results.push({ record: null, warnings: tokWarnings });\n } else if (tokWarnings.length > 0 && results.length > 0) {\n results[0] = {\n record: results[0]!.record,\n warnings: [...tokWarnings, ...results[0]!.warnings],\n };\n }\n\n return { results };\n}\n\n/**\n * Parse a MARCXML string containing one `<collection>` or one bare `<record>`.\n * Returns all successfully parsed records.\n */\nexport function parseMarcXml(xml: string, options?: ParseOptions): MarcRecord[] {\n const batch = parseMarcXmlWithWarnings(xml, options);\n return batch.results.map((r) => r.record).filter((r): r is MarcRecord => r !== null);\n}\n\n// ─── MARCXML serializer ───────────────────────────────────────────────────────\n\nconst XML_HEADER = '<?xml version=\"1.0\" encoding=\"UTF-8\"?>\\n';\nconst COLLECTION_NS = 'xmlns=\"http://www.loc.gov/MARC21/slim\"';\nconst INDENT = ' ';\n\nfunction serializeMarcXmlRecord(record: MarcRecord): string {\n const lines: string[] = [`<record ${COLLECTION_NS}>`];\n lines.push(`${INDENT}<leader>${escapeXml(record.leader)}</leader>`);\n\n for (const field of record.fields) {\n if (isControlField(field)) {\n lines.push(\n `${INDENT}<controlfield tag=\"${escapeXml(field.tag)}\">${escapeXml(field.data)}</controlfield>`\n );\n } else {\n const ind1 = field.indicator1 === ' ' ? ' ' : field.indicator1;\n const ind2 = field.indicator2 === ' ' ? ' ' : field.indicator2;\n lines.push(\n `${INDENT}<datafield tag=\"${escapeXml(field.tag)}\" ind1=\"${escapeXml(ind1)}\" ind2=\"${escapeXml(ind2)}\">`\n );\n for (const sf of field.subfields) {\n lines.push(\n `${INDENT}${INDENT}<subfield code=\"${escapeXml(sf.code)}\">${escapeXml(sf.value)}</subfield>`\n );\n }\n lines.push(`${INDENT}</datafield>`);\n }\n }\n\n lines.push('</record>');\n return lines.join('\\n');\n}\n\n/**\n * Serialize one or more MarcRecords into a MARCXML `<collection>` document.\n */\nexport function serializeMarcXml(records: MarcRecord[]): string {\n const parts: string[] = [XML_HEADER, `<collection ${COLLECTION_NS}>`];\n\n for (const record of records) {\n // Indent each record element by one level inside <collection>\n const recordXml = serializeMarcXmlRecord(record)\n .split('\\n')\n .map((line) => INDENT + line)\n .join('\\n');\n parts.push(recordXml);\n }\n\n parts.push('</collection>');\n return parts.join('\\n');\n}\n"],"mappings":"gJA0BA,IAAM,EAA0C,IAAI,IAAI,CACtD,CAAC,MAAO,GAAG,EACX,CAAC,KAAM,GAAG,EACV,CAAC,KAAM,GAAG,EACV,CAAC,OAAQ,GAAG,EACZ,CAAC,OAAQ,GAAG,CACd,CAAC,EAED,SAAS,EAAY,EAAsB,CACzC,OAAO,EAAK,QAAQ,gDAAA,CAAkD,EAAG,EAAK,EAAK,IAAS,CAC1F,GAAI,IAAQ,OAAW,CACrB,MAAM,EAAK,SAAS,EAAK,EAAE,EAC3B,OAAO,GAAM,GAAK,GAAM,QAAW,OAAO,cAAc,CAAE,EAAI,GAChE,CACA,GAAI,IAAQ,OAAW,CACrB,MAAM,EAAK,SAAS,EAAK,EAAE,EAC3B,OAAO,GAAM,GAAK,GAAM,QAAW,OAAO,cAAc,CAAE,EAAI,GAChE,CACA,OAAO,EAAW,IAAI,CAAI,GAAK,CACjC,CAAC,CACH,CAEA,SAAS,EAAU,EAAsB,CACvC,OACE,EAIG,QAAQ,gCAAiC,GAAG,EAC5C,QAAQ,KAAM,OAAO,EACrB,QAAQ,KAAM,MAAM,EACpB,QAAQ,KAAM,MAAM,EACpB,QAAQ,KAAM,QAAQ,EAGtB,QAAQ,MAAO,OAAO,CAE7B,CAeA,SAAS,EAAU,EAAqB,CACtC,MAAM,EAAQ,EAAI,QAAQ,GAAG,EAC7B,OAAO,IAAU,GAAK,EAAM,EAAI,MAAM,EAAQ,CAAC,CACjD,CAKA,SAAS,EAAW,EAAyC,CAC3D,MAAM,EAAgC,CAAC,EACjC,EAAK,qDACX,IAAI,EACJ,MAAQ,EAAI,EAAG,KAAK,CAAO,KAAO,MAAM,CACtC,MAAM,EAAM,EAAU,EAAE,CAAA,CAAG,EAC3B,EAAM,CAAA,EAAO,EAAY,EAAE,CAAA,GAAM,EAAE,CAAA,GAAM,EAAE,CAC7C,CACA,OAAO,CACT,CAKA,SAAS,EAAW,EAAa,EAAuB,CACtD,IAAI,EAAW,GACX,EAAW,GACf,QAAS,EAAI,EAAO,EAAI,EAAI,OAAQ,IAAK,CACvC,MAAM,EAAK,EAAI,CAAA,EACf,GAAI,IAAO,KAAO,CAAC,EAAU,EAAW,CAAC,UAChC,IAAO,KAAO,CAAC,EAAU,EAAW,CAAC,UACrC,IAAO,KAAO,CAAC,GAAY,CAAC,EAAU,OAAO,CACxD,CACA,MAAO,EACT,CAOA,SAAS,EAAS,EAAa,EAAmC,CAChE,MAAM,EAAkB,CAAC,EACzB,IAAI,EAAI,EAER,KAAO,EAAI,EAAI,QAAQ,CACrB,MAAM,EAAQ,EAAI,QAAQ,IAAK,CAAC,EAGhC,GAAI,IAAU,GAAI,CACH,EAAI,MAAM,CAAC,EAAE,KACtB,GAAM,EAAO,KAAK,CAAE,KAAM,OAAQ,KAAM,EAAY,EAAI,MAAM,CAAC,CAAC,CAAE,CAAC,EACvE,KACF,CAEA,GAAI,EAAQ,EAAG,CACb,MAAM,EAAM,EAAI,MAAM,EAAG,CAAK,EACjB,EAAI,KACb,GAAM,EAAO,KAAK,CAAE,KAAM,OAAQ,KAAM,EAAY,CAAG,CAAE,CAAC,CAChE,CAGA,GAAI,EAAI,WAAW,OAAQ,CAAK,EAAG,CACjC,MAAM,EAAa,EAAI,QAAQ,MAAO,EAAQ,CAAC,EAC3C,IAAe,IACjB,GAAU,KAAK,EAAA,cAAc,gBAAiB,sBAAsB,CAAC,EACrE,EAAI,EAAI,QAER,EAAI,EAAa,EAEnB,QACF,CAEA,GAAI,EAAI,WAAW,YAAa,CAAK,EAAG,CACtC,MAAM,EAAW,EAAI,QAAQ,MAAO,EAAQ,CAAC,EAC7C,GAAI,IAAa,GACf,GAAU,KAAK,EAAA,cAAc,gBAAiB,4BAA4B,CAAC,EAC3E,EAAI,EAAI,WACH,CAEL,MAAM,EAAY,EAAI,MAAM,EAAQ,EAAG,CAAQ,EAC3C,GAAW,EAAO,KAAK,CAAE,KAAM,OAAQ,KAAM,CAAU,CAAC,EAC5D,EAAI,EAAW,CACjB,CACA,QACF,CAEA,GAAI,EAAI,WAAW,KAAM,CAAK,EAAG,CAC/B,MAAM,EAAQ,EAAI,QAAQ,KAAM,EAAQ,CAAC,EACzC,EAAI,IAAU,GAAK,EAAI,OAAS,EAAQ,EACxC,QACF,CAEA,GAAI,EAAI,WAAW,KAAM,CAAK,EAAG,CAC/B,MAAM,EAAU,EAAI,QAAQ,IAAK,EAAQ,CAAC,EAC1C,EAAI,IAAY,GAAK,EAAI,OAAS,EAAU,EAC5C,QACF,CAGA,MAAM,EAAQ,EAAW,EAAK,EAAQ,CAAC,EACvC,GAAI,IAAU,GAAI,CAChB,GAAU,KAAK,EAAA,cAAc,gBAAiB,8BAA8B,CAAC,EAC7E,KACF,CAEA,MAAM,EAAM,EAAI,MAAM,EAAQ,EAAG,CAAK,EAEtC,GAAI,EAAI,WAAW,GAAG,EACpB,EAAO,KAAK,CAAE,KAAM,QAAS,KAAM,EAAU,EAAI,MAAM,CAAC,EAAE,KAAK,CAAC,CAAE,CAAC,UAC1D,EAAI,SAAS,GAAG,EAAG,CAC5B,MAAM,EAAQ,EAAI,MAAM,EAAG,EAAE,EAAE,KAAK,EAC9B,EAAW,EAAM,OAAO,IAAI,EAC5B,EAAO,IAAa,GAAK,EAAQ,EAAM,MAAM,EAAG,CAAQ,EACxD,EAAU,IAAa,GAAK,GAAK,EAAM,MAAM,CAAQ,EAC3D,EAAO,KAAK,CACV,KAAM,aACN,KAAM,EAAU,CAAI,EACpB,MAAO,EAAW,CAAO,CAC3B,CAAC,CACH,KAAO,CACL,MAAM,EAAW,EAAI,OAAO,IAAI,EAC1B,EAAO,IAAa,GAAK,EAAM,EAAI,MAAM,EAAG,CAAQ,EACpD,EAAU,IAAa,GAAK,GAAK,EAAI,MAAM,CAAQ,EACzD,EAAO,KAAK,CACV,KAAM,OACN,KAAM,EAAU,CAAI,EACpB,MAAO,EAAW,CAAO,CAC3B,CAAC,CACH,CAEA,EAAI,EAAQ,CACd,CAEA,OAAO,CACT,CAIA,SAAS,EAAY,EAAyB,EAAsB,EAA6B,CAC/F,GAAI,EAAQ,OACV,MAAM,IAAI,MAAM,EAAQ,OAAO,EAEjC,MAAM,EAAM,EAAQ,aAAe,IAC/B,EAAS,OAAS,GACpB,EAAS,KAAK,CAAO,CAEzB,CAKA,SAAS,EACP,EACA,EACA,EACA,EACqC,CACrC,IAAI,EAAS,GACT,EAAY,GAChB,MAAM,EAAuC,CAAC,EAC9C,IAAI,EAAI,EAER,KAAO,EAAI,EAAO,QAAQ,CACxB,MAAM,EAAM,EAAO,CAAA,EAEnB,GAAI,EAAI,OAAS,SAAW,EAAI,OAAS,SACvC,OAAK,GACH,EACE,EACA,EAAA,cAAc,kBAAmB,gCAAgC,EACjE,CACF,EAEK,CAAE,OAAQ,CAAE,OAAA,EAAQ,OAAA,CAAO,EAAG,IAAK,EAAI,CAAE,EAGlD,GAAI,EAAI,OAAS,QAAU,EAAI,OAAS,SAAU,CAChD,EAAY,GACZ,IACI,EAAI,EAAO,QAAU,EAAO,CAAA,EAAI,OAAS,SAC3C,EAAS,EAAO,CAAA,EAAI,KAAM,KAAK,EAC/B,KAEE,EAAO,SAAW,IACpB,EACE,EACA,EAAA,cAAc,iBAAkB,aAAa,EAAO,MAAA,0BAAgC,EACpF,CACF,EAGE,EAAI,EAAO,QAAU,EAAO,CAAA,EAAI,OAAS,SAAS,IACtD,QACF,CAEA,GAAI,EAAI,OAAS,cAAgB,EAAI,OAAS,eAAgB,CAC5D,MAAM,EAAM,EAAI,OAAQ,IACpB,IAAQ,QACV,EACE,EACA,EAAA,cAAc,kBAAmB,oCAAoC,EACrE,CACF,EAEF,EAAO,KAAK,CAAE,IAAK,GAAO,GAAI,KAAM,EAAG,CAAC,EACxC,IACA,QACF,CAEA,GAAI,EAAI,OAAS,QAAU,EAAI,OAAS,eAAgB,CACtD,MAAM,EAAM,EAAI,OAAQ,IACpB,IAAQ,QACV,EACE,EACA,EAAA,cAAc,kBAAmB,oCAAoC,EACrE,CACF,EAEF,IACA,IAAI,EAAO,GACP,EAAI,EAAO,QAAU,EAAO,CAAA,EAAI,OAAS,SAC3C,EAAO,EAAO,CAAA,EAAI,MAAQ,GAC1B,KAGE,EAAI,EAAO,QAAU,EAAO,CAAA,EAAI,OAAS,SAAS,IACtD,EAAO,KAAK,CAAE,IAAK,GAAO,GAAI,KAAA,CAAK,CAAC,EACpC,QACF,CAEA,GAAI,EAAI,OAAS,cAAgB,EAAI,OAAS,YAAa,CACzD,MAAM,EAAM,EAAI,OAAQ,IACpB,IAAQ,QACV,EACE,EACA,EAAA,cAAc,kBAAmB,iCAAiC,EAClE,CACF,EAEF,EAAO,KAAK,CACV,IAAK,GAAO,GACZ,WAAY,EAAI,OAAQ,MAAW,IACnC,WAAY,EAAI,OAAQ,MAAW,IACnC,UAAW,CAAC,CACd,CAAC,EACD,IACA,QACF,CAEA,GAAI,EAAI,OAAS,QAAU,EAAI,OAAS,YAAa,CACnD,MAAM,EAAM,EAAI,OAAQ,IACpB,IAAQ,QACV,EACE,EACA,EAAA,cAAc,kBAAmB,iCAAiC,EAClE,CACF,EAEF,MAAM,EAAa,EAAI,OAAQ,MAAW,IACpC,EAAa,EAAI,OAAQ,MAAW,IACpC,EAAwB,CAAC,EAG/B,IAFA,IAEO,EAAI,EAAO,QAAQ,CACxB,MAAM,EAAO,EAAO,CAAA,EACpB,GAAI,EAAK,OAAS,SAAW,EAAK,OAAS,YAAa,CACtD,IACA,KACF,CACA,GAAI,EAAK,OAAS,QAAU,EAAK,OAAS,WAAY,CACpD,MAAM,EAAO,EAAK,OAAQ,KACtB,IAAS,QACX,EACE,EACA,EAAA,cAAc,kBAAmB,kCAAmC,OAAW,CAAG,EAClF,CACF,EAEF,IACA,IAAI,EAAQ,GACR,EAAI,EAAO,QAAU,EAAO,CAAA,EAAI,OAAS,SAC3C,EAAQ,EAAO,CAAA,EAAI,MAAQ,GAC3B,KAGE,EAAI,EAAO,QAAU,EAAO,CAAA,EAAI,OAAS,SAAS,IACtD,EAAU,KAAK,CAAE,KAAM,GAAQ,GAAI,MAAA,CAAM,CAAC,EAC1C,QACF,CACA,GACF,CAEA,EAAO,KAAK,CAAE,IAAK,GAAO,GAAI,WAAA,EAAY,WAAA,EAAY,UAAA,CAAU,CAAC,EACjE,QACF,CAEA,GACF,CAEA,OAAK,GACH,EACE,EACA,EAAA,cAAc,kBAAmB,gCAAgC,EACjE,CACF,EAEK,CAAE,OAAQ,CAAE,OAAA,EAAQ,OAAA,CAAO,EAAG,IAAK,CAAE,CAC9C,CAMA,SAAgB,EAAyB,EAAa,EAA0C,CAC9F,MAAM,EAAO,GAAW,CAAC,EACnB,EAA6B,CAAC,EAC9B,EAAS,EAAS,EAAK,CAAW,EAClC,EAAyB,CAAC,EAChC,IAAI,EAAI,EAER,KAAO,EAAI,EAAO,QAAQ,CACxB,MAAM,EAAM,EAAO,CAAA,EACnB,GAAI,EAAI,OAAS,QAAU,EAAI,OAAS,SAAU,CAChD,MAAM,EAAgC,CAAC,EACjC,CAAE,OAAA,EAAQ,IAAA,CAAA,EAAQ,EAAkB,EAAQ,EAAI,EAAG,EAAgB,CAAI,EAC7E,EAAQ,KAAK,CAAE,OAAA,EAAQ,SAAU,CAAe,CAAC,EACjD,EAAI,EACJ,QACF,CACA,GACF,CAEA,OAAI,EAAQ,SAAW,GAAK,EAAY,OAAS,EAC/C,EAAQ,KAAK,CAAE,OAAQ,KAAM,SAAU,CAAY,CAAC,EAC3C,EAAY,OAAS,GAAK,EAAQ,OAAS,IACpD,EAAQ,CAAA,EAAK,CACX,OAAQ,EAAQ,CAAA,EAAI,OACpB,SAAU,CAAC,GAAG,EAAa,GAAG,EAAQ,CAAA,EAAI,QAAQ,CACpD,GAGK,CAAE,QAAA,CAAQ,CACnB,CAMA,SAAgB,EAAa,EAAa,EAAsC,CAE9E,OADc,EAAyB,EAAK,CACrC,EAAM,QAAQ,IAAK,GAAM,EAAE,MAAM,EAAE,OAAQ,GAAuB,IAAM,IAAI,CACrF,CAIA,IAAM,EAAa;AAAA,EACb,EAAgB,yCAChB,EAAS,KAEf,SAAS,EAAuB,EAA4B,CAC1D,MAAM,EAAkB,CAAC,WAAW,CAAA,GAAgB,EACpD,EAAM,KAAK,GAAG,CAAA,WAAiB,EAAU,EAAO,MAAM,CAAA,WAAY,EAElE,UAAW,KAAS,EAAO,OACzB,GAAI,EAAA,eAAe,CAAK,EACtB,EAAM,KACJ,GAAG,CAAA,sBAA4B,EAAU,EAAM,GAAG,CAAA,KAAM,EAAU,EAAM,IAAI,CAAA,iBAC9E,MACK,CACL,MAAM,EAAO,EAAM,aAAe,IAAM,IAAM,EAAM,WAC9C,EAAO,EAAM,aAAe,IAAM,IAAM,EAAM,WACpD,EAAM,KACJ,GAAG,CAAA,mBAAyB,EAAU,EAAM,GAAG,CAAA,WAAY,EAAU,CAAI,CAAA,WAAY,EAAU,CAAI,CAAA,IACrG,EACA,UAAW,KAAM,EAAM,UACrB,EAAM,KACJ,GAAG,CAAA,GAAS,CAAA,mBAAyB,EAAU,EAAG,IAAI,CAAA,KAAM,EAAU,EAAG,KAAK,CAAA,aAChF,EAEF,EAAM,KAAK,GAAG,CAAA,cAAoB,CACpC,CAGF,OAAA,EAAM,KAAK,WAAW,EACf,EAAM,KAAK;AAAA,CAAI,CACxB,CAKA,SAAgB,EAAiB,EAA+B,CAC9D,MAAM,EAAkB,CAAC,EAAY,eAAe,CAAA,GAAgB,EAEpE,UAAW,KAAU,EAAS,CAE5B,MAAM,EAAY,EAAuB,CAAM,EAC5C,MAAM;AAAA,CAAI,EACV,IAAK,GAAS,EAAS,CAAI,EAC3B,KAAK;AAAA,CAAI,EACZ,EAAM,KAAK,CAAS,CACtB,CAEA,OAAA,EAAM,KAAK,eAAe,EACnB,EAAM,KAAK;AAAA,CAAI,CACxB"}
package/dist/marcxml.d.ts CHANGED
@@ -1,9 +1,14 @@
1
- import { MarcRecord } from './types';
1
+ import { MarcRecord, ParseOptions, ParseBatchResult } from './types';
2
+ /**
3
+ * Parse a MARCXML string, returning per-record parse results including warnings.
4
+ * Unlike parseMarcXml, every record attempt is included even if it produced warnings.
5
+ */
6
+ export declare function parseMarcXmlWithWarnings(xml: string, options?: ParseOptions): ParseBatchResult;
2
7
  /**
3
8
  * Parse a MARCXML string containing one `<collection>` or one bare `<record>`.
4
- * Returns all records found.
9
+ * Returns all successfully parsed records.
5
10
  */
6
- export declare function parseMarcXml(xml: string): MarcRecord[];
11
+ export declare function parseMarcXml(xml: string, options?: ParseOptions): MarcRecord[];
7
12
  /**
8
13
  * Serialize one or more MarcRecords into a MARCXML `<collection>` document.
9
14
  */