@sciencestack-ai/tokens 0.1.31 → 0.1.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/dist/matching/LatexNormalizer.d.ts +7 -2
- package/dist/matching/LatexNormalizer.d.ts.map +1 -1
- package/dist/matching/LatexNormalizer.js +17 -100
- package/dist/matching/LatexNormalizer.js.map +1 -1
- package/dist/matching/MarkdownNormalizer.d.ts +7 -2
- package/dist/matching/MarkdownNormalizer.d.ts.map +1 -1
- package/dist/matching/MarkdownNormalizer.js +15 -81
- package/dist/matching/MarkdownNormalizer.js.map +1 -1
- package/dist/matching/index.d.ts +3 -2
- package/dist/matching/index.d.ts.map +1 -1
- package/dist/matching/index.js +2 -2
- package/dist/matching/index.js.map +1 -1
- package/dist/matching/normalizeWhitespace.d.ts +35 -0
- package/dist/matching/normalizeWhitespace.d.ts.map +1 -0
- package/dist/matching/normalizeWhitespace.js +115 -0
- package/dist/matching/normalizeWhitespace.js.map +1 -0
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -49,5 +49,5 @@ export { BibitemTokenNode } from "./references/BibitemTokenNode";
|
|
|
49
49
|
export { BibliographyTokenNode } from "./references/BibliographyTokenNode";
|
|
50
50
|
export { processTokenNodes } from "./tokenProcessing";
|
|
51
51
|
export { TokenExporter, ExportFormat, ExportOptions } from "./TokenExporter";
|
|
52
|
-
export { SpanMatcher, SpanInfo, MatchResult, TextNormalizer, NormalizationResult, LatexNormalizer, MarkdownNormalizer, } from "./matching";
|
|
52
|
+
export { SpanMatcher, SpanInfo, MatchResult, TextNormalizer, NormalizationResult, LatexNormalizer, MarkdownNormalizer, createLatexNormalizer, createMarkdownNormalizer, NormalizerOptions, } from "./matching/index";
|
|
53
53
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,cAAc,SAAS,CAAC;AACxB,cAAc,gBAAgB,CAAC;AAC/B,cAAc,UAAU,CAAC;AACzB,cAAc,SAAS,CAAC;AAGxB,cAAc,UAAU,CAAC;AAGzB,cAAc,eAAe,CAAC;AAC9B,cAAc,4BAA4B,CAAC;AAG3C,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAG3D,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAG7C,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,wBAAwB,EAAE,MAAM,iCAAiC,CAAC;AAG3E,OAAO,EAAE,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AACjE,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnE,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,wBAAwB,EAAE,MAAM,qCAAqC,CAAC;AAC/E,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAC/D,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAC/D,OAAO,EAAE,oBAAoB,EAAE,MAAM,iCAAiC,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AACjE,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAG/D,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EACL,sBAAsB,EACtB,cAAc,GACf,MAAM,oCAAoC,CAAC;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EACL,kBAAkB,EAClB,oBAAoB,GACrB,MAAM,gCAAgC,CAAC;AACxC,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAGhE,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAG9D,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AACnE,OAAO,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AACrE,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AACnE,OAAO,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AAGzD,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AACtD,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAG9D,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AACzD,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAC/D,OAAO,EACL,gBAAgB,EAChB,mBAAmB,GACpB,MAAM,2BAA2B,CAAC;AAGnC,OAAO,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;AAC5D,OAAO,EAAE,kBAAkB,EAAE,MAAM,8BAA8B,CAAC;AAClE,OAAO,EAAE,wBAAwB,EAAE,MAAM,oCAAoC,CAAC;AAC9E,OAAO,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAC;AACpE,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAG9D,OAAO,EAAE,iBAAi
B,EAAE,MAAM,8BAA8B,CAAC;AACjE,OAAO,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAC;AAG7D,OAAO,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,oCAAoC,CAAC;AAG3E,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAGtD,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAG7E,OAAO,EACL,WAAW,EACX,QAAQ,EACR,WAAW,EACX,cAAc,EACd,mBAAmB,EACnB,eAAe,EACf,kBAAkB,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,cAAc,SAAS,CAAC;AACxB,cAAc,gBAAgB,CAAC;AAC/B,cAAc,UAAU,CAAC;AACzB,cAAc,SAAS,CAAC;AAGxB,cAAc,UAAU,CAAC;AAGzB,cAAc,eAAe,CAAC;AAC9B,cAAc,4BAA4B,CAAC;AAG3C,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAG3D,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAG7C,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,wBAAwB,EAAE,MAAM,iCAAiC,CAAC;AAG3E,OAAO,EAAE,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AACjE,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnE,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,wBAAwB,EAAE,MAAM,qCAAqC,CAAC;AAC/E,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAC/D,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAC/D,OAAO,EAAE,oBAAoB,EAAE,MAAM,iCAAiC,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AACjE,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAG/D,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EACL,sBAAsB,EACtB,cAAc,GACf,MAAM,oCAAoC,CAAC;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EACL,kBAAkB,EAClB,oBAAoB,GACrB,MAAM,gCAAgC,CAAC;AACxC,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAGhE,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAG9D,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AACnE,OAAO,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AACrE,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AACnE,OAAO,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AAGzD,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AACtD,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAG9D,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AACzD,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAC/D,OAAO,EACL,gBAAgB,EAChB,mBAAmB,GACpB,MAAM,2BAA2B,CAAC;AAGnC,OAAO,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;AAC5D,OAAO,EAAE,kBAAkB,EAAE,MAAM,8BAA8B,CAAC;AAClE,OAAO,EAAE,wBAAwB,EAAE,MAAM,oCAAoC,CAAC;AAC9E,OAAO,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAC;AACpE,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAG9D,OAAO,EAAE,iBAAi
B,EAAE,MAAM,8BAA8B,CAAC;AACjE,OAAO,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAC;AAG7D,OAAO,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,oCAAoC,CAAC;AAG3E,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAGtD,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAG7E,OAAO,EACL,WAAW,EACX,QAAQ,EACR,WAAW,EACX,cAAc,EACd,mBAAmB,EACnB,eAAe,EACf,kBAAkB,EAClB,qBAAqB,EACrB,wBAAwB,EACxB,iBAAiB,GAClB,MAAM,kBAAkB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -65,5 +65,5 @@ export { processTokenNodes } from "./tokenProcessing.js";
|
|
|
65
65
|
// Exporter
|
|
66
66
|
export { TokenExporter } from "./TokenExporter.js";
|
|
67
67
|
// Matching utilities
|
|
68
|
-
export { SpanMatcher, LatexNormalizer, MarkdownNormalizer, } from "./matching.js";
|
|
68
|
+
export { SpanMatcher, LatexNormalizer, MarkdownNormalizer, createLatexNormalizer, createMarkdownNormalizer, } from "./matching/index.js";
|
|
69
69
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,mBAAmB;AACnB,cAAc,SAAS,CAAC;AACxB,cAAc,gBAAgB,CAAC;AAC/B,cAAc,UAAU,CAAC;AACzB,cAAc,SAAS,CAAC;AAExB,gBAAgB;AAChB,cAAc,UAAU,CAAC;AAEzB,mBAAmB;AACnB,cAAc,eAAe,CAAC;AAC9B,cAAc,4BAA4B,CAAC;AAI3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAE3D,YAAY;AACZ,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAE7C,cAAc;AACd,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,wBAAwB,EAAE,MAAM,iCAAiC,CAAC;AAE3E,kBAAkB;AAClB,OAAO,EAAE,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AACjE,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnE,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,wBAAwB,EAAE,MAAM,qCAAqC,CAAC;AAC/E,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAC/D,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAC/D,OAAO,EAAE,oBAAoB,EAAE,MAAM,iCAAiC,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AACjE,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAE/D,mBAAmB;AACnB,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EACL,sBAAsB,EACtB,cAAc,GACf,MAAM,oCAAoC,CAAC;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EACL,kBAAkB,EAClB,oBAAoB,GACrB,MAAM,gCAAgC,CAAC;AACxC,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAEhE,iBAAiB;AACjB,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAE9D,oBAAoB;AACpB,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AACnE,OAAO,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AACrE,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AACnE,OAAO,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AAEzD,eAAe;AACf,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AACtD,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAE9D,gBAAgB;AAChB,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AACzD,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAC/D,OAAO,EACL,gBAAgB,GAEjB,MAAM,2BAA2B,CAAC;AAEnC,iBAAiB;AACjB,OAAO,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;AAC5D,OAAO,EAAE,kBAAkB,EAAE,MAAM,8BAA8B,CAAC;AAClE,OAAO,EAAE,wBAAwB,EAAE,MAAM,oCAAoC,CAAC;AAC9E,OAAO,EAAE,mBAAmB,E
AAE,MAAM,+BAA+B,CAAC;AACpE,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAE9D,kBAAkB;AAClB,OAAO,EAAE,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AACjE,OAAO,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAC;AAE7D,sBAAsB;AACtB,OAAO,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,oCAAoC,CAAC;AAE3E,mBAAmB;AACnB,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAEtD,WAAW;AACX,OAAO,EAAE,aAAa,EAA+B,MAAM,iBAAiB,CAAC;AAE7E,qBAAqB;AACrB,OAAO,EACL,WAAW,EAKX,eAAe,EACf,kBAAkB,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,mBAAmB;AACnB,cAAc,SAAS,CAAC;AACxB,cAAc,gBAAgB,CAAC;AAC/B,cAAc,UAAU,CAAC;AACzB,cAAc,SAAS,CAAC;AAExB,gBAAgB;AAChB,cAAc,UAAU,CAAC;AAEzB,mBAAmB;AACnB,cAAc,eAAe,CAAC;AAC9B,cAAc,4BAA4B,CAAC;AAI3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAE3D,YAAY;AACZ,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAE7C,cAAc;AACd,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,wBAAwB,EAAE,MAAM,iCAAiC,CAAC;AAE3E,kBAAkB;AAClB,OAAO,EAAE,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AACjE,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnE,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,wBAAwB,EAAE,MAAM,qCAAqC,CAAC;AAC/E,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAC/D,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAC3D,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAC/D,OAAO,EAAE,oBAAoB,EAAE,MAAM,iCAAiC,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AACjE,OAAO,EAAE,gBAAgB,EAAE,MAAM,6BAA6B,CAAC;AAE/D,mBAAmB;AACnB,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC;AAClE,OAAO,EACL,sBAAsB,EACtB,cAAc,GACf,MAAM,oCAAoC,CAAC;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAC1D,OAAO,EACL,kBAAkB,EAClB,oBAAoB,GACrB,MAAM,gCAAgC,CAAC;AACxC,OAAO,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAC;AAEhE,iBAAiB;AACjB,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAE9D,oBAAoB;AACpB,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AACnE,OAAO,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AACrE,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AACnE,OAAO,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AAEzD,eAAe;AACf,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AACtD,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAE9D,gBAAgB;AAChB,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AACzD,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAC/D,OAAO,EACL,gBAAgB,GAEjB,MAAM,2BAA2B,CAAC;AAEnC,iBAAiB;AACjB,OAAO,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;AAC5D,OAAO,EAAE,kBAAkB,EAAE,MAAM,8BAA8B,CAAC;AAClE,OAAO,EAAE,wBAAwB,EAAE,MAAM,oCAAoC,CAAC;AAC9E,OAAO,EAAE,mBAAmB,E
AAE,MAAM,+BAA+B,CAAC;AACpE,OAAO,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AAE9D,kBAAkB;AAClB,OAAO,EAAE,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AACjE,OAAO,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAC;AAE7D,sBAAsB;AACtB,OAAO,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAC;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,oCAAoC,CAAC;AAE3E,mBAAmB;AACnB,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAEtD,WAAW;AACX,OAAO,EAAE,aAAa,EAA+B,MAAM,iBAAiB,CAAC;AAE7E,qBAAqB;AACrB,OAAO,EACL,WAAW,EAKX,eAAe,EACf,kBAAkB,EAClB,qBAAqB,EACrB,wBAAwB,GAEzB,MAAM,kBAAkB,CAAC"}
|
|
@@ -4,12 +4,17 @@
|
|
|
4
4
|
* Handles LaTeX-specific transformations that should be ignored when matching:
|
|
5
5
|
* - Strips \label{...} commands
|
|
6
6
|
* - Normalizes display math delimiters (\[...\] vs $$...$$)
|
|
7
|
-
* - Normalizes whitespace
|
|
7
|
+
* - Normalizes all whitespace (newlines, tabs, spaces) to single space
|
|
8
8
|
* - Strips comments
|
|
9
9
|
*/
|
|
10
10
|
import { TextNormalizer } from './SpanMatcher';
|
|
11
|
+
import { NormalizerOptions } from './normalizeWhitespace';
|
|
11
12
|
/**
|
|
12
|
-
*
|
|
13
|
+
* Create a LaTeX normalizer with custom options
|
|
14
|
+
*/
|
|
15
|
+
export declare function createLatexNormalizer(options?: NormalizerOptions): TextNormalizer;
|
|
16
|
+
/**
|
|
17
|
+
* Default LatexNormalizer - normalizes whitespace but doesn't strip it all
|
|
13
18
|
*/
|
|
14
19
|
export declare const LatexNormalizer: TextNormalizer;
|
|
15
20
|
export default LatexNormalizer;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LatexNormalizer.d.ts","sourceRoot":"","sources":["../../src/matching/LatexNormalizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,cAAc,EAAuB,MAAM,eAAe,CAAC;
|
|
1
|
+
{"version":3,"file":"LatexNormalizer.d.ts","sourceRoot":"","sources":["../../src/matching/LatexNormalizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,cAAc,EAAuB,MAAM,eAAe,CAAC;AACpE,OAAO,EACL,iBAAiB,EAIlB,MAAM,uBAAuB,CAAC;AAoB/B;;GAEG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,CAAC,EAAE,iBAAiB,GAAG,cAAc,CAejF;AAED;;GAEG;AACH,eAAO,MAAM,eAAe,EAAE,cAAwC,CAAC;AAEvE,eAAe,eAAe,CAAC"}
|
|
@@ -4,12 +4,12 @@
|
|
|
4
4
|
* Handles LaTeX-specific transformations that should be ignored when matching:
|
|
5
5
|
* - Strips \label{...} commands
|
|
6
6
|
* - Normalizes display math delimiters (\[...\] vs $$...$$)
|
|
7
|
-
* - Normalizes whitespace
|
|
7
|
+
* - Normalizes all whitespace (newlines, tabs, spaces) to single space
|
|
8
8
|
* - Strips comments
|
|
9
9
|
*/
|
|
10
|
+
import { applyRemovals, applyReplacements, normalizeWhitespace, } from './normalizeWhitespace.js';
|
|
10
11
|
/**
|
|
11
12
|
* Patterns to strip from LaTeX text during normalization.
|
|
12
|
-
* Each pattern is removed and replaced with empty string.
|
|
13
13
|
*/
|
|
14
14
|
const STRIP_PATTERNS = [
|
|
15
15
|
// \label{...} commands
|
|
@@ -19,113 +19,30 @@ const STRIP_PATTERNS = [
|
|
|
19
19
|
];
|
|
20
20
|
/**
|
|
21
21
|
* Patterns to normalize (replace with standard form).
|
|
22
|
-
* [pattern, replacement]
|
|
23
22
|
*/
|
|
24
23
|
const NORMALIZE_PATTERNS = [
|
|
25
24
|
// Normalize display math: $$ ... $$ -> \[ ... \]
|
|
26
25
|
[/\$\$([\s\S]*?)\$\$/g, '\\[$1\\]'],
|
|
27
|
-
// Normalize multiple spaces to single space
|
|
28
|
-
[/ +/g, ' '],
|
|
29
|
-
// Normalize multiple newlines to double newline
|
|
30
|
-
[/\n{3,}/g, '\n\n'],
|
|
31
26
|
];
|
|
32
27
|
/**
|
|
33
|
-
*
|
|
34
|
-
* This tracks removals and keeps the mapping accurate.
|
|
28
|
+
* Create a LaTeX normalizer with custom options
|
|
35
29
|
*/
|
|
36
|
-
function
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
origIndex += sortedRemovals[removalIndex].length;
|
|
49
|
-
removalIndex++;
|
|
50
|
-
}
|
|
51
|
-
else {
|
|
52
|
-
// Copy character and record position mapping
|
|
53
|
-
posMap.push(origIndex);
|
|
54
|
-
normalized += original[origIndex];
|
|
55
|
-
origIndex++;
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
return { normalized, posMap };
|
|
30
|
+
export function createLatexNormalizer(options) {
|
|
31
|
+
return {
|
|
32
|
+
normalize(text) {
|
|
33
|
+
// Step 1: Remove stripped patterns
|
|
34
|
+
let { text: current, posMap } = applyRemovals(text, STRIP_PATTERNS);
|
|
35
|
+
// Step 2: Apply replacements
|
|
36
|
+
({ text: current, posMap } = applyReplacements(current, posMap, NORMALIZE_PATTERNS));
|
|
37
|
+
// Step 3: Normalize whitespace (this is the key fix for newline handling)
|
|
38
|
+
const result = normalizeWhitespace(current, posMap, options);
|
|
39
|
+
return { normalized: result.normalized, posMap: result.posMap };
|
|
40
|
+
},
|
|
41
|
+
};
|
|
59
42
|
}
|
|
60
43
|
/**
|
|
61
|
-
* LatexNormalizer
|
|
44
|
+
* Default LatexNormalizer - normalizes whitespace but doesn't strip it all
|
|
62
45
|
*/
|
|
63
|
-
export const LatexNormalizer =
|
|
64
|
-
normalize(text) {
|
|
65
|
-
// Find all positions to remove
|
|
66
|
-
const removals = [];
|
|
67
|
-
for (const pattern of STRIP_PATTERNS) {
|
|
68
|
-
// Reset regex state
|
|
69
|
-
pattern.lastIndex = 0;
|
|
70
|
-
let match;
|
|
71
|
-
while ((match = pattern.exec(text)) !== null) {
|
|
72
|
-
removals.push({ start: match.index, length: match[0].length });
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
// Build initial position map with removals
|
|
76
|
-
let { normalized, posMap } = buildPositionMap(text, removals);
|
|
77
|
-
// Apply normalization patterns that change content
|
|
78
|
-
// For these, we rebuild the position map
|
|
79
|
-
for (const [pattern, replacement] of NORMALIZE_PATTERNS) {
|
|
80
|
-
// Track positions before and after normalization
|
|
81
|
-
const newRemovals = [];
|
|
82
|
-
const additions = [];
|
|
83
|
-
pattern.lastIndex = 0;
|
|
84
|
-
let match;
|
|
85
|
-
let offset = 0;
|
|
86
|
-
while ((match = pattern.exec(normalized)) !== null) {
|
|
87
|
-
const matchLen = match[0].length;
|
|
88
|
-
const replaceLen = replacement.length;
|
|
89
|
-
// For simplicity, we just do the replacement and accept
|
|
90
|
-
// that position mapping becomes approximate for normalized sections
|
|
91
|
-
if (matchLen !== replaceLen) {
|
|
92
|
-
// Length changed - just do simple replacement
|
|
93
|
-
// Position accuracy is reduced but excerpt matching still works
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
// Apply the pattern replacement
|
|
97
|
-
const prevNormalized = normalized;
|
|
98
|
-
normalized = normalized.replace(pattern, replacement);
|
|
99
|
-
// Rebuild position map if length changed
|
|
100
|
-
if (normalized.length !== prevNormalized.length) {
|
|
101
|
-
// Simple approach: rebuild map based on character-by-character comparison
|
|
102
|
-
const newPosMap = [];
|
|
103
|
-
let origIdx = 0;
|
|
104
|
-
let normIdx = 0;
|
|
105
|
-
while (normIdx < normalized.length && origIdx < posMap.length) {
|
|
106
|
-
if (normalized[normIdx] === prevNormalized[origIdx]) {
|
|
107
|
-
// Character matches, keep mapping
|
|
108
|
-
newPosMap.push(posMap[origIdx]);
|
|
109
|
-
normIdx++;
|
|
110
|
-
origIdx++;
|
|
111
|
-
}
|
|
112
|
-
else {
|
|
113
|
-
// Characters differ - normalization changed this region
|
|
114
|
-
// Map to the start of the original region
|
|
115
|
-
newPosMap.push(posMap[Math.min(origIdx, posMap.length - 1)]);
|
|
116
|
-
normIdx++;
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
// Handle any remaining characters
|
|
120
|
-
while (normIdx < normalized.length) {
|
|
121
|
-
newPosMap.push(posMap[posMap.length - 1] ?? text.length - 1);
|
|
122
|
-
normIdx++;
|
|
123
|
-
}
|
|
124
|
-
posMap = newPosMap;
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
return { normalized, posMap };
|
|
128
|
-
},
|
|
129
|
-
};
|
|
46
|
+
export const LatexNormalizer = createLatexNormalizer();
|
|
130
47
|
export default LatexNormalizer;
|
|
131
48
|
//# sourceMappingURL=LatexNormalizer.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"LatexNormalizer.js","sourceRoot":"","sources":["../../src/matching/LatexNormalizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;
|
|
1
|
+
{"version":3,"file":"LatexNormalizer.js","sourceRoot":"","sources":["../../src/matching/LatexNormalizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAGH,OAAO,EAEL,aAAa,EACb,iBAAiB,EACjB,mBAAmB,GACpB,MAAM,uBAAuB,CAAC;AAE/B;;GAEG;AACH,MAAM,cAAc,GAAa;IAC/B,uBAAuB;IACvB,mBAAmB;IACnB,wDAAwD;IACxD,iBAAiB;CAClB,CAAC;AAEF;;GAEG;AACH,MAAM,kBAAkB,GAA4B;IAClD,iDAAiD;IACjD,CAAC,qBAAqB,EAAE,UAAU,CAAC;CACpC,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,OAA2B;IAC/D,OAAO;QACL,SAAS,CAAC,IAAY;YACpB,mCAAmC;YACnC,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,aAAa,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;YAEpE,6BAA6B;YAC7B,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,kBAAkB,CAAC,CAAC,CAAC;YAErF,0EAA0E;YAC1E,MAAM,MAAM,GAAG,mBAAmB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;YAE7D,OAAO,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC;QAClE,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,MAAM,eAAe,GAAmB,qBAAqB,EAAE,CAAC;AAEvE,eAAe,eAAe,CAAC"}
|
|
@@ -4,11 +4,16 @@
|
|
|
4
4
|
* Handles Markdown-specific transformations that should be ignored when matching:
|
|
5
5
|
* - Strips HTML comments <!-- ... -->
|
|
6
6
|
* - Strips reference-style link definitions [id]: url
|
|
7
|
-
* - Normalizes whitespace
|
|
7
|
+
* - Normalizes all whitespace (newlines, tabs, spaces) to single space
|
|
8
8
|
*/
|
|
9
9
|
import { TextNormalizer } from './SpanMatcher';
|
|
10
|
+
import { NormalizerOptions } from './normalizeWhitespace';
|
|
10
11
|
/**
|
|
11
|
-
*
|
|
12
|
+
* Create a Markdown normalizer with custom options
|
|
13
|
+
*/
|
|
14
|
+
export declare function createMarkdownNormalizer(options?: NormalizerOptions): TextNormalizer;
|
|
15
|
+
/**
|
|
16
|
+
* Default MarkdownNormalizer - normalizes whitespace but doesn't strip it all
|
|
12
17
|
*/
|
|
13
18
|
export declare const MarkdownNormalizer: TextNormalizer;
|
|
14
19
|
export default MarkdownNormalizer;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"MarkdownNormalizer.d.ts","sourceRoot":"","sources":["../../src/matching/MarkdownNormalizer.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,cAAc,EAAuB,MAAM,eAAe,CAAC;
|
|
1
|
+
{"version":3,"file":"MarkdownNormalizer.d.ts","sourceRoot":"","sources":["../../src/matching/MarkdownNormalizer.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,cAAc,EAAuB,MAAM,eAAe,CAAC;AACpE,OAAO,EACL,iBAAiB,EAGlB,MAAM,uBAAuB,CAAC;AAY/B;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,OAAO,CAAC,EAAE,iBAAiB,GAAG,cAAc,CAYpF;AAED;;GAEG;AACH,eAAO,MAAM,kBAAkB,EAAE,cAA2C,CAAC;AAE7E,eAAe,kBAAkB,CAAC"}
|
|
@@ -4,8 +4,9 @@
|
|
|
4
4
|
* Handles Markdown-specific transformations that should be ignored when matching:
|
|
5
5
|
* - Strips HTML comments <!-- ... -->
|
|
6
6
|
* - Strips reference-style link definitions [id]: url
|
|
7
|
-
* - Normalizes whitespace
|
|
7
|
+
* - Normalizes all whitespace (newlines, tabs, spaces) to single space
|
|
8
8
|
*/
|
|
9
|
+
import { applyRemovals, normalizeWhitespace, } from './normalizeWhitespace.js';
|
|
9
10
|
/**
|
|
10
11
|
* Patterns to strip from Markdown text during normalization.
|
|
11
12
|
*/
|
|
@@ -16,89 +17,22 @@ const STRIP_PATTERNS = [
|
|
|
16
17
|
/^\s*\[[^\]]+\]:\s+\S+(?:\s+"[^"]*")?$/gm,
|
|
17
18
|
];
|
|
18
19
|
/**
|
|
19
|
-
*
|
|
20
|
+
* Create a Markdown normalizer with custom options
|
|
20
21
|
*/
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
*/
|
|
32
|
-
function buildPositionMap(original, removals) {
|
|
33
|
-
// Sort removals by start position
|
|
34
|
-
const sortedRemovals = [...removals].sort((a, b) => a.start - b.start);
|
|
35
|
-
const posMap = [];
|
|
36
|
-
let normalized = '';
|
|
37
|
-
let origIndex = 0;
|
|
38
|
-
let removalIndex = 0;
|
|
39
|
-
while (origIndex < original.length) {
|
|
40
|
-
// Check if we're at a removal point
|
|
41
|
-
if (removalIndex < sortedRemovals.length &&
|
|
42
|
-
origIndex === sortedRemovals[removalIndex].start) {
|
|
43
|
-
// Skip over the removed content
|
|
44
|
-
origIndex += sortedRemovals[removalIndex].length;
|
|
45
|
-
removalIndex++;
|
|
46
|
-
}
|
|
47
|
-
else {
|
|
48
|
-
// Copy character and record position mapping
|
|
49
|
-
posMap.push(origIndex);
|
|
50
|
-
normalized += original[origIndex];
|
|
51
|
-
origIndex++;
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
return { normalized, posMap };
|
|
22
|
+
export function createMarkdownNormalizer(options) {
|
|
23
|
+
return {
|
|
24
|
+
normalize(text) {
|
|
25
|
+
// Step 1: Remove stripped patterns
|
|
26
|
+
let { text: current, posMap } = applyRemovals(text, STRIP_PATTERNS);
|
|
27
|
+
// Step 2: Normalize whitespace
|
|
28
|
+
const result = normalizeWhitespace(current, posMap, options);
|
|
29
|
+
return { normalized: result.normalized, posMap: result.posMap };
|
|
30
|
+
},
|
|
31
|
+
};
|
|
55
32
|
}
|
|
56
33
|
/**
|
|
57
|
-
* MarkdownNormalizer
|
|
34
|
+
* Default MarkdownNormalizer - normalizes whitespace but doesn't strip it all
|
|
58
35
|
*/
|
|
59
|
-
export const MarkdownNormalizer =
|
|
60
|
-
normalize(text) {
|
|
61
|
-
// Find all positions to remove
|
|
62
|
-
const removals = [];
|
|
63
|
-
for (const pattern of STRIP_PATTERNS) {
|
|
64
|
-
// Reset regex state
|
|
65
|
-
pattern.lastIndex = 0;
|
|
66
|
-
let match;
|
|
67
|
-
while ((match = pattern.exec(text)) !== null) {
|
|
68
|
-
removals.push({ start: match.index, length: match[0].length });
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
// Build initial position map with removals
|
|
72
|
-
let { normalized, posMap } = buildPositionMap(text, removals);
|
|
73
|
-
// Apply normalization patterns
|
|
74
|
-
for (const [pattern, replacement] of NORMALIZE_PATTERNS) {
|
|
75
|
-
const prevNormalized = normalized;
|
|
76
|
-
normalized = normalized.replace(pattern, replacement);
|
|
77
|
-
// Rebuild position map if length changed
|
|
78
|
-
if (normalized.length !== prevNormalized.length) {
|
|
79
|
-
const newPosMap = [];
|
|
80
|
-
let origIdx = 0;
|
|
81
|
-
let normIdx = 0;
|
|
82
|
-
while (normIdx < normalized.length && origIdx < posMap.length) {
|
|
83
|
-
if (normalized[normIdx] === prevNormalized[origIdx]) {
|
|
84
|
-
newPosMap.push(posMap[origIdx]);
|
|
85
|
-
normIdx++;
|
|
86
|
-
origIdx++;
|
|
87
|
-
}
|
|
88
|
-
else {
|
|
89
|
-
newPosMap.push(posMap[Math.min(origIdx, posMap.length - 1)]);
|
|
90
|
-
normIdx++;
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
while (normIdx < normalized.length) {
|
|
94
|
-
newPosMap.push(posMap[posMap.length - 1] ?? text.length - 1);
|
|
95
|
-
normIdx++;
|
|
96
|
-
}
|
|
97
|
-
posMap = newPosMap;
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
return { normalized, posMap };
|
|
101
|
-
},
|
|
102
|
-
};
|
|
36
|
+
export const MarkdownNormalizer = createMarkdownNormalizer();
|
|
103
37
|
export default MarkdownNormalizer;
|
|
104
38
|
//# sourceMappingURL=MarkdownNormalizer.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"MarkdownNormalizer.js","sourceRoot":"","sources":["../../src/matching/MarkdownNormalizer.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;
|
|
1
|
+
{"version":3,"file":"MarkdownNormalizer.js","sourceRoot":"","sources":["../../src/matching/MarkdownNormalizer.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,OAAO,EAEL,aAAa,EACb,mBAAmB,GACpB,MAAM,uBAAuB,CAAC;AAE/B;;GAEG;AACH,MAAM,cAAc,GAAa;IAC/B,8BAA8B;IAC9B,kBAAkB;IAClB,uEAAuE;IACvE,yCAAyC;CAC1C,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,wBAAwB,CAAC,OAA2B;IAClE,OAAO;QACL,SAAS,CAAC,IAAY;YACpB,mCAAmC;YACnC,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,aAAa,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;YAEpE,+BAA+B;YAC/B,MAAM,MAAM,GAAG,mBAAmB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;YAE7D,OAAO,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC;QAClE,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAmB,wBAAwB,EAAE,CAAC;AAE7E,eAAe,kBAAkB,CAAC"}
|
package/dist/matching/index.d.ts
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
* Matching utilities for finding text excerpts in AST node spans
|
|
3
3
|
*/
|
|
4
4
|
export { SpanMatcher, SpanInfo, MatchResult, TextNormalizer, NormalizationResult, } from './SpanMatcher';
|
|
5
|
-
export { LatexNormalizer } from './LatexNormalizer';
|
|
6
|
-
export { MarkdownNormalizer } from './MarkdownNormalizer';
|
|
5
|
+
export { LatexNormalizer, createLatexNormalizer } from './LatexNormalizer';
|
|
6
|
+
export { MarkdownNormalizer, createMarkdownNormalizer } from './MarkdownNormalizer';
|
|
7
|
+
export { NormalizerOptions } from './normalizeWhitespace';
|
|
7
8
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/matching/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EACL,WAAW,EACX,QAAQ,EACR,WAAW,EACX,cAAc,EACd,mBAAmB,GACpB,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/matching/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EACL,WAAW,EACX,QAAQ,EACR,WAAW,EACX,cAAc,EACd,mBAAmB,GACpB,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,eAAe,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAC3E,OAAO,EAAE,kBAAkB,EAAE,wBAAwB,EAAE,MAAM,sBAAsB,CAAC;AACpF,OAAO,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC"}
|
package/dist/matching/index.js
CHANGED
|
@@ -2,6 +2,6 @@
|
|
|
2
2
|
* Matching utilities for finding text excerpts in AST node spans
|
|
3
3
|
*/
|
|
4
4
|
export { SpanMatcher, } from './SpanMatcher.js';
|
|
5
|
-
export { LatexNormalizer } from './LatexNormalizer.js';
|
|
6
|
-
export { MarkdownNormalizer } from './MarkdownNormalizer.js';
|
|
5
|
+
export { LatexNormalizer, createLatexNormalizer } from './LatexNormalizer.js';
|
|
6
|
+
export { MarkdownNormalizer, createMarkdownNormalizer } from './MarkdownNormalizer.js';
|
|
7
7
|
//# sourceMappingURL=index.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/matching/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EACL,WAAW,GAKZ,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/matching/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EACL,WAAW,GAKZ,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,eAAe,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAC3E,OAAO,EAAE,kBAAkB,EAAE,wBAAwB,EAAE,MAAM,sBAAsB,CAAC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared whitespace normalization utilities for all normalizers
|
|
3
|
+
*/
|
|
4
|
+
export interface NormalizerOptions {
|
|
5
|
+
/** Remove ALL whitespace for aggressive matching (default: false) */
|
|
6
|
+
stripAllWhitespace?: boolean;
|
|
7
|
+
}
|
|
8
|
+
/**
|
|
9
|
+
* Normalize whitespace in text, building position map.
|
|
10
|
+
* By default: collapses all whitespace (spaces, tabs, newlines) to single space.
|
|
11
|
+
* With stripAllWhitespace: removes all whitespace entirely.
|
|
12
|
+
*/
|
|
13
|
+
export declare function normalizeWhitespace(text: string, posMap: number[], options?: NormalizerOptions): {
|
|
14
|
+
normalized: string;
|
|
15
|
+
posMap: number[];
|
|
16
|
+
};
|
|
17
|
+
/**
|
|
18
|
+
* Build initial position map (identity map)
|
|
19
|
+
*/
|
|
20
|
+
export declare function buildIdentityPosMap(length: number): number[];
|
|
21
|
+
/**
|
|
22
|
+
* Apply regex removals and build position map
|
|
23
|
+
*/
|
|
24
|
+
export declare function applyRemovals(text: string, patterns: RegExp[]): {
|
|
25
|
+
text: string;
|
|
26
|
+
posMap: number[];
|
|
27
|
+
};
|
|
28
|
+
/**
|
|
29
|
+
* Apply regex replacements, updating position map approximately
|
|
30
|
+
*/
|
|
31
|
+
export declare function applyReplacements(text: string, posMap: number[], patterns: Array<[RegExp, string]>): {
|
|
32
|
+
text: string;
|
|
33
|
+
posMap: number[];
|
|
34
|
+
};
|
|
35
|
+
//# sourceMappingURL=normalizeWhitespace.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalizeWhitespace.d.ts","sourceRoot":"","sources":["../../src/matching/normalizeWhitespace.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,MAAM,WAAW,iBAAiB;IAChC,qEAAqE;IACrE,kBAAkB,CAAC,EAAE,OAAO,CAAC;CAC9B;AAED;;;;GAIG;AACH,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,MAAM,EAAE,EAChB,OAAO,CAAC,EAAE,iBAAiB,GAC1B;IAAE,UAAU,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,CAoC1C;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,CAE5D;AAED;;GAEG;AACH,wBAAgB,aAAa,CAC3B,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAAE,GACjB;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,CAkCpC;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAC/B,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,MAAM,EAAE,EAChB,QAAQ,EAAE,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,GAChC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,CAmCpC"}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared whitespace normalization utilities for all normalizers
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Rewrites whitespace in `text` while carrying a position map along.
 *
 * Default mode replaces each run of whitespace (anything matched by `\s`) with a
 * single space character. When `options.stripAllWhitespace` is truthy, whitespace
 * is dropped altogether instead.
 *
 * @param {string} text - Input string, scanned one UTF-16 code unit at a time.
 * @param {number[]} posMap - Read at the same index as `text`; `posMap[i]` is
 *   copied to the output map for every character that is kept. Not mutated.
 * @param {{ stripAllWhitespace?: boolean }} [options] - Optional settings.
 * @returns {{ normalized: string, posMap: number[] }} The rewritten text and a
 *   new map with one entry per output character. A collapsed run takes the map
 *   entry of its first whitespace character.
 */
export function normalizeWhitespace(text, posMap, options) {
    const dropWhitespace = options?.stripAllWhitespace ?? false;
    const pieces = [];
    const mapped = [];
    let insideRun = false;

    for (let idx = 0; idx < text.length; idx += 1) {
        const ch = text[idx];

        if (!/\s/.test(ch)) {
            // Ordinary character: copy it through and close any open whitespace run.
            pieces.push(ch);
            mapped.push(posMap[idx]);
            insideRun = false;
            continue;
        }

        // Whitespace: emit nothing when stripping, or when the run already
        // produced its single space.
        if (dropWhitespace || insideRun) {
            continue;
        }

        pieces.push(' ');
        mapped.push(posMap[idx]);
        insideRun = true;
    }

    return { normalized: pieces.join(''), posMap: mapped };
}
|
|
43
|
+
/**
 * Creates the starting position map in which every index maps to itself.
 *
 * @param {number} length - Number of entries to generate.
 * @returns {number[]} The array `[0, 1, ..., length - 1]`.
 */
export function buildIdentityPosMap(length) {
    // Materialise `length` slots first, then read their indices back out.
    const slots = Array.from({ length });
    return [...slots.keys()];
}
|
|
49
|
+
/**
 * Deletes every match of every pattern from `text` and builds the position map
 * of the characters that remain.
 *
 * All patterns are matched against the original `text` (not against each other's
 * output). Matched ranges that overlap or touch are merged before deletion, so a
 * character covered by any match is removed exactly once.
 *
 * @param {string} text - Input string.
 * @param {RegExp[]} patterns - Patterns whose matches are removed. A pattern
 *   without the `g` flag is still applied to all of its matches.
 * @returns {{ text: string, posMap: number[] }} The remaining text and, for each
 *   remaining character, its index in the input `text`.
 */
export function applyRemovals(text, patterns) {
    const spans = [];
    for (const pattern of patterns) {
        // Leave the caller's regex rewound, as before.
        pattern.lastIndex = 0;
        // Scan with a private global copy: a non-global regex would otherwise
        // return the same match forever, and matchAll also steps past empty
        // matches instead of stalling on them.
        const flags = pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`;
        const scanner = new RegExp(pattern, flags);
        for (const match of text.matchAll(scanner)) {
            const matchLength = match[0].length;
            if (matchLength > 0) {
                spans.push({ start: match.index, end: match.index + matchLength });
            }
        }
    }
    spans.sort((a, b) => a.start - b.start || a.end - b.end);

    const posMap = [];
    let normalized = '';
    let cursor = 0;
    // Copies text[cursor, until) to the output; a no-op when until <= cursor.
    const keepUpTo = (until) => {
        for (let i = cursor; i < until; i++) {
            posMap.push(i);
        }
        normalized += text.slice(cursor, until);
    };

    for (const { start, end } of spans) {
        if (end <= cursor) {
            // Entirely inside a range that was already removed.
            continue;
        }
        keepUpTo(start);
        // A span may begin inside the previous one; only ever move forward.
        cursor = end;
    }
    keepUpTo(text.length);

    return { text: normalized, posMap };
}
|
|
81
|
+
/**
 * Expands a String.prototype.replace replacement template (`$$`, `$&`, `` $` ``,
 * `$'`, `$n`/`$nn`, `$<name>`) for a single match.
 */
function expandReplacementTemplate(template, matched, captures, offset, input, namedGroups) {
    if (!template.includes('$')) {
        return template;
    }
    const isDigit = (c) => c !== undefined && c >= '0' && c <= '9';
    let out = '';
    let i = 0;
    while (i < template.length) {
        const ch = template[i];
        const next = template[i + 1];
        if (ch !== '$' || next === undefined) {
            out += ch;
            i += 1;
        } else if (next === '$') {
            out += '$';
            i += 2;
        } else if (next === '&') {
            out += matched;
            i += 2;
        } else if (next === '`') {
            out += input.slice(0, offset);
            i += 2;
        } else if (next === "'") {
            out += input.slice(offset + matched.length);
            i += 2;
        } else if (isDigit(next)) {
            // Prefer a two-digit group reference, fall back to one digit, and
            // treat the `$` as literal when neither names an existing group.
            let groupIndex = 0;
            let consumed = 0;
            const following = template[i + 2];
            if (isDigit(following)) {
                const twoDigit = Number(next + following);
                if (twoDigit >= 1 && twoDigit <= captures.length) {
                    groupIndex = twoDigit;
                    consumed = 3;
                }
            }
            if (consumed === 0) {
                const oneDigit = Number(next);
                if (oneDigit >= 1 && oneDigit <= captures.length) {
                    groupIndex = oneDigit;
                    consumed = 2;
                }
            }
            if (consumed === 0) {
                out += '$';
                i += 1;
            } else {
                out += captures[groupIndex - 1] ?? '';
                i += consumed;
            }
        } else if (next === '<') {
            const close = namedGroups === undefined ? -1 : template.indexOf('>', i + 2);
            if (close === -1) {
                // No named groups in the regex, or no closing `>`: literal text.
                out += '$<';
                i += 2;
            } else {
                out += namedGroups[template.slice(i + 2, close)] ?? '';
                i = close + 1;
            }
        } else {
            out += '$';
            i += 1;
        }
    }
    return out;
}

/**
 * Applies regex replacements in order and keeps the position map aligned.
 *
 * Each `[pattern, replacement]` pair is applied with String.prototype.replace to
 * the output of the previous pair. Every individual match is recorded, so
 * characters outside a match keep their exact map entry. Characters produced by
 * a replacement have no exact source; they are mapped onto the matched range
 * (character k of the replacement takes the entry of character k of the match,
 * clamped to the last matched character).
 *
 * @param {string} text - Input string.
 * @param {number[]} posMap - Position map for `text` (one entry per character).
 *   Not mutated; returned as-is when no pattern matches.
 * @param {Array<[RegExp, string]>} patterns - Replacement pairs, applied in order.
 * @returns {{ text: string, posMap: number[] }} The final text and its map.
 */
export function applyReplacements(text, posMap, patterns) {
    let currentText = text;
    let currentPosMap = posMap;

    for (const [pattern, replacement] of patterns) {
        const sourceText = currentText;
        const edits = [];

        currentText = sourceText.replace(pattern, (...args) => {
            const callArgs = [...args];
            // Callback arguments: match, captures..., offset, input[, groups].
            const namedGroups = typeof args[args.length - 1] === 'object' ? args.pop() : undefined;
            const input = args.pop();
            const offset = args.pop();
            const [matched, ...captures] = args;
            const inserted = typeof replacement === 'function'
                ? String(replacement(...callArgs))
                : expandReplacementTemplate(String(replacement), matched, captures, offset, input, namedGroups);
            edits.push({ offset, removed: matched.length, inserted: inserted.length });
            return inserted;
        });

        if (edits.length === 0) {
            continue;
        }

        const lastIdx = currentPosMap.length - 1;
        const previousMap = currentPosMap;
        // Clamp to the map so a short or empty map never yields `undefined`.
        const lookup = (idx) => previousMap[Math.min(idx, lastIdx)] ?? 0;

        const rebuilt = [];
        let cursor = 0;
        for (const { offset, removed, inserted } of edits) {
            for (let i = cursor; i < offset; i++) {
                rebuilt.push(lookup(i));
            }
            for (let k = 0; k < inserted; k++) {
                const within = removed > 0 ? Math.min(k, removed - 1) : 0;
                rebuilt.push(lookup(offset + within));
            }
            cursor = offset + removed;
        }
        for (let i = cursor; i < sourceText.length; i++) {
            rebuilt.push(lookup(i));
        }
        currentPosMap = rebuilt;
    }

    return { text: currentText, posMap: currentPosMap };
}
|
|
115
|
+
//# sourceMappingURL=normalizeWhitespace.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalizeWhitespace.js","sourceRoot":"","sources":["../../src/matching/normalizeWhitespace.ts"],"names":[],"mappings":"AAAA;;GAEG;AASH;;;;GAIG;AACH,MAAM,UAAU,mBAAmB,CACjC,IAAY,EACZ,MAAgB,EAChB,OAA2B;IAE3B,MAAM,QAAQ,GAAG,OAAO,EAAE,kBAAkB,IAAI,KAAK,CAAC;IACtD,MAAM,SAAS,GAAa,EAAE,CAAC;IAC/B,IAAI,UAAU,GAAG,EAAE,CAAC;IACpB,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,IAAI,YAAY,GAAG,KAAK,CAAC;IAEzB,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAErC,IAAI,YAAY,EAAE,CAAC;YACjB,IAAI,QAAQ,EAAE,CAAC;gBACb,sBAAsB;gBACtB,CAAC,EAAE,CAAC;gBACJ,SAAS;YACX,CAAC;iBAAM,CAAC;gBACN,2BAA2B;gBAC3B,IAAI,CAAC,YAAY,EAAE,CAAC;oBAClB,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;oBAC1B,UAAU,IAAI,GAAG,CAAC;oBAClB,YAAY,GAAG,IAAI,CAAC;gBACtB,CAAC;gBACD,CAAC,EAAE,CAAC;gBACJ,SAAS;YACX,CAAC;QACH,CAAC;QAED,2BAA2B;QAC3B,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1B,UAAU,IAAI,IAAI,CAAC;QACnB,YAAY,GAAG,KAAK,CAAC;QACrB,CAAC,EAAE,CAAC;IACN,CAAC;IAED,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;AAC3C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,MAAc;IAChD,OAAO,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;AAC7C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,aAAa,CAC3B,IAAY,EACZ,QAAkB;IAElB,MAAM,QAAQ,GAA6C,EAAE,CAAC;IAE9D,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;QACtB,IAAI,KAAK,CAAC;QACV,OAAO,CAAC,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC7C,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;QACjE,CAAC;IACH,CAAC;IAED,yBAAyB;IACzB,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAE3C,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,UAAU,GAAG,EAAE,CAAC;IACpB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,OAAO,SAAS,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QAC/B,IACE,YAAY,GAAG,QAAQ,CAAC,MAAM;YAC9B,SAAS,KAAK,QAAQ,CAAC,
YAAY,CAAC,CAAC,KAAK,EAC1C,CAAC;YACD,SAAS,IAAI,QAAQ,CAAC,YAAY,CAAC,CAAC,MAAM,CAAC;YAC3C,YAAY,EAAE,CAAC;QACjB,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACvB,UAAU,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC;YAC9B,SAAS,EAAE,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC;AACtC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAC/B,IAAY,EACZ,MAAgB,EAChB,QAAiC;IAEjC,IAAI,WAAW,GAAG,IAAI,CAAC;IACvB,IAAI,aAAa,GAAG,MAAM,CAAC;IAE3B,KAAK,MAAM,CAAC,OAAO,EAAE,WAAW,CAAC,IAAI,QAAQ,EAAE,CAAC;QAC9C,MAAM,QAAQ,GAAG,WAAW,CAAC;QAC7B,WAAW,GAAG,WAAW,CAAC,OAAO,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;QAExD,IAAI,WAAW,CAAC,MAAM,KAAK,QAAQ,CAAC,MAAM,EAAE,CAAC;YAC3C,qCAAqC;YACrC,MAAM,SAAS,GAAa,EAAE,CAAC;YAC/B,IAAI,OAAO,GAAG,CAAC,CAAC;YAChB,IAAI,OAAO,GAAG,CAAC,CAAC;YAEhB,OAAO,OAAO,GAAG,WAAW,CAAC,MAAM,IAAI,OAAO,GAAG,aAAa,CAAC,MAAM,EAAE,CAAC;gBACtE,IAAI,WAAW,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;oBAC/C,SAAS,CAAC,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC;oBACvC,OAAO,EAAE,CAAC;oBACV,OAAO,EAAE,CAAC;gBACZ,CAAC;qBAAM,CAAC;oBACN,SAAS,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;oBAC3E,OAAO,EAAE,CAAC;gBACZ,CAAC;YACH,CAAC;YAED,OAAO,OAAO,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC;gBACpC,SAAS,CAAC,IAAI,CAAC,aAAa,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;gBAC7D,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,aAAa,GAAG,SAAS,CAAC;QAC5B,CAAC;IACH,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC;AACtD,CAAC"}
|