@parseo/shared 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.d.ts ADDED
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Resolve a file path from CLI arguments, handling drag-and-drop artifacts.
3
+ *
4
+ * When a file is dragged into a terminal on macOS, the shell may:
5
+ * - Backslash-escape spaces, ampersands, parentheses, etc.
6
+ * - Split the path into multiple argv entries if quoting is incomplete.
7
+ * - Eat special characters like `&` even inside quotes when lines wrap.
8
+ *
9
+ * Non-flag arguments are joined with spaces, cleaned of surrounding quotes, leftover escape backslashes and outer whitespace, then resolved to an absolute path; a path that does not exist is fuzzy-matched segment by segment against the actual directory contents.
10
+ *
11
+ * @param args - Raw CLI arguments; `--output` together with its value, and any other argument starting with `--`, are ignored.
12
+ * @returns The resolved absolute path, or `""` when no path argument was given. When nothing on disk matches, the implementation logs `File not found` and exits the process with code 1.
13
+ */
14
+ export declare function resolvePathFromArgs(args: string[]): string;
15
+ //# sourceMappingURL=cli.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":"AAQA;;;;;;;;;;;;GAYG;AACH,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,MAAM,CAuC1D"}
package/dist/cli.js ADDED
@@ -0,0 +1,86 @@
1
+ import { existsSync, readdirSync } from "fs";
2
+ import { resolve, join } from "path";
3
+ /**
4
+ * Flags that consume the next argument as their value (not part of the path). resolvePathFromArgs skips both the flag and the argument that follows it.
5
+ */
6
+ const FLAGS_WITH_VALUES = new Set(["--output"]);
7
/**
 * Resolve a file path from CLI arguments, handling drag-and-drop artifacts.
 *
 * When a file is dragged into a terminal on macOS, the shell may:
 * - Backslash-escape spaces, ampersands, parentheses, etc.
 * - Split the path into multiple argv entries if quoting is incomplete.
 * - Eat special characters like `&` even inside quotes when lines wrap.
 *
 * This function joins all non-flag arguments, trims outer whitespace, strips
 * surrounding quotes and leftover escape backslashes, and if the exact path
 * doesn't exist, walks up to find the deepest valid ancestor and fuzzy-matches
 * each missing segment against the actual directory contents.
 *
 * @param {string[]} args Raw CLI arguments. Flags listed in FLAGS_WITH_VALUES
 *   are skipped together with their value; any other `--` argument is skipped.
 * @returns {string} The absolute path of an existing file or directory, or
 *   `""` when the arguments contain no usable path text.
 *
 * Side effect: when a path was supplied but nothing on disk matches it, an
 * error is printed and the process exits with code 1.
 */
export function resolvePathFromArgs(args) {
    const pathParts = [];
    for (let i = 0; i < args.length; i++) {
        const arg = args[i];
        if (FLAGS_WITH_VALUES.has(arg)) {
            i++; // skip the flag's value
        }
        else if (!arg.startsWith("--")) {
            pathParts.push(arg);
        }
    }
    if (pathParts.length === 0) {
        return "";
    }
    // Trim first: the quote pattern below is anchored at both ends, so stray
    // outer whitespace would otherwise leave the quotes in place.
    let raw = pathParts.join(" ").trim();
    // Strip surrounding single or double quotes
    raw = raw.replace(/^(['"])(.*)\1$/, "$2");
    // Remove backslash escapes added by drag-and-drop (e.g. `\ `, `\&`, `\(`)
    raw = raw.replace(/\\(?=[ &()'])/g, "");
    raw = raw.trim();
    if (raw === "") {
        // Nothing left after cleanup (e.g. `''` or whitespace only). Treat it
        // like the no-argument case; resolve("") would return the current
        // working directory, which always exists.
        return "";
    }
    const resolved = resolve(raw);
    if (existsSync(resolved)) {
        return resolved;
    }
    // Fuzzy resolve: walk the path segments and match against disk contents
    // when a segment doesn't exist (e.g. shell ate `&` from a folder name).
    const fuzzyResolved = fuzzyResolvePath(resolved);
    if (fuzzyResolved) {
        return fuzzyResolved;
    }
    console.error(`File not found: ${resolved}`);
    process.exit(1);
}
52
/**
 * Reduce a file or directory name to the key used for fuzzy comparison:
 * Unicode-normalised (NFC, so composed and decomposed forms compare equal),
 * lower-cased, with everything that is not a letter or digit removed.
 *
 * @param {string} name A single path segment or directory entry name.
 * @returns {string} The comparison key; empty when the name contains no
 *   letters or digits.
 */
function fuzzyKey(name) {
    return name.normalize("NFC").toLowerCase().replace(/[^\p{L}\p{N}]/gu, "");
}
/**
 * Walk the path from root to leaf. At each level, if the segment doesn't
 * match an entry exactly, find the entry whose name matches after collapsing
 * whitespace and stripping punctuation (handles missing `&`, extra spaces, etc).
 *
 * @param {string} fullPath Absolute, `/`-separated path to resolve.
 * @returns {string | null} The path rebuilt from the entries actually found
 *   on disk, or null when some segment has no match or its parent directory
 *   cannot be listed.
 */
function fuzzyResolvePath(fullPath) {
    // The first element is the empty string before the leading "/".
    const segments = fullPath.split("/").slice(1);
    let current = "/";
    for (const seg of segments) {
        if (!seg) {
            continue;
        }
        const exact = join(current, seg);
        if (existsSync(exact)) {
            current = exact;
            continue;
        }
        // Segment doesn't exist — try fuzzy match against directory entries
        const target = fuzzyKey(seg);
        if (target === "") {
            // A segment with no letters or digits carries no information to
            // match on; accepting it would pick an arbitrary entry whose key
            // is also empty.
            return null;
        }
        let entries;
        try {
            entries = readdirSync(current);
        }
        catch {
            // Parent is unreadable or not a directory.
            return null;
        }
        const match = entries.find((entry) => fuzzyKey(entry) === target);
        if (!match) {
            return null;
        }
        current = join(current, match);
    }
    return existsSync(current) ? current : null;
}
package/dist/index.d.ts CHANGED
@@ -1,3 +1,4 @@
1
+ export { resolvePathFromArgs } from "./cli.js";
1
2
  export { extractTextItems, extractLines, formLines, extractFilledRects } from "./extract.js";
2
3
  export type { FilledRect } from "./extract.js";
3
4
  export { classifyDocument } from "./classify.js";
@@ -5,7 +6,6 @@ export type { FormatName, ClassifyResult, PackageName } from "./classify.js";
5
6
  export type { TextItem, TextSegment, TextLine, DateString, DateRange } from "./types.js";
6
7
  export { toBBox } from "./types.js";
7
8
  export type { BoundingBox } from "./types.js";
8
- export { resolvePathFromArgs } from "./cli.js";
9
9
  export { ParserError, InvalidPDFError, UnrecognizedFormatError, MissingSectionError, ExtractionError, } from "./errors.js";
10
10
  export { parseDate, parseDateRange, parseCurrency, parseNum, escapeRegex, cleanNumber, getSegmentNear, extractLabelValue, findLabelInText, isBulletLine, isNumberedEntry, parseBulletKeyValues, collectBulletItems, collectUntil, mapToColumns, findColumnHeaders, getSection, } from "./utils.js";
11
11
  export type { Section } from "./utils.js";
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,YAAY,EAAE,SAAS,EAAE,kBAAkB,EAAE,MAAM,cAAc,CAAC;AAC7F,YAAY,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAC/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AACjD,YAAY,EAAE,UAAU,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAC7E,YAAY,EAAE,QAAQ,EAAE,WAAW,EAAE,QAAQ,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACzF,OAAO,EAAE,MAAM,EAAE,MAAM,YAAY,CAAC;AACpC,YAAY,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAC9C,OAAO,EAAE,mBAAmB,EAAE,MAAM,UAAU,CAAC;AAC/C,OAAO,EACL,WAAW,EACX,eAAe,EACf,uBAAuB,EACvB,mBAAmB,EACnB,eAAe,GAChB,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,SAAS,EACT,cAAc,EACd,aAAa,EACb,QAAQ,EACR,WAAW,EACX,WAAW,EACX,cAAc,EACd,iBAAiB,EACjB,eAAe,EACf,YAAY,EACZ,eAAe,EACf,oBAAoB,EACpB,kBAAkB,EAClB,YAAY,EACZ,YAAY,EACZ,iBAAiB,EACjB,UAAU,GACX,MAAM,YAAY,CAAC;AACpB,YAAY,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,UAAU,CAAC;AAC/C,OAAO,EAAE,gBAAgB,EAAE,YAAY,EAAE,SAAS,EAAE,kBAAkB,EAAE,MAAM,cAAc,CAAC;AAC7F,YAAY,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAC/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AACjD,YAAY,EAAE,UAAU,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAC7E,YAAY,EAAE,QAAQ,EAAE,WAAW,EAAE,QAAQ,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACzF,OAAO,EAAE,MAAM,EAAE,MAAM,YAAY,CAAC;AACpC,YAAY,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAC9C,OAAO,EACL,WAAW,EACX,eAAe,EACf,uBAAuB,EACvB,mBAAmB,EACnB,eAAe,GAChB,MAAM,aAAa,CAAC;AACrB,OAAO,EACL,SAAS,EACT,cAAc,EACd,aAAa,EACb,QAAQ,EACR,WAAW,EACX,WAAW,EACX,cAAc,EACd,iBAAiB,EACjB,eAAe,EACf,YAAY,EACZ,eAAe,EACf,oBAAoB,EACpB,kBAAkB,EAClB,YAAY,EACZ,YAAY,EACZ,iBAAiB,EACjB,UAAU,GACX,MAAM,YAAY,CAAC;AACpB,YAAY,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC"}
package/dist/index.js CHANGED
@@ -1,6 +1,6 @@
1
+ export { resolvePathFromArgs } from "./cli.js";
1
2
  export { extractTextItems, extractLines, formLines, extractFilledRects } from "./extract.js";
2
3
  export { classifyDocument } from "./classify.js";
3
4
  export { toBBox } from "./types.js";
4
- export { resolvePathFromArgs } from "./cli.js";
5
5
  export { ParserError, InvalidPDFError, UnrecognizedFormatError, MissingSectionError, ExtractionError, } from "./errors.js";
6
6
  export { parseDate, parseDateRange, parseCurrency, parseNum, escapeRegex, cleanNumber, getSegmentNear, extractLabelValue, findLabelInText, isBulletLine, isNumberedEntry, parseBulletKeyValues, collectBulletItems, collectUntil, mapToColumns, findColumnHeaders, getSection, } from "./utils.js";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@parseo/shared",
3
- "version": "1.0.0",
3
+ "version": "1.0.1",
4
4
  "type": "module",
5
5
  "main": "./dist/index.js",
6
6
  "types": "./dist/index.d.ts",
@@ -14,7 +14,9 @@
14
14
  "publishConfig": {
15
15
  "access": "public"
16
16
  },
17
- "files": ["dist", "!dist/**/cli.*"],
17
+ "files": [
18
+ "dist"
19
+ ],
18
20
  "scripts": {
19
21
  "build": "tsc"
20
22
  },