@plurnk/plurnk-mimetypes-application-jsonl 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +24 -0
- package/dist/Jsonl.d.ts +18 -0
- package/dist/Jsonl.d.ts.map +1 -0
- package/dist/Jsonl.js +63 -0
- package/dist/Jsonl.js.map +1 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -0
- package/package.json +53 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 PossumTech Laboratories, LLC
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# @plurnk/plurnk-mimetypes-application-jsonl
|
|
2
|
+
|
|
3
|
+
`application/jsonl` (JSON Lines / NDJSON) mimetype handler for the [plurnk](https://github.com/plurnk) ecosystem. Hand-rolled, no parser dependency.
|
|
4
|
+
|
|
5
|
+
## install
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
npm i @plurnk/plurnk-mimetypes-application-jsonl
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## what it does
|
|
12
|
+
|
|
13
|
+
One JSON value per line — training data, eval sets, fine-tune files, chat/agent logs. The structural definition of a JSONL dataset is its **record schema**, not its rows (a file can be millions of lines), so:
|
|
14
|
+
|
|
15
|
+
- `extractRaw(content)` — the schema: each distinct top-level key across records becomes a `field` symbol at the line where it first appears. The symbol list is scale-safe (bounded by schema width, not record count), and it answers "what is this dataset" — `{prompt, completion, score}`.
|
|
16
|
+
- `extent` — the **record count**, the unit you address by.
|
|
17
|
+
- `deepJson(content)` — the parsed records array, a jsonpath target (`$[5].completion`), computed only on demand.
|
|
18
|
+
- `query(content, dialect, pattern)` — jsonpath dispatches against the records array; regex/glob against the raw text.
|
|
19
|
+
|
|
20
|
+
Lenient by design: blank lines are skipped, and a line that doesn't parse is skipped (a trailing newline or a partial write doesn't poison the file). Skipped lines are not counted, so `extent` and jsonpath indices (`$[N]`) refer to parsed records, not physical line numbers. The raw body is already readable JSON-per-line, so there is no content projection. References are not applicable.
|
|
21
|
+
|
|
22
|
+
## license
|
|
23
|
+
|
|
24
|
+
MIT.
|
package/dist/Jsonl.d.ts
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { BaseHandler } from "@plurnk/plurnk-mimetypes";
|
|
2
|
+
import type { HandlerContent, MimeSymbol } from "@plurnk/plurnk-mimetypes";
|
|
3
|
+
/**
 * `application/jsonl` (JSON Lines / NDJSON) handler. Symbols describe the
 * record schema (distinct top-level keys) rather than individual rows.
 */
export default class Jsonl extends BaseHandler {
|
|
4
|
+
/** Returns one `field` symbol per distinct top-level key, with `line` and `endLine` both set to the line where the key first appears. */
extractRaw(content: HandlerContent): MimeSymbol[];
|
|
5
|
+
/** Returns the array of successfully parsed records, in file order. */
deepJson(content: HandlerContent): unknown;
|
|
6
|
+
/** Returns the number of successfully parsed records (blank and unparseable lines are not counted). */
extent(content: HandlerContent): number;
|
|
7
|
+
}
|
|
8
|
+
/** One schema entry: a top-level key and the line where it was first seen. */
interface SchemaEntry {
|
|
9
|
+
/** Top-level key name exactly as it appears in a record. */
key: string;
|
|
10
|
+
/** 1-based physical line number of the first parsed object record that contains `key`. */
firstLine: number;
|
|
11
|
+
}
|
|
12
|
+
/** Result of a single pass over JSON Lines text. */
export interface JsonlScan {
|
|
13
|
+
/** Every value that parsed successfully, in file order; includes non-object values such as arrays and primitives. */
records: unknown[];
|
|
14
|
+
/** Distinct top-level keys of the object records, in first-seen order. */
schema: SchemaEntry[];
|
|
15
|
+
}
|
|
16
|
+
/**
 * Scans JSON Lines text once, collecting the parsed records and the record schema.
 * Blank lines and lines that fail to parse are skipped.
 */
export declare function scan(text: string): JsonlScan;
|
|
17
|
+
export {};
|
|
18
|
+
//# sourceMappingURL=Jsonl.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Jsonl.d.ts","sourceRoot":"","sources":["../src/Jsonl.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,0BAA0B,CAAC;AACvD,OAAO,KAAK,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,0BAA0B,CAAC;AAgB3E,MAAM,CAAC,OAAO,OAAO,KAAM,SAAQ,WAAW;IACjC,UAAU,CAAC,OAAO,EAAE,cAAc,GAAG,UAAU,EAAE;IASjD,QAAQ,CAAC,OAAO,EAAE,cAAc,GAAG,OAAO;IAI1C,MAAM,CAAC,OAAO,EAAE,cAAc,GAAG,MAAM;CAGnD;AAED,UAAU,WAAW;IACjB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,SAAS;IACtB,OAAO,EAAE,OAAO,EAAE,CAAC;IACnB,MAAM,EAAE,WAAW,EAAE,CAAC;CACzB;AAED,wBAAgB,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,CAyB5C"}
|
package/dist/Jsonl.js
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { BaseHandler } from "@plurnk/plurnk-mimetypes";
|
|
2
|
+
// application/jsonl (JSON Lines / NDJSON) handler — Tier 4, no parser dep.
|
|
3
|
+
//
|
|
4
|
+
// One JSON value per line: training data, eval sets, fine-tune files, chat /
|
|
5
|
+
// agent logs. The structural definition of a JSONL dataset is its RECORD
|
|
6
|
+
// SCHEMA — the union of top-level keys across records — not its rows: a file
|
|
7
|
+
// can be millions of lines, so one-symbol-per-record would explode and
|
|
8
|
+
// sampling would lie. So symbols are the schema (each distinct top-level key →
|
|
9
|
+
// a `field` at the line it first appears), and `extent` is the record count
|
|
10
|
+
// (the unit you address by). The records themselves live in deepJson — the
|
|
11
|
+
// parsed array, a jsonpath target (`$[N].field`) computed only on demand.
|
|
12
|
+
//
|
|
13
|
+
// Lenient: blank lines are skipped, a line that doesn't parse is skipped (a
|
|
14
|
+
// trailing newline or a partial write doesn't poison the file). The raw body
|
|
15
|
+
// is already readable JSON-per-line, so there is no content projection.
|
|
16
|
+
/**
 * Handler for `application/jsonl` (JSON Lines / NDJSON) content.
 *
 * Symbols describe the record schema (one per distinct top-level key), the
 * deep JSON view is the parsed records array, and the extent is the number of
 * parsed records. Every method re-scans the content it is given.
 */
export default class Jsonl extends BaseHandler {
    /**
     * Builds one `field` symbol per distinct top-level key.
     *
     * @param content Raw JSONL content, either text or encoded bytes.
     * @returns Symbols in first-seen order; `line` and `endLine` both point at
     *          the 1-based line where the key first appears.
     */
    extractRaw(content) {
        const { schema } = scan(toText(content));
        const symbols = [];
        for (const { key, firstLine } of schema) {
            symbols.push({
                name: key,
                kind: "field",
                line: firstLine,
                endLine: firstLine,
            });
        }
        return symbols;
    }

    /**
     * Returns the parsed records array (blank and unparseable lines omitted).
     *
     * @param content Raw JSONL content, either text or encoded bytes.
     */
    deepJson(content) {
        const { records } = scan(toText(content));
        return records;
    }

    /**
     * Returns how many records parsed successfully.
     *
     * @param content Raw JSONL content, either text or encoded bytes.
     */
    extent(content) {
        const parsed = scan(toText(content));
        return parsed.records.length;
    }
}
|
|
32
|
+
/**
 * Scans JSON Lines text in a single pass.
 *
 * Each line is trimmed and parsed independently. Blank lines and lines that
 * fail to parse are skipped, so one bad line does not affect the others.
 *
 * @param text JSON Lines text with records separated by "\n".
 * @returns `records`: every successfully parsed value in file order (objects,
 *          arrays and primitives alike); `schema`: each distinct top-level key
 *          of the non-array object records, in first-seen order, paired with
 *          the 1-based physical line number where it first appeared.
 */
export function scan(text) {
    const records = [];
    const schema = [];
    const knownKeys = new Set();

    text.split("\n").forEach((rawLine, index) => {
        // trim() also removes a trailing "\r" left over from CRLF line endings.
        const candidate = rawLine.trim();
        if (candidate.length === 0) {
            return;
        }

        let parsed;
        try {
            parsed = JSON.parse(candidate);
        }
        catch {
            // Unparseable line (for example a partial write): skip it.
            return;
        }
        records.push(parsed);

        // Only plain object records contribute keys to the schema.
        const isObjectRecord = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
        if (!isObjectRecord) {
            return;
        }
        for (const key of Object.keys(parsed)) {
            if (knownKeys.has(key)) {
                continue;
            }
            knownKeys.add(key);
            // Line numbers are 1-based and count physical lines, including skipped ones.
            schema.push({ key, firstLine: index + 1 });
        }
    });

    return { records, schema };
}
|
|
60
|
+
/**
 * Normalizes handler content to a string.
 *
 * @param content Either a string, returned unchanged, or a value accepted by
 *                `TextDecoder.decode` (presumably encoded bytes; confirm
 *                against the `HandlerContent` type).
 * @returns The content as text, decoding non-string input as UTF-8.
 */
function toText(content) {
    if (typeof content === "string") {
        return content;
    }
    const decoder = new TextDecoder("utf-8");
    return decoder.decode(content);
}
|
|
63
|
+
//# sourceMappingURL=Jsonl.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Jsonl.js","sourceRoot":"","sources":["../src/Jsonl.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,0BAA0B,CAAC;AAGvD,2EAA2E;AAC3E,EAAE;AACF,6EAA6E;AAC7E,yEAAyE;AACzE,6EAA6E;AAC7E,uEAAuE;AACvE,+EAA+E;AAC/E,4EAA4E;AAC5E,2EAA2E;AAC3E,0EAA0E;AAC1E,EAAE;AACF,4EAA4E;AAC5E,6EAA6E;AAC7E,wEAAwE;AACxE,MAAM,CAAC,OAAO,OAAO,KAAM,SAAQ,WAAW;IACjC,UAAU,CAAC,OAAuB;QACvC,OAAO,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC5C,IAAI,EAAE,CAAC,CAAC,GAAG;YACX,IAAI,EAAE,OAAO;YACb,IAAI,EAAE,CAAC,CAAC,SAAS;YACjB,OAAO,EAAE,CAAC,CAAC,SAAS;SACvB,CAAC,CAAC,CAAC;IACR,CAAC;IAEQ,QAAQ,CAAC,OAAuB;QACrC,OAAO,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC;IACzC,CAAC;IAEQ,MAAM,CAAC,OAAuB;QACnC,OAAO,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC;IAChD,CAAC;CACJ;AAYD,MAAM,UAAU,IAAI,CAAC,IAAY;IAC7B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,OAAO,GAAc,EAAE,CAAC;IAC9B,MAAM,MAAM,GAAkB,EAAE,CAAC;IACjC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACvC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC7B,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAChC,IAAI,KAAc,CAAC;QACnB,IAAI,CAAC;YACD,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC7B,CAAC;QAAC,MAAM,CAAC;YACL,SAAS;QACb,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACpB,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;YACvE,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBACnC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;oBACjB,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;oBACd,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBAC3C,CAAC;YACL,CAAC;QACL,CAAC;IACL,CAAC;IACD,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;AAC/B,CAAC;AAED,SAAS,MAAM,CAAC,OAAuB;IACnC,OAAO,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,WAAW,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;AAC5F,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,IAAI,KAAK,EAAE,MAAM,YAAY,CAAC;AAC9C,OAAO,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AACrC,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,YAAY,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,IAAI,KAAK,EAAE,MAAM,YAAY,CAAC;AAC9C,OAAO,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AACrC,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@plurnk/plurnk-mimetypes-application-jsonl",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "application/jsonl (JSON Lines / NDJSON) mimetype handler for plurnk-service. Record-schema symbols, records array as deepJson. Hand-rolled, no parser dependency.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"publishConfig": {
|
|
8
|
+
"access": "public"
|
|
9
|
+
},
|
|
10
|
+
"engines": {
|
|
11
|
+
"node": ">=25"
|
|
12
|
+
},
|
|
13
|
+
"plurnk": {
|
|
14
|
+
"kind": "mimetype",
|
|
15
|
+
"handlers": [
|
|
16
|
+
{
|
|
17
|
+
"name": "application/jsonl",
|
|
18
|
+
"glyph": "🧾",
|
|
19
|
+
"extensions": [
|
|
20
|
+
".jsonl",
|
|
21
|
+
".ndjson"
|
|
22
|
+
]
|
|
23
|
+
}
|
|
24
|
+
]
|
|
25
|
+
},
|
|
26
|
+
"exports": {
|
|
27
|
+
".": {
|
|
28
|
+
"types": "./dist/index.d.ts",
|
|
29
|
+
"default": "./dist/index.js"
|
|
30
|
+
},
|
|
31
|
+
"./package.json": "./package.json"
|
|
32
|
+
},
|
|
33
|
+
"files": [
|
|
34
|
+
"dist/**/*",
|
|
35
|
+
"README.md"
|
|
36
|
+
],
|
|
37
|
+
"scripts": {
|
|
38
|
+
"test:lint": "tsc --noEmit",
|
|
39
|
+
"test:unit": "node --test src/**/*.test.ts",
|
|
40
|
+
"test": "npm run test:lint && npm run test:unit",
|
|
41
|
+
"build:dist": "tsc -p tsconfig.build.json",
|
|
42
|
+
"build": "npm run build:dist",
|
|
43
|
+
"prepare": "npm run build"
|
|
44
|
+
},
|
|
45
|
+
"devDependencies": {
|
|
46
|
+
"@types/node": "^25.8.0",
|
|
47
|
+
"typescript": "^6.0.3",
|
|
48
|
+
"@plurnk/plurnk-mimetypes": "^0.15.0"
|
|
49
|
+
},
|
|
50
|
+
"peerDependencies": {
|
|
51
|
+
"@plurnk/plurnk-mimetypes": "^0.15.0"
|
|
52
|
+
}
|
|
53
|
+
}
|