marcattacks 1.0.0
This diff shows the published content of package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between package versions exactly as they appear in that registry.
- package/Dockerfile +23 -0
- package/LICENSE +21 -0
- package/README-docker.md +39 -0
- package/README.md +111 -0
- package/TYPESCRIPT.txt +6 -0
- package/data/output.rdf +12425 -0
- package/data/sample.xml +2 -0
- package/demo/demo.jsonata +44 -0
- package/dist/index.js +150 -0
- package/docker-compose.yaml +37 -0
- package/logo.jpg +0 -0
- package/package.json +46 -0
- package/plugin/demo.js +12 -0
- package/src/httpstream.ts +28 -0
- package/src/index.ts +177 -0
- package/src/input/alephseq.ts +83 -0
- package/src/input/json.ts +47 -0
- package/src/input/jsonl.ts +47 -0
- package/src/input/xml.ts +125 -0
- package/src/marcmap.ts +94 -0
- package/src/output/alephseq.ts +48 -0
- package/src/output/json.ts +38 -0
- package/src/output/jsonl.ts +23 -0
- package/src/output/rdf.ts +63 -0
- package/src/output/xml.ts +84 -0
- package/src/plugin-loader.ts +27 -0
- package/src/s3stream.ts +266 -0
- package/src/sftpstream.ts +114 -0
- package/src/slow-writable.ts +165 -0
- package/src/transform/json.ts +36 -0
- package/src/transform/rdf.ts +398 -0
- package/tsconfig.json +46 -0
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
(
/*
 * Demo JSONata fix: map one MARC record to a schema.org JSON-LD object.
 * NOTE(review): $marcmap and $genid are not defined here — presumably they
 * are bound by the transform plugin ($marcmap returning the values of a
 * MARC path like "245ab", $genid a fresh node id); confirm against
 * src/transform/rdf.ts.
 */

/****** start helper functions ******/
/* Trim one trailing comma, period, colon or slash (ISBD punctuation). */
$strip := function ($str) {
    $replace($str,/\s*[\,.:\/]$/,"")
};

/* First value of a MARC path (e.g. the single 001 control field). */
$marcmap0 := function ($path) {
    $marcmap($path)[0]
};

/* Local material-type strings mapped to schema.org classes; "_" is the fallback. */
$typeLookupTable := {
    "_": "schema:CreativeWork", /* default */
    "book": "schema:Book",
    "catalog": "schema:Book",
    "correspondence": "schema:CreativeWork",
    "dissertation": "schema:Thesis",
    "ephemera": "schema:CreativeWork",
    "image": "schema:VisualWork",
    "manuscript": "schema:Book",
    "map": "schema:Map",
    "master": "schema:Thesis",
    "periodical": "schema:Periodical",
    "phd": "schema:Thesis"
};

/* Look up a material type, falling back to the "_" default when unknown. */
$typeLookup := function ($val) {(
    $v := $lookup($typeLookupTable,$val);
    $v ? $v : $typeLookupTable._
)};
/****** end helper functions ******/

/* The resulting JSON-LD object for one record. */
{
    "@id": "my:" & $marcmap0("001"),          /* record id taken from field 001 */
    "@type": $typeLookup($marcmap0("920a")),  /* class derived from 920 subfield a */
    "name": $marcmap("245ab") ~> $join(" ") ~> $strip() ,
    "subject": $marcmap("500a") ~> $map(function ($val) {
        {
            "@id": $genid(),
            "@type": "schema:Subject",
            "name": $val
        }
    })
}
)
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import log4js from 'log4js';
|
|
3
|
+
import { program } from 'commander';
|
|
4
|
+
import { loadPlugin } from './plugin-loader.js';
|
|
5
|
+
import { sftpReadStream, sftpWriteStream, sftpLatestFile } from './sftpstream.js';
|
|
6
|
+
import { httpReadStream } from './httpstream.js';
|
|
7
|
+
import { Readable } from 'stream';
|
|
8
|
+
import { pathToFileURL } from "node:url";
|
|
9
|
+
import { SlowWritable } from './slow-writable.js';
|
|
10
|
+
import path from "node:path";
|
|
11
|
+
import fs from 'fs';
|
|
12
|
+
import { s3ReaderStream, s3WriterStream } from './s3stream.js';
|
|
13
|
+
// Route all log output to stderr so stdout stays free for record output;
// the pattern prints timestamp, level and source file name.
log4js.configure({
    appenders: {
        err: {
            type: "stderr",
            layout: {
                type: "pattern",
                pattern: "%[%d %p %f{1} %m%]"
            }
        }
    },
    categories: {
        // Logging is off by default; enabled below via --info/--debug/--trace.
        default: { appenders: ["err"], level: "off", enableCallStack: true }
    }
});
// Command line interface.
// NOTE(review): version '0.1.0' disagrees with package.json's "1.0.0" — confirm.
program.version('0.1.0')
    .argument('<file>')
    .option('-f,--from <from>', 'input type', 'xml')
    .option('-t,--to <output>', 'output type', 'json')
    .option('-m,--map <map>', 'data mapper', 'json')
    .option('--fix <what>', 'jsonata')
    .option('-o,--out <file>', 'output file')
    .option('--key <keyfile>', 'private key file')
    .option('--info', 'output debugging messages')
    .option('--debug', 'output more debugging messages')
    .option('--trace', 'output much more debugging messages');
program.parse(process.argv);
const opts = program.opts();
const logger = log4js.getLogger();
// Verbosity flags: the most verbose flag given wins (trace > debug > info).
if (opts.info) {
    logger.level = "info";
}
if (opts.debug) {
    logger.level = "debug";
}
if (opts.trace) {
    logger.level = "trace";
}
// Fire-and-forget entry point; rejections surface as unhandled rejections.
main();
|
|
51
|
+
/**
 * CLI entry point: resolve the input location, parse it into an object
 * stream via the --from input plugin, optionally transform each record with
 * the --map plugin (configured by --fix), and serialize with the --to
 * output plugin to --out (local file, sftp://, s3://, '@slow', or stdout).
 */
async function main() {
    const url = program.args[0];
    if (!url) {
        console.error(`need an input file`);
        process.exit(2);
    }
    // An existing local path becomes a file:// URL; anything else must be
    // a parseable absolute URL (http(s)://, s3://, sftp://, ...).
    let inputFile;
    if (fs.existsSync(url)) {
        const filePath = path.resolve(process.cwd(), url);
        inputFile = pathToFileURL(filePath);
    }
    else {
        inputFile = new URL(url);
    }
    logger.info(`using: ${inputFile}`);
    let readableStream;
    if (inputFile.protocol.startsWith("http")) {
        readableStream = await httpReadStream(inputFile.toString());
    }
    else if (inputFile.protocol.startsWith("s3")) {
        readableStream = await s3ReaderStream(inputFile, {});
    }
    else if (inputFile.protocol === 'sftp:') {
        const config = makeSftpConfig(inputFile, opts);
        let remotePath;
        // The special suffix "/@latest:<ext>" selects the newest file with
        // that extension in the remote directory.
        if (inputFile.pathname.match(/\/@latest:\w+$/)) {
            const remoteDir = inputFile.pathname.replace(/\/@latest.*/, "");
            const extension = inputFile.pathname.replace(/.*\/@latest:/, "");
            remotePath = await sftpLatestFile(config, remoteDir, extension);
        }
        else {
            remotePath = inputFile.pathname;
        }
        readableStream = await sftpReadStream(remotePath, config);
    }
    else {
        // Default: plain file:// URL.
        readableStream = fs.createReadStream(inputFile);
    }
    // Input plugin turns the byte stream into an object stream of records.
    let objectStream;
    if (opts.from) {
        const mod = await loadPlugin(opts.from, 'input');
        objectStream = mod.stream2readable(readableStream);
    }
    else {
        // NOTE(review): likely unreachable — commander supplies the default
        // 'xml' for --from, so opts.from is always set; confirm.
        console.error(`Need --from`);
        process.exit(1);
    }
    // Optional per-record transform (e.g. a jsonata expression via --fix).
    let resultStream = objectStream;
    if (opts.map) {
        const mod = await loadPlugin(opts.map, 'transform');
        const transformer = await mod.transform(opts.fix);
        resultStream = objectStream.pipe(transformer);
    }
    // Output destination: '@slow' is a throttled sink used to exercise
    // backpressure; otherwise sftp://, s3://, a local path, or stdout.
    let outStream;
    if (opts.out === '@slow') {
        outStream = new SlowWritable({ delayMs: 100 });
    }
    else if (opts.out) {
        if (opts.out.startsWith("sftp")) {
            const url = new URL(opts.out);
            const config = makeSftpConfig(url, opts);
            logger.info(`put ${url}`);
            outStream = await sftpWriteStream(url.href, config);
        }
        else if (opts.out.startsWith("s3")) {
            const url = new URL(opts.out);
            logger.info(`put ${url}`);
            outStream = await s3WriterStream(url, {});
        }
        else {
            outStream = fs.createWriteStream(opts.out, { encoding: 'utf-8' });
        }
    }
    else {
        outStream = process.stdout;
    }
    // Output plugin serializes the (possibly transformed) object stream.
    if (opts.to) {
        const mod = await loadPlugin(opts.to, 'output');
        mod.readable2writable(resultStream, outStream);
    }
}
|
|
132
|
+
/**
 * Build an ssh2-sftp-client connection config from an sftp:// URL.
 *
 * Credentials come from the URL userinfo (user:password@host:port); an
 * optional private key file can be supplied via the --key CLI option.
 *
 * @param {URL} inputFile - sftp:// URL carrying host, port and credentials
 * @param {object} opts - parsed CLI options (only `key` is read)
 * @returns {object} connection config for the SFTP client
 */
function makeSftpConfig(inputFile, opts) {
    let privateKey = undefined;
    if (opts.key) {
        privateKey = fs.readFileSync(opts.key, { encoding: 'utf-8' });
    }
    // URL.port is "" when no explicit port is given, and Number("") is 0
    // (not nullish), so the previous `Number(port) ?? 22` never applied the
    // default and tried to connect to port 0. Test for the empty string.
    const port = inputFile.port ? Number(inputFile.port) : 22;
    return {
        host: inputFile.hostname,
        port,
        // URL userinfo is percent-encoded (e.g. '@' appears as %40);
        // decode before handing credentials to the SFTP client.
        username: decodeURIComponent(inputFile.username),
        ...(inputFile.password ? { password: decodeURIComponent(inputFile.password) } : {}),
        ...(privateKey ? { privateKey } : {})
    };
}
|
|
150
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
services:
|
|
2
|
+
minio:
|
|
3
|
+
image: minio/minio:RELEASE.2025-04-22T22-12-26Z-cpuv1
|
|
4
|
+
command: server /data -address ":3371" --console-address ":3372"
|
|
5
|
+
environment:
|
|
6
|
+
MINIO_ROOT_USER: minioadmin
|
|
7
|
+
MINIO_ROOT_PASSWORD: minioadmin
|
|
8
|
+
volumes:
|
|
9
|
+
- minio-data:/data
|
|
10
|
+
ports:
|
|
11
|
+
- 3371:3371
|
|
12
|
+
- 3372:3372
|
|
13
|
+
healthcheck:
|
|
14
|
+
test: ["CMD", "curl", "-f", "http://localhost:3371/minio/health/live"]
|
|
15
|
+
interval: 10s
|
|
16
|
+
timeout: 5s
|
|
17
|
+
retries: 5
|
|
18
|
+
start_period: 10s
|
|
19
|
+
|
|
20
|
+
mc:
|
|
21
|
+
image: minio/mc
|
|
22
|
+
depends_on:
|
|
23
|
+
- minio
|
|
24
|
+
# minio:
|
|
25
|
+
# condition: service_healthy
|
|
26
|
+
restart: on-failure
|
|
27
|
+
entrypoint:
|
|
28
|
+
- sh
|
|
29
|
+
- -c
|
|
30
|
+
- |
|
|
31
|
+
sleep 2;
|
|
32
|
+
mc alias set docker http://minio:3371 minioadmin minioadmin;
|
|
33
|
+
mc mb docker/bbl --ignore-existing;
|
|
34
|
+
|
|
35
|
+
volumes:
|
|
36
|
+
minio-data:
|
|
37
|
+
driver: local
|
package/logo.jpg
ADDED
|
Binary file
|
package/package.json
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "marcattacks",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"main": "index.js",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"author": "",
|
|
7
|
+
"repository": "https://codeberg.org/phochste/marcattacks.git",
|
|
8
|
+
"bin": {
|
|
9
|
+
"marcattacks": "./dist/index.js"
|
|
10
|
+
},
|
|
11
|
+
"scripts": {
|
|
12
|
+
"test": "echo \"Error: no test specified\" && exit 1",
|
|
13
|
+
"build:ts": "npx tsc",
|
|
14
|
+
"build:watch": "npx tsc -w",
|
|
15
|
+
"docker:build": "docker build . -t hochstenbach/marcattacks:v0.0.1",
|
|
16
|
+
"docker:run": "docker run --rm -v `pwd`/data:/app/data -it hochstenbach/marcattacks:v0.0.1 --to rdf --map rdf data/sample.xml",
|
|
17
|
+
"docker:push": "docker push hochstenbach/marcattacks:v0.0.1"
|
|
18
|
+
},
|
|
19
|
+
"keywords": [],
|
|
20
|
+
"license": "MIT",
|
|
21
|
+
"description": "",
|
|
22
|
+
"devDependencies": {
|
|
23
|
+
"@types/n3": "^1.26.1",
|
|
24
|
+
"@types/node": "^24.10.1",
|
|
25
|
+
"@types/sax": "^1.2.7",
|
|
26
|
+
"@types/ssh2-sftp-client": "^9.0.5",
|
|
27
|
+
"@types/stream-json": "^1.7.8",
|
|
28
|
+
"eslint": "^9.39.1",
|
|
29
|
+
"prettier": "^3.6.2",
|
|
30
|
+
"ts-node": "^10.9.2",
|
|
31
|
+
"typescript": "^5.9.3"
|
|
32
|
+
},
|
|
33
|
+
"dependencies": {
|
|
34
|
+
"@aws-sdk/client-s3": "^3.940.0",
|
|
35
|
+
"commander": "^14.0.2",
|
|
36
|
+
"fast-xml-parser": "^5.3.2",
|
|
37
|
+
"jsonata": "^2.1.0",
|
|
38
|
+
"log4js": "^6.9.1",
|
|
39
|
+
"n3": "^1.26.0",
|
|
40
|
+
"sax": "^1.4.3",
|
|
41
|
+
"ssh2-sftp-client": "^12.0.1",
|
|
42
|
+
"stream-chain": "^3.4.0",
|
|
43
|
+
"stream-json": "^1.9.1",
|
|
44
|
+
"uuid": "^13.0.0"
|
|
45
|
+
}
|
|
46
|
+
}
|
package/plugin/demo.js
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { Readable } from 'stream';
|
|
2
|
+
import * as http from 'http';
|
|
3
|
+
import * as https from 'https';
|
|
4
|
+
import { URL } from 'url';
|
|
5
|
+
|
|
6
|
+
export function httpReadStream(urlString: string): Promise<Readable> {
|
|
7
|
+
return new Promise((resolve, reject) => {
|
|
8
|
+
const url = new URL(urlString);
|
|
9
|
+
const client = url.protocol === 'http:' ? http : https;
|
|
10
|
+
|
|
11
|
+
const req = client.get(url, res => {
|
|
12
|
+
if (res.statusCode && res.statusCode >= 400) {
|
|
13
|
+
reject(new Error('HTTP ' + res.statusCode));
|
|
14
|
+
return;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
// Follow redirects
|
|
18
|
+
if (res.statusCode && res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) {
|
|
19
|
+
httpReadStream(res.headers.location).then(resolve).catch(reject);
|
|
20
|
+
return;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
resolve(res);
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
req.on('error', reject);
|
|
27
|
+
});
|
|
28
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import log4js from 'log4js';
|
|
4
|
+
import { program } from 'commander';
|
|
5
|
+
import { loadPlugin } from './plugin-loader.js';
|
|
6
|
+
import { sftpReadStream , sftpWriteStream , sftpLatestFile , type SftpConfig } from './sftpstream.js';
|
|
7
|
+
import { httpReadStream } from './httpstream.js';
|
|
8
|
+
import { Readable } from 'stream';
|
|
9
|
+
import { pathToFileURL } from "node:url";
|
|
10
|
+
import type { Transform, Writable } from 'node:stream';
|
|
11
|
+
import { SlowWritable } from './slow-writable.js';
|
|
12
|
+
import path from "node:path";
|
|
13
|
+
import fs from 'fs';
|
|
14
|
+
import { s3ReaderStream, s3WriterStream } from './s3stream.js';
|
|
15
|
+
|
|
16
|
+
// Route all log output to stderr so stdout stays free for record output;
// the pattern prints timestamp, level and source file name.
log4js.configure({
    appenders: {
        err: {
            type: "stderr" ,
            layout: {
                type: "pattern",
                pattern: "%[%d %p %f{1} %m%]"
            }
        }
    },
    categories: {
        // Logging is off by default; enabled below via --info/--debug/--trace.
        default: { appenders: ["err"], level: "off" , enableCallStack: true }
    }
});

// Command line interface.
// NOTE(review): version '0.1.0' disagrees with package.json's "1.0.0" — confirm.
program.version('0.1.0')
    .argument('<file>')
    .option('-f,--from <from>','input type','xml')
    .option('-t,--to <output>','output type','json')
    .option('-m,--map <map>','data mapper','json')
    .option('--fix <what>','jsonata')
    .option('-o,--out <file>','output file')
    .option('--key <keyfile>', 'private key file')
    .option('--info','output debugging messages')
    .option('--debug','output more debugging messages')
    .option('--trace','output much more debugging messages');

program.parse(process.argv);

const opts = program.opts();
const logger = log4js.getLogger();

// Verbosity flags: the most verbose flag given wins (trace > debug > info).
if (opts.info) {
    logger.level = "info";
}

if (opts.debug) {
    logger.level = "debug";
}

if (opts.trace) {
    logger.level = "trace";
}

// Fire-and-forget entry point; rejections surface as unhandled rejections.
main();
|
|
61
|
+
|
|
62
|
+
/**
 * CLI entry point: resolve the input location, parse it into an object
 * stream via the --from input plugin, optionally transform each record with
 * the --map plugin (configured by --fix), and serialize with the --to
 * output plugin to --out (local file, sftp://, s3://, '@slow', or stdout).
 */
async function main() : Promise<void> {
    const url = program.args[0];

    if (! url) {
        console.error(`need an input file`);
        process.exit(2);
    }

    // An existing local path becomes a file:// URL; anything else must be
    // a parseable absolute URL (http(s)://, s3://, sftp://, ...).
    let inputFile : URL;

    if (fs.existsSync(url)) {
        const filePath = path.resolve(process.cwd(), url);
        inputFile = pathToFileURL(filePath);
    }
    else {
        inputFile = new URL(url);
    }

    logger.info(`using: ${inputFile}`);

    let readableStream;

    if (inputFile.protocol.startsWith("http")) {
        readableStream = await httpReadStream(inputFile.toString());
    }
    else if (inputFile.protocol.startsWith("s3")) {
        readableStream = await s3ReaderStream(inputFile,{});
    }
    else if (inputFile.protocol === 'sftp:') {
        const config = makeSftpConfig(inputFile,opts);

        let remotePath;

        // The special suffix "/@latest:<ext>" selects the newest file with
        // that extension in the remote directory.
        if (inputFile.pathname.match(/\/@latest:\w+$/)) {
            const remoteDir = inputFile.pathname.replace(/\/@latest.*/,"");
            const extension = inputFile.pathname.replace(/.*\/@latest:/,"");
            remotePath = await sftpLatestFile(config,remoteDir,extension);
        }
        else {
            remotePath = inputFile.pathname;
        }

        readableStream = await sftpReadStream(remotePath, config);
    }
    else {
        // Default: plain file:// URL.
        readableStream = fs.createReadStream(inputFile);
    }

    // Input plugin turns the byte stream into an object stream of records.
    let objectStream : Readable;

    if (opts.from) {
        const mod = await loadPlugin(opts.from,'input');
        objectStream = mod.stream2readable(readableStream);
    }
    else {
        // NOTE(review): likely unreachable — commander supplies the default
        // 'xml' for --from, so opts.from is always set; confirm.
        console.error(`Need --from`);
        process.exit(1);
    }

    // Optional per-record transform (e.g. a jsonata expression via --fix).
    let resultStream = objectStream;

    if (opts.map) {
        const mod = await loadPlugin(opts.map,'transform');
        const transformer : Transform = await mod.transform(opts.fix);
        resultStream = objectStream.pipe(transformer);
    }

    // Output destination: '@slow' is a throttled sink used to exercise
    // backpressure; otherwise sftp://, s3://, a local path, or stdout.
    let outStream : Writable;

    if (opts.out === '@slow') {
        outStream = new SlowWritable({ delayMs: 100 });
    }
    else if (opts.out) {
        if (opts.out.startsWith("sftp")) {
            const url = new URL(opts.out);
            const config = makeSftpConfig(url,opts);
            logger.info(`put ${url}`);
            outStream = await sftpWriteStream(url.href, config);
        }
        else if (opts.out.startsWith("s3")) {
            const url = new URL(opts.out);
            logger.info(`put ${url}`);
            outStream = await s3WriterStream(url,{});
        }
        else {
            outStream = fs.createWriteStream(opts.out, { encoding: 'utf-8'});
        }
    }
    else {
        outStream = process.stdout;
    }

    // Output plugin serializes the (possibly transformed) object stream.
    if (opts.to) {
        const mod = await loadPlugin(opts.to,'output');
        mod.readable2writable(resultStream, outStream);
    }
}
|
|
159
|
+
|
|
160
|
+
function makeSftpConfig(inputFile: URL, opts: any) : SftpConfig {
|
|
161
|
+
let privateKey : string | undefined = undefined;
|
|
162
|
+
|
|
163
|
+
if (opts.key) {
|
|
164
|
+
privateKey = fs.readFileSync(opts.key,{ encoding: 'utf-8'});
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
let config: SftpConfig = {
|
|
168
|
+
host: inputFile.hostname,
|
|
169
|
+
port: Number(inputFile.port) ?? 22,
|
|
170
|
+
username: inputFile.username
|
|
171
|
+
};
|
|
172
|
+
|
|
173
|
+
if (inputFile.password) { config.password = inputFile.password }
|
|
174
|
+
if (privateKey) { config.privateKey = privateKey}
|
|
175
|
+
|
|
176
|
+
return config;
|
|
177
|
+
}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { Readable } from "stream";
|
|
2
|
+
import * as readline from 'node:readline'
|
|
3
|
+
import log4js from 'log4js';
|
|
4
|
+
|
|
5
|
+
const logger = log4js.getLogger();
|
|
6
|
+
|
|
7
|
+
export function stream2readable(stream: Readable) : Readable {
|
|
8
|
+
let recordNum = 0;
|
|
9
|
+
|
|
10
|
+
const rl = readline.createInterface({input: stream, crlfDelay: Infinity});
|
|
11
|
+
|
|
12
|
+
let sourcePaused = false;
|
|
13
|
+
|
|
14
|
+
const readableStream = new Readable({
|
|
15
|
+
read() {
|
|
16
|
+
if (sourcePaused) {
|
|
17
|
+
logger.debug("backpressure off");
|
|
18
|
+
rl.resume();
|
|
19
|
+
sourcePaused = false;
|
|
20
|
+
}
|
|
21
|
+
} ,
|
|
22
|
+
objectMode: true
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
let rec : string[][] = [];
|
|
26
|
+
let previd : string = "";
|
|
27
|
+
|
|
28
|
+
rl.on('line', (line) => {
|
|
29
|
+
const [id,...rest] = line.split(" ");
|
|
30
|
+
const data = rest.join(" ");
|
|
31
|
+
|
|
32
|
+
if (previd && previd !== id) {
|
|
33
|
+
const ok = readableStream.push({
|
|
34
|
+
record: rec
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
if (!ok) {
|
|
38
|
+
logger.debug("backpressure on");
|
|
39
|
+
rl.pause();
|
|
40
|
+
sourcePaused = true;
|
|
41
|
+
}
|
|
42
|
+
rec = [];
|
|
43
|
+
recordNum++;
|
|
44
|
+
|
|
45
|
+
if (recordNum % 1000 === 0) {
|
|
46
|
+
logger.info(`record: ${recordNum}`);
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
const tag = data?.substring(0,3);
|
|
51
|
+
const ind1 = data?.substring(3,4);
|
|
52
|
+
const ind2 = data?.substring(4,5);
|
|
53
|
+
const sf = data?.substring(8);
|
|
54
|
+
const parts = sf.split(/\$\$(.)/);
|
|
55
|
+
|
|
56
|
+
if (tag == 'FMT' || tag === 'LDR' || tag.startsWith("00")) {
|
|
57
|
+
rec.push([
|
|
58
|
+
tag,ind1,ind2
|
|
59
|
+
].concat(["_"].concat(parts)));
|
|
60
|
+
}
|
|
61
|
+
else {
|
|
62
|
+
rec.push([
|
|
63
|
+
tag,ind1,ind2
|
|
64
|
+
].concat(parts.slice(1)));
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
previd = id!;
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
rl.on('close', () => {
|
|
71
|
+
readableStream.push({
|
|
72
|
+
record: rec
|
|
73
|
+
});
|
|
74
|
+
recordNum++;
|
|
75
|
+
if (recordNum % 1000 === 0) {
|
|
76
|
+
logger.info(`record: ${recordNum}`);
|
|
77
|
+
}
|
|
78
|
+
readableStream.push(null);
|
|
79
|
+
logger.info(`processed ${recordNum} records`);
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
return readableStream;
|
|
83
|
+
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { Readable } from "stream";
|
|
2
|
+
import streamArray from "stream-json/streamers/StreamArray.js";
|
|
3
|
+
import log4js from 'log4js';
|
|
4
|
+
|
|
5
|
+
const logger = log4js.getLogger();
|
|
6
|
+
|
|
7
|
+
export function stream2readable(stream: Readable) : Readable {
|
|
8
|
+
let recordNum = 0;
|
|
9
|
+
|
|
10
|
+
const pipeline = stream.pipe(streamArray.withParser());
|
|
11
|
+
|
|
12
|
+
let sourcePaused = false;
|
|
13
|
+
|
|
14
|
+
const readableStream = new Readable({
|
|
15
|
+
read() {
|
|
16
|
+
if (sourcePaused) {
|
|
17
|
+
logger.debug("backpressure off");
|
|
18
|
+
pipeline.resume();
|
|
19
|
+
sourcePaused = false;
|
|
20
|
+
}
|
|
21
|
+
} ,
|
|
22
|
+
objectMode: true
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
pipeline.on('data', (data: any) => {
|
|
26
|
+
const ok = readableStream.push(data.value);
|
|
27
|
+
|
|
28
|
+
if (!ok) {
|
|
29
|
+
logger.debug("backpressure on")
|
|
30
|
+
pipeline.pause();
|
|
31
|
+
sourcePaused = true;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
recordNum++;
|
|
35
|
+
|
|
36
|
+
if (recordNum % 1000 === 0) {
|
|
37
|
+
logger.info(`record: ${recordNum}`);
|
|
38
|
+
}
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
pipeline.on('end', () => {
|
|
42
|
+
logger.info(`processed ${recordNum} records`);
|
|
43
|
+
readableStream.push(null);
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
return readableStream;
|
|
47
|
+
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { Readable } from "stream";
|
|
2
|
+
import * as readline from 'node:readline'
|
|
3
|
+
import log4js from 'log4js';
|
|
4
|
+
|
|
5
|
+
const logger = log4js.getLogger();
|
|
6
|
+
|
|
7
|
+
export function stream2readable(stream: Readable) : Readable {
|
|
8
|
+
let recordNum = 0;
|
|
9
|
+
|
|
10
|
+
const rl = readline.createInterface({input: stream, crlfDelay: Infinity});
|
|
11
|
+
|
|
12
|
+
let sourcePaused = false;
|
|
13
|
+
|
|
14
|
+
const readableStream = new Readable({
|
|
15
|
+
read() {
|
|
16
|
+
if (sourcePaused) {
|
|
17
|
+
logger.debug("backpressure off");
|
|
18
|
+
rl.resume();
|
|
19
|
+
sourcePaused = false;
|
|
20
|
+
}
|
|
21
|
+
} ,
|
|
22
|
+
objectMode: true
|
|
23
|
+
});
|
|
24
|
+
|
|
25
|
+
rl.on('line', (line) => {
|
|
26
|
+
const ok = readableStream.push(JSON.parse(line));
|
|
27
|
+
|
|
28
|
+
if (!ok) {
|
|
29
|
+
logger.debug("backpressure on");
|
|
30
|
+
rl.pause();
|
|
31
|
+
sourcePaused = true;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
recordNum++;
|
|
35
|
+
|
|
36
|
+
if (recordNum % 1000 === 0) {
|
|
37
|
+
logger.info(`record: ${recordNum}`);
|
|
38
|
+
}
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
rl.on('close', () => {
|
|
42
|
+
readableStream.push(null);
|
|
43
|
+
logger.info(`processed ${recordNum} records`);
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
return readableStream;
|
|
47
|
+
}
|