@naturalcycles/nodejs-lib 12.44.0 → 12.47.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/dist/diff/tableDiff.d.ts +2 -1
- package/dist/got/got.model.d.ts +2 -1
- package/dist/index.d.ts +4 -2
- package/dist/index.js +6 -2
- package/dist/stream/ndjson/ndjsonMap.d.ts +10 -8
- package/dist/stream/ndjson/ndjsonMap.js +3 -18
- package/dist/stream/ndjson/transformJsonParse.d.ts +3 -2
- package/dist/stream/ndjson/transformToNDJson.d.ts +4 -2
- package/dist/stream/transform/transformBuffer.d.ts +1 -1
- package/dist/stream/transform/transformLogProgress.d.ts +4 -3
- package/dist/stream/transform/transformLogProgress.js +1 -1
- package/dist/stream/transform/transformMap.d.ts +4 -3
- package/dist/stream/transform/worker/transformMultiThreaded.d.ts +2 -1
- package/dist/validation/joi/joi.shared.schemas.d.ts +1 -1
- package/dist/validation/joi/joi.shared.schemas.js +3 -6
- package/dist/validation/joi/joi.validation.error.d.ts +3 -0
- package/dist/validation/joi/joi.validation.util.js +8 -2
- package/dist/validation/joi/string.extensions.d.ts +10 -1
- package/dist/validation/joi/string.extensions.js +34 -0
- package/dist/validation/sanitize.util.d.ts +8 -0
- package/dist/validation/sanitize.util.js +13 -0
- package/package.json +3 -1
- package/src/diff/tableDiff.ts +2 -6
- package/src/got/got.model.ts +2 -1
- package/src/index.ts +5 -0
- package/src/stream/ndjson/ndjsonMap.ts +14 -38
- package/src/stream/ndjson/transformJsonParse.ts +3 -2
- package/src/stream/ndjson/transformToNDJson.ts +4 -2
- package/src/stream/transform/transformBuffer.ts +1 -3
- package/src/stream/transform/transformLogProgress.ts +6 -6
- package/src/stream/transform/transformMap.ts +4 -3
- package/src/stream/transform/worker/transformMultiThreaded.ts +2 -2
- package/src/validation/joi/joi.shared.schemas.ts +3 -6
- package/src/validation/joi/joi.validation.error.ts +3 -0
- package/src/validation/joi/joi.validation.util.ts +11 -3
- package/src/validation/joi/string.extensions.ts +50 -3
- package/src/validation/sanitize.util.ts +12 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,31 @@
|
|
|
1
|
+
# [12.47.0](https://github.com/NaturalCycles/nodejs-lib/compare/v12.46.0...v12.47.0) (2021-10-25)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Features
|
|
5
|
+
|
|
6
|
+
* idSchema to allow uppercase characters too ([80111c3](https://github.com/NaturalCycles/nodejs-lib/commit/80111c3d601fbb6e417b7ebb38b42ae1451c8866))
|
|
7
|
+
|
|
8
|
+
# [12.46.0](https://github.com/NaturalCycles/nodejs-lib/compare/v12.45.1...v12.46.0) (2021-10-25)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Features
|
|
12
|
+
|
|
13
|
+
* stringSchema.stripHTML extension, export sanitizeHTML ([586d576](https://github.com/NaturalCycles/nodejs-lib/commit/586d576585652af822d974607ee913c16a81691f))
|
|
14
|
+
|
|
15
|
+
## [12.45.1](https://github.com/NaturalCycles/nodejs-lib/compare/v12.45.0...v12.45.1) (2021-10-25)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
### Bug Fixes
|
|
19
|
+
|
|
20
|
+
* make JoiValidationError.annotation non-enumerable ([9337fd3](https://github.com/NaturalCycles/nodejs-lib/commit/9337fd3e1a60deb0d9e91b17cf5e7b42f72b5459))
|
|
21
|
+
|
|
22
|
+
# [12.45.0](https://github.com/NaturalCycles/nodejs-lib/compare/v12.44.0...v12.45.0) (2021-10-22)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
### Features
|
|
26
|
+
|
|
27
|
+
* ndjsonMap to take mapper as option ([eb06ae1](https://github.com/NaturalCycles/nodejs-lib/commit/eb06ae1e6ff02ef0338e764e5f7d39142ede4760))
|
|
28
|
+
|
|
1
29
|
# [12.44.0](https://github.com/NaturalCycles/nodejs-lib/compare/v12.43.0...v12.44.0) (2021-10-22)
|
|
2
30
|
|
|
3
31
|
|
package/dist/diff/tableDiff.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { AnyObject } from '@naturalcycles/js-lib';
|
|
1
2
|
export interface TableDiffOptions {
|
|
2
3
|
/**
|
|
3
4
|
* @default false
|
|
@@ -29,4 +30,4 @@ export interface TableDiffOptions {
|
|
|
29
30
|
*
|
|
30
31
|
* Function is located in nodejs-lib (not js-lib), because we plan to improve it in the future, e.g. by adding colors (via chalk).
|
|
31
32
|
*/
|
|
32
|
-
export declare function tableDiff(a:
|
|
33
|
+
export declare function tableDiff(a: AnyObject, b: AnyObject, opt?: TableDiffOptions): void;
|
package/dist/got/got.model.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { AnyObject } from '@naturalcycles/js-lib';
|
|
1
2
|
import type { Options } from 'got';
|
|
2
3
|
export interface GetGotOptions extends Options {
|
|
3
4
|
/**
|
|
@@ -33,7 +34,7 @@ export interface GetGotOptions extends Options {
|
|
|
33
34
|
*/
|
|
34
35
|
maxResponseLength?: number;
|
|
35
36
|
}
|
|
36
|
-
export interface GotRequestContext extends
|
|
37
|
+
export interface GotRequestContext extends AnyObject {
|
|
37
38
|
/**
|
|
38
39
|
* Millisecond-timestamp of when the request was started. To be able to count "time spent".
|
|
39
40
|
*/
|
package/dist/index.d.ts
CHANGED
|
@@ -17,6 +17,7 @@ import { SlackApiBody, SlackMessage, SlackMessagePrefixHook, SlackMessageProps,
|
|
|
17
17
|
import { NDJsonStats } from './stream/ndjson/ndjson.model';
|
|
18
18
|
import { ndJsonFileRead } from './stream/ndjson/ndJsonFileRead';
|
|
19
19
|
import { ndJsonFileWrite } from './stream/ndjson/ndJsonFileWrite';
|
|
20
|
+
import { ndjsonMap } from './stream/ndjson/ndjsonMap';
|
|
20
21
|
import { ndjsonStreamForEach, NDJSONStreamForEachOptions } from './stream/ndjson/ndjsonStreamForEach';
|
|
21
22
|
import { pipelineFromNDJsonFile, PipelineFromNDJsonFileOptions } from './stream/ndjson/pipelineFromNDJsonFile';
|
|
22
23
|
import { pipelineToNDJsonFile, PipelineToNDJsonFileOptions } from './stream/ndjson/pipelineToNDJsonFile';
|
|
@@ -63,5 +64,6 @@ import { AnySchemaTyped, ArraySchemaTyped, BooleanSchemaTyped, NumberSchemaTyped
|
|
|
63
64
|
import { anyObjectSchema, anySchema, arraySchema, oneOfSchema, binarySchema, booleanDefaultToFalseSchema, booleanSchema, dateStringSchema, emailSchema, baseDBEntitySchema, savedDBEntitySchema, idSchema, integerSchema, ipAddressSchema, numberSchema, objectSchema, percentageSchema, semVerSchema, SEM_VER_PATTERN, slugSchema, stringSchema, unixTimestampSchema, urlSchema, userAgentSchema, utcOffsetSchema, verSchema } from './validation/joi/joi.shared.schemas';
|
|
64
65
|
import { JoiValidationError, JoiValidationErrorData } from './validation/joi/joi.validation.error';
|
|
65
66
|
import { convert, getValidationResult, isValid, JoiValidationResult, undefinedIfInvalid, validate } from './validation/joi/joi.validation.util';
|
|
66
|
-
|
|
67
|
-
export {
|
|
67
|
+
import { sanitizeHTML, SanitizeHTMLOptions } from './validation/sanitize.util';
|
|
68
|
+
export type { JoiValidationErrorData, JoiValidationResult, ValidationErrorItem, ExtendedJoi, SchemaTyped, AnySchema, AnySchemaTyped, ArraySchemaTyped, BooleanSchemaTyped, NumberSchemaTyped, ObjectSchemaTyped, StringSchemaTyped, IDebug, IDebugger, SlackServiceCfg, SlackMessage, SlackMessageProps, SlackApiBody, SlackMessagePrefixHook, ReadableTyped, WritableTyped, TransformTyped, PipelineFromNDJsonFileOptions, PipelineToNDJsonFileOptions, TransformJsonParseOptions, TransformToNDJsonOptions, TransformMapOptions, TransformMapSyncOptions, NDJSONStreamForEachOptions, TransformOptions, TransformLogProgressOptions, TransformMultiThreadedOptions, WorkerClassInterface, WorkerInput, WorkerOutput, TableDiffOptions, InspectAnyOptions, Got, GetGotOptions, AfterResponseHook, BeforeErrorHook, BeforeRequestHook, AjvValidationOptions, AjvSchemaCfg, AjvValidationErrorData, SanitizeHTMLOptions, };
|
|
69
|
+
export { JoiValidationError, validate, getValidationResult, isValid, undefinedIfInvalid, convert, Joi, booleanSchema, booleanDefaultToFalseSchema, stringSchema, numberSchema, integerSchema, percentageSchema, dateStringSchema, arraySchema, binarySchema, objectSchema, oneOfSchema, anySchema, anyObjectSchema, baseDBEntitySchema, savedDBEntitySchema, idSchema, unixTimestampSchema, verSchema, emailSchema, SEM_VER_PATTERN, semVerSchema, userAgentSchema, utcOffsetSchema, ipAddressSchema, slugSchema, urlSchema, processSharedUtil, zipBuffer, gzipBuffer, unzipBuffer, gunzipBuffer, zipString, gzipString, unzipToString, gunzipToString, requireEnvKeys, requireFileToExist, LRUMemoCache, stringId, stringIdAsync, stringIdUnsafe, ALPHABET_NUMBER, ALPHABET_LOWERCASE, ALPHABET_UPPERCASE, ALPHABET_ALPHANUMERIC_LOWERCASE, ALPHABET_ALPHANUMERIC_UPPERCASE, ALPHABET_ALPHANUMERIC, md5, hash, hashAsBuffer, md5AsBuffer, stringToBase64, base64ToString, bufferToBase64, base64ToBuffer, Debug, DebugLogLevel, getSecretMap, setSecretMap, loadSecretsFromEnv, loadSecretsFromJsonFile, removeSecretsFromEnv, secret, secretOptional, memoryUsage, memoryUsageFull, SlackService, slackDefaultMessagePrefixHook, readableCreate, readableFrom, readableFromArray, readableToArray, readableForEach, readableForEachSync, readableMap, readableMapToArray, _pipeline, transformBuffer, ndjsonMap, ndJsonFileRead, ndJsonFileWrite, ndjsonStreamForEach, pipelineFromNDJsonFile, pipelineToNDJsonFile, NDJsonStats, streamToNDJsonFile, transformJsonParse, bufferReviver, transformToNDJson, transformFilter, transformFilterSync, transformMap, transformMapSync, transformMapSimple, transformNoOp, writableForEach, writablePushToArray, transformSplit, transformToString, transformToArray, transformTap, transformLogProgress, transformLimit, writableVoid, writableFork, transformMultiThreaded, BaseWorkerClass, tableDiff, inspectAny, getGot, HTTPError, TimeoutError, _chunkBuffer, Ajv, getAjv, AjvSchema, AjvValidationError, readJsonSchemas, 
readAjvSchemas, hasColors, sanitizeHTML, };
|
package/dist/index.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.ALPHABET_LOWERCASE = exports.ALPHABET_NUMBER = exports.stringIdUnsafe = exports.stringIdAsync = exports.stringId = exports.LRUMemoCache = exports.requireFileToExist = exports.requireEnvKeys = exports.gunzipToString = exports.unzipToString = exports.gzipString = exports.zipString = exports.gunzipBuffer = exports.unzipBuffer = exports.gzipBuffer = exports.zipBuffer = exports.processSharedUtil = exports.urlSchema = exports.slugSchema = exports.ipAddressSchema = exports.utcOffsetSchema = exports.userAgentSchema = exports.semVerSchema = exports.SEM_VER_PATTERN = exports.emailSchema = exports.verSchema = exports.unixTimestampSchema = exports.idSchema = exports.savedDBEntitySchema = exports.baseDBEntitySchema = exports.anyObjectSchema = exports.anySchema = exports.oneOfSchema = exports.objectSchema = exports.binarySchema = exports.arraySchema = exports.dateStringSchema = exports.percentageSchema = exports.integerSchema = exports.numberSchema = exports.stringSchema = exports.booleanDefaultToFalseSchema = exports.booleanSchema = exports.Joi = exports.convert = exports.undefinedIfInvalid = exports.isValid = exports.getValidationResult = exports.validate = exports.JoiValidationError = void 0;
|
|
4
|
-
exports.
|
|
5
|
-
exports.hasColors = exports.readAjvSchemas = exports.readJsonSchemas = exports.AjvValidationError = exports.AjvSchema = exports.getAjv = exports.Ajv = exports._chunkBuffer = exports.TimeoutError = exports.HTTPError = exports.getGot = exports.inspectAny = exports.tableDiff = exports.BaseWorkerClass = exports.transformMultiThreaded = exports.writableFork = exports.writableVoid = exports.transformLimit = exports.transformLogProgress = exports.transformTap = exports.transformToArray = exports.transformToString = exports.transformSplit = exports.writablePushToArray = exports.writableForEach = exports.transformNoOp = void 0;
|
|
4
|
+
exports.transformMapSync = exports.transformMap = exports.transformFilterSync = exports.transformFilter = exports.transformToNDJson = exports.bufferReviver = exports.transformJsonParse = exports.streamToNDJsonFile = exports.NDJsonStats = exports.pipelineToNDJsonFile = exports.pipelineFromNDJsonFile = exports.ndjsonStreamForEach = exports.ndJsonFileWrite = exports.ndJsonFileRead = exports.ndjsonMap = exports.transformBuffer = exports._pipeline = exports.readableMapToArray = exports.readableMap = exports.readableForEachSync = exports.readableForEach = exports.readableToArray = exports.readableFromArray = exports.readableFrom = exports.readableCreate = exports.slackDefaultMessagePrefixHook = exports.SlackService = exports.memoryUsageFull = exports.memoryUsage = exports.secretOptional = exports.secret = exports.removeSecretsFromEnv = exports.loadSecretsFromJsonFile = exports.loadSecretsFromEnv = exports.setSecretMap = exports.getSecretMap = exports.DebugLogLevel = exports.Debug = exports.base64ToBuffer = exports.bufferToBase64 = exports.base64ToString = exports.stringToBase64 = exports.md5AsBuffer = exports.hashAsBuffer = exports.hash = exports.md5 = exports.ALPHABET_ALPHANUMERIC = exports.ALPHABET_ALPHANUMERIC_UPPERCASE = exports.ALPHABET_ALPHANUMERIC_LOWERCASE = exports.ALPHABET_UPPERCASE = void 0;
|
|
5
|
+
exports.sanitizeHTML = exports.hasColors = exports.readAjvSchemas = exports.readJsonSchemas = exports.AjvValidationError = exports.AjvSchema = exports.getAjv = exports.Ajv = exports._chunkBuffer = exports.TimeoutError = exports.HTTPError = exports.getGot = exports.inspectAny = exports.tableDiff = exports.BaseWorkerClass = exports.transformMultiThreaded = exports.writableFork = exports.writableVoid = exports.transformLimit = exports.transformLogProgress = exports.transformTap = exports.transformToArray = exports.transformToString = exports.transformSplit = exports.writablePushToArray = exports.writableForEach = exports.transformNoOp = exports.transformMapSimple = void 0;
|
|
6
6
|
const ajv_1 = require("ajv");
|
|
7
7
|
exports.Ajv = ajv_1.default;
|
|
8
8
|
const got_1 = require("got");
|
|
@@ -59,6 +59,8 @@ const ndJsonFileRead_1 = require("./stream/ndjson/ndJsonFileRead");
|
|
|
59
59
|
Object.defineProperty(exports, "ndJsonFileRead", { enumerable: true, get: function () { return ndJsonFileRead_1.ndJsonFileRead; } });
|
|
60
60
|
const ndJsonFileWrite_1 = require("./stream/ndjson/ndJsonFileWrite");
|
|
61
61
|
Object.defineProperty(exports, "ndJsonFileWrite", { enumerable: true, get: function () { return ndJsonFileWrite_1.ndJsonFileWrite; } });
|
|
62
|
+
const ndjsonMap_1 = require("./stream/ndjson/ndjsonMap");
|
|
63
|
+
Object.defineProperty(exports, "ndjsonMap", { enumerable: true, get: function () { return ndjsonMap_1.ndjsonMap; } });
|
|
62
64
|
const ndjsonStreamForEach_1 = require("./stream/ndjson/ndjsonStreamForEach");
|
|
63
65
|
Object.defineProperty(exports, "ndjsonStreamForEach", { enumerable: true, get: function () { return ndjsonStreamForEach_1.ndjsonStreamForEach; } });
|
|
64
66
|
const pipelineFromNDJsonFile_1 = require("./stream/ndjson/pipelineFromNDJsonFile");
|
|
@@ -187,3 +189,5 @@ Object.defineProperty(exports, "getValidationResult", { enumerable: true, get: f
|
|
|
187
189
|
Object.defineProperty(exports, "isValid", { enumerable: true, get: function () { return joi_validation_util_1.isValid; } });
|
|
188
190
|
Object.defineProperty(exports, "undefinedIfInvalid", { enumerable: true, get: function () { return joi_validation_util_1.undefinedIfInvalid; } });
|
|
189
191
|
Object.defineProperty(exports, "validate", { enumerable: true, get: function () { return joi_validation_util_1.validate; } });
|
|
192
|
+
const sanitize_util_1 = require("./validation/sanitize.util");
|
|
193
|
+
Object.defineProperty(exports, "sanitizeHTML", { enumerable: true, get: function () { return sanitize_util_1.sanitizeHTML; } });
|
|
@@ -1,21 +1,23 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
1
|
+
import { AsyncMapper } from '@naturalcycles/js-lib';
|
|
2
|
+
import { TransformMapOptions, TransformLogProgressOptions } from '../..';
|
|
3
|
+
export interface NDJSONMapOptions<IN = any, OUT = IN> extends TransformMapOptions<IN, OUT>, TransformLogProgressOptions<IN> {
|
|
3
4
|
inputFilePath: string;
|
|
4
5
|
outputFilePath: string;
|
|
5
|
-
mapperFilePath: string;
|
|
6
6
|
limitInput?: number;
|
|
7
7
|
limitOutput?: number;
|
|
8
|
-
/**
|
|
9
|
-
* @default 1000
|
|
10
|
-
*/
|
|
11
|
-
logEveryInput?: number;
|
|
12
8
|
/**
|
|
13
9
|
* @default 100_000
|
|
14
10
|
*/
|
|
15
11
|
logEveryOutput?: number;
|
|
12
|
+
/**
|
|
13
|
+
* Defaults to `true` for ndjsonMap
|
|
14
|
+
*
|
|
15
|
+
* @default true
|
|
16
|
+
*/
|
|
17
|
+
flattenArrayOutput?: boolean;
|
|
16
18
|
}
|
|
17
19
|
/**
|
|
18
20
|
* Unzips input file automatically, if it ends with `.gz`.
|
|
19
21
|
* Zips output file automatically, if it ends with `.gz`.
|
|
20
22
|
*/
|
|
21
|
-
export declare function ndjsonMap<IN = any, OUT = any>(opt: NDJSONMapOptions<IN, OUT>): Promise<void>;
|
|
23
|
+
export declare function ndjsonMap<IN = any, OUT = any>(mapper: AsyncMapper<IN, OUT>, opt: NDJSONMapOptions<IN, OUT>): Promise<void>;
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.ndjsonMap = void 0;
|
|
4
4
|
const fs_1 = require("fs");
|
|
5
|
-
const path = require("path");
|
|
6
5
|
const zlib_1 = require("zlib");
|
|
7
6
|
const js_lib_1 = require("@naturalcycles/js-lib");
|
|
8
7
|
const __1 = require("../..");
|
|
@@ -10,27 +9,13 @@ const __1 = require("../..");
|
|
|
10
9
|
* Unzips input file automatically, if it ends with `.gz`.
|
|
11
10
|
* Zips output file automatically, if it ends with `.gz`.
|
|
12
11
|
*/
|
|
13
|
-
async function ndjsonMap(opt) {
|
|
14
|
-
const { inputFilePath, outputFilePath,
|
|
12
|
+
async function ndjsonMap(mapper, opt) {
|
|
13
|
+
const { inputFilePath, outputFilePath, logEveryOutput = 100000, limitInput, limitOutput } = opt;
|
|
15
14
|
(0, __1.requireFileToExist)(inputFilePath);
|
|
16
|
-
(0, __1.requireFileToExist)(mapperFilePath);
|
|
17
|
-
const resolvedMapperPath = path.resolve(mapperFilePath);
|
|
18
15
|
console.log({
|
|
19
16
|
inputFilePath,
|
|
20
17
|
outputFilePath,
|
|
21
|
-
mapperFilePath,
|
|
22
|
-
resolvedMapperPath,
|
|
23
18
|
});
|
|
24
|
-
// This is to allow importing *.ts mappers
|
|
25
|
-
try {
|
|
26
|
-
require('ts-node/register/transpile-only');
|
|
27
|
-
require('tsconfig-paths/register');
|
|
28
|
-
}
|
|
29
|
-
catch { } // require if exists
|
|
30
|
-
const { mapper } = require(resolvedMapperPath);
|
|
31
|
-
if (!mapper) {
|
|
32
|
-
throw new Error(`Mapper file should export "mapper" function`);
|
|
33
|
-
}
|
|
34
19
|
const transformUnzip = inputFilePath.endsWith('.gz') ? [(0, zlib_1.createUnzip)()] : [];
|
|
35
20
|
const transformZip = outputFilePath.endsWith('.gz') ? [(0, zlib_1.createGzip)()] : [];
|
|
36
21
|
await (0, __1._pipeline)([
|
|
@@ -39,7 +24,7 @@ async function ndjsonMap(opt) {
|
|
|
39
24
|
(0, __1.transformSplit)(),
|
|
40
25
|
(0, __1.transformJsonParse)(),
|
|
41
26
|
(0, __1.transformLimit)(limitInput),
|
|
42
|
-
(0, __1.transformLogProgress)({ metric: 'read',
|
|
27
|
+
(0, __1.transformLogProgress)({ metric: 'read', ...opt }),
|
|
43
28
|
(0, __1.transformMap)(mapper, {
|
|
44
29
|
flattenArrayOutput: true,
|
|
45
30
|
errorMode: js_lib_1.ErrorMode.SUPPRESS,
|
|
@@ -3,8 +3,9 @@ import { Reviver } from '@naturalcycles/js-lib';
|
|
|
3
3
|
import { TransformTyped } from '../stream.model';
|
|
4
4
|
export interface TransformJsonParseOptions {
|
|
5
5
|
/**
|
|
6
|
-
* @default true
|
|
7
6
|
* If true - will throw an error on JSON.parse / stringify error
|
|
7
|
+
*
|
|
8
|
+
* @default true
|
|
8
9
|
*/
|
|
9
10
|
strict?: boolean;
|
|
10
11
|
reviver?: Reviver;
|
|
@@ -23,5 +24,5 @@ export interface TransformJsonParseOptions {
|
|
|
23
24
|
* consumeYourStream...
|
|
24
25
|
* [)
|
|
25
26
|
*/
|
|
26
|
-
export declare function transformJsonParse<OUT =
|
|
27
|
+
export declare function transformJsonParse<OUT = any>(opt?: TransformJsonParseOptions): TransformTyped<string | Buffer, OUT>;
|
|
27
28
|
export declare const bufferReviver: Reviver;
|
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
import { TransformTyped } from '../stream.model';
|
|
2
2
|
export interface TransformToNDJsonOptions {
|
|
3
3
|
/**
|
|
4
|
-
* @default true
|
|
5
4
|
* If true - will throw an error on JSON.parse / stringify error
|
|
5
|
+
*
|
|
6
|
+
* @default true
|
|
6
7
|
*/
|
|
7
8
|
strict?: boolean;
|
|
8
9
|
/**
|
|
9
|
-
* @default false
|
|
10
10
|
* If true - will run `sortObjectDeep()` on each object to achieve deterministic sort
|
|
11
|
+
*
|
|
12
|
+
* @default false
|
|
11
13
|
*/
|
|
12
14
|
sortObjects?: boolean;
|
|
13
15
|
/**
|
|
@@ -7,4 +7,4 @@ export interface TransformBufferOptions extends TransformOptions {
|
|
|
7
7
|
*
|
|
8
8
|
* @default batchSize is 10
|
|
9
9
|
*/
|
|
10
|
-
export declare function transformBuffer<IN =
|
|
10
|
+
export declare function transformBuffer<IN = any>(opt: TransformBufferOptions): TransformTyped<IN, IN[]>;
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { AnyObject } from '@naturalcycles/js-lib';
|
|
1
2
|
import { TransformOptions, TransformTyped } from '../stream.model';
|
|
2
3
|
export interface TransformLogProgressOptions<IN = any> extends TransformOptions {
|
|
3
4
|
/**
|
|
@@ -9,7 +10,7 @@ export interface TransformLogProgressOptions<IN = any> extends TransformOptions
|
|
|
9
10
|
/**
|
|
10
11
|
* Include `heapUsed` in log.
|
|
11
12
|
*
|
|
12
|
-
* @default
|
|
13
|
+
* @default false
|
|
13
14
|
*/
|
|
14
15
|
heapUsed?: boolean;
|
|
15
16
|
/**
|
|
@@ -27,7 +28,7 @@ export interface TransformLogProgressOptions<IN = any> extends TransformOptions
|
|
|
27
28
|
/**
|
|
28
29
|
* Include Peak RSS in log.
|
|
29
30
|
*
|
|
30
|
-
* @default
|
|
31
|
+
* @default true
|
|
31
32
|
*/
|
|
32
33
|
peakRSS?: boolean;
|
|
33
34
|
/**
|
|
@@ -73,7 +74,7 @@ export interface TransformLogProgressOptions<IN = any> extends TransformOptions
|
|
|
73
74
|
*
|
|
74
75
|
* chunk is undefined for "final" stats, otherwise is defined.
|
|
75
76
|
*/
|
|
76
|
-
extra?: (chunk: IN | undefined, index: number) =>
|
|
77
|
+
extra?: (chunk: IN | undefined, index: number) => AnyObject;
|
|
77
78
|
}
|
|
78
79
|
/**
|
|
79
80
|
* Pass-through transform that optionally logs progress.
|
|
@@ -15,7 +15,7 @@ const inspectOpt = {
|
|
|
15
15
|
* Pass-through transform that optionally logs progress.
|
|
16
16
|
*/
|
|
17
17
|
function transformLogProgress(opt = {}) {
|
|
18
|
-
const { metric = 'progress', heapTotal: logHeapTotal = false, heapUsed: logHeapUsed =
|
|
18
|
+
const { metric = 'progress', heapTotal: logHeapTotal = false, heapUsed: logHeapUsed = false, rss: logRss = true, peakRSS: logPeakRSS = true, logRPS = true, logEvery = 1000, extra, } = opt;
|
|
19
19
|
const logProgress = opt.logProgress !== false && logEvery !== 0; // true by default
|
|
20
20
|
const logEvery10 = logEvery * 10;
|
|
21
21
|
const started = Date.now();
|
|
@@ -2,17 +2,18 @@ import { AsyncMapper, AsyncPredicate, ErrorMode } from '@naturalcycles/js-lib';
|
|
|
2
2
|
import { TransformTyped } from '../stream.model';
|
|
3
3
|
export interface TransformMapOptions<IN = any, OUT = IN> {
|
|
4
4
|
/**
|
|
5
|
-
* @default false
|
|
6
5
|
* Set true to support "multiMap" - possibility to return [] and emit 1 result for each item in the array.
|
|
6
|
+
*
|
|
7
|
+
* @default false
|
|
7
8
|
*/
|
|
8
9
|
flattenArrayOutput?: boolean;
|
|
9
10
|
/**
|
|
10
11
|
* Predicate to filter outgoing results (after mapper).
|
|
11
12
|
* Allows to not emit all results.
|
|
12
13
|
*
|
|
13
|
-
* @default to filter out undefined/null values, but pass anything else
|
|
14
|
-
*
|
|
15
14
|
* Set to `r => r` (passthrough predicate) to pass ANY value (including undefined/null)
|
|
15
|
+
*
|
|
16
|
+
* @default to filter out undefined/null values, but pass anything else
|
|
16
17
|
*/
|
|
17
18
|
predicate?: AsyncPredicate<OUT>;
|
|
18
19
|
/**
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { AnyObject } from '@naturalcycles/js-lib';
|
|
1
2
|
import { TransformTyped } from '../../stream.model';
|
|
2
3
|
export interface TransformMultiThreadedOptions {
|
|
3
4
|
/**
|
|
@@ -19,7 +20,7 @@ export interface TransformMultiThreadedOptions {
|
|
|
19
20
|
/**
|
|
20
21
|
* Passed to the Worker as `workerData` property (initial data).
|
|
21
22
|
*/
|
|
22
|
-
workerData?:
|
|
23
|
+
workerData?: AnyObject;
|
|
23
24
|
}
|
|
24
25
|
/**
|
|
25
26
|
* Spawns a pool of Workers (threads).
|
|
@@ -17,7 +17,7 @@ export declare function oneOfSchema<T = any>(...schemas: AnySchemaTyped<any>[]):
|
|
|
17
17
|
export declare const anySchema: import("joi").AnySchema;
|
|
18
18
|
export declare const anyObjectSchema: import("joi").ObjectSchema<any>;
|
|
19
19
|
/**
|
|
20
|
-
* [a-
|
|
20
|
+
* [a-zA-Z0-9_]*
|
|
21
21
|
* 6-64 length
|
|
22
22
|
*/
|
|
23
23
|
export declare const idSchema: import("./string.extensions").ExtendedStringSchema;
|
|
@@ -28,13 +28,10 @@ exports.anySchema = joi_extensions_1.Joi.any();
|
|
|
28
28
|
exports.anyObjectSchema = joi_extensions_1.Joi.object().options({ stripUnknown: false });
|
|
29
29
|
// 1g498efj5sder3324zer
|
|
30
30
|
/**
|
|
31
|
-
* [a-
|
|
31
|
+
* [a-zA-Z0-9_]*
|
|
32
32
|
* 6-64 length
|
|
33
33
|
*/
|
|
34
|
-
exports.idSchema = exports.stringSchema
|
|
35
|
-
.regex(/^[a-z0-9_]*$/)
|
|
36
|
-
.min(6)
|
|
37
|
-
.max(64);
|
|
34
|
+
exports.idSchema = exports.stringSchema.regex(/^[a-zA-Z0-9_]{6,64}$/);
|
|
38
35
|
/**
|
|
39
36
|
* `_` is NOT allowed, so that slug-ids can be used as part of natural ids with a `_` separator.
|
|
40
37
|
*/
|
|
@@ -42,7 +39,7 @@ exports.SLUG_PATTERN = /^[a-z0-9-]*$/;
|
|
|
42
39
|
/**
|
|
43
40
|
* "Slug" - a valid URL, filename, etc.
|
|
44
41
|
*/
|
|
45
|
-
exports.slugSchema = exports.stringSchema.regex(
|
|
42
|
+
exports.slugSchema = exports.stringSchema.regex(/^[a-z0-9-]{1,255}$/);
|
|
46
43
|
// 16725225600 is 2500-01-01
|
|
47
44
|
exports.unixTimestampSchema = exports.numberSchema.integer().min(0).max(16725225600);
|
|
48
45
|
// 2
|
|
@@ -17,6 +17,9 @@ export interface JoiValidationErrorData extends ErrorData {
|
|
|
17
17
|
/**
|
|
18
18
|
* Error "annotation" is stripped in Error.message.
|
|
19
19
|
* This field contains the "full" annotation.
|
|
20
|
+
*
|
|
21
|
+
* This field is non-enumerable, won't be printed or included in JSON by default,
|
|
22
|
+
* but still accessible programmatically (via `err.data.annotation`) when needed!
|
|
20
23
|
*/
|
|
21
24
|
annotation?: string;
|
|
22
25
|
}
|
|
@@ -122,10 +122,16 @@ function createError(value, err, objectName) {
|
|
|
122
122
|
tokens.push(annotation);
|
|
123
123
|
}
|
|
124
124
|
const msg = tokens.join('\n');
|
|
125
|
-
|
|
125
|
+
const data = {
|
|
126
126
|
joiValidationErrorItems: err.details,
|
|
127
127
|
...(objectName && { joiValidationObjectName: objectName }),
|
|
128
128
|
...(objectId && { joiValidationObjectId: objectId }),
|
|
129
|
-
|
|
129
|
+
};
|
|
130
|
+
// Make annotation non-enumerable, to not get it automatically printed,
|
|
131
|
+
// but still accessible
|
|
132
|
+
Object.defineProperty(data, 'annotation', {
|
|
133
|
+
enumerable: false,
|
|
134
|
+
value: annotation,
|
|
130
135
|
});
|
|
136
|
+
return new joi_validation_error_1.JoiValidationError(msg, data);
|
|
131
137
|
}
|
|
@@ -3,9 +3,18 @@ import * as Joi from 'joi';
|
|
|
3
3
|
import { AnySchemaTyped } from './joi.model';
|
|
4
4
|
export interface ExtendedStringSchema extends StringSchema, AnySchemaTyped<string> {
|
|
5
5
|
dateString(min?: string, max?: string): this;
|
|
6
|
+
stripHTML(opt?: JoiStripHTMLOptions): this;
|
|
6
7
|
}
|
|
7
|
-
export interface
|
|
8
|
+
export interface JoiDateStringOptions {
|
|
8
9
|
min?: string;
|
|
9
10
|
max?: string;
|
|
10
11
|
}
|
|
12
|
+
export interface JoiStripHTMLOptions {
|
|
13
|
+
/**
|
|
14
|
+
* 'Strict' would throw an error if it detects any HTML.
|
|
15
|
+
* Non-strict (default) does not error, but DOES return the string with all HTML removed.
|
|
16
|
+
* Internally uses `sanitize-html` library, with allowedTags = [], and method = 'discard'.
|
|
17
|
+
*/
|
|
18
|
+
strict?: boolean;
|
|
19
|
+
}
|
|
11
20
|
export declare function stringExtensions(joi: typeof Joi): Extension;
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.stringExtensions = void 0;
|
|
4
4
|
const time_lib_1 = require("@naturalcycles/time-lib");
|
|
5
|
+
const sanitize = require("sanitize-html");
|
|
5
6
|
function stringExtensions(joi) {
|
|
6
7
|
return {
|
|
7
8
|
type: 'string',
|
|
@@ -11,6 +12,7 @@ function stringExtensions(joi) {
|
|
|
11
12
|
'string.dateStringMin': '"{{#label}}" must be not earlier than {{#min}}',
|
|
12
13
|
'string.dateStringMax': '"{{#label}}" must be not later than {{#max}}',
|
|
13
14
|
'string.dateStringCalendarAccuracy': '"{{#label}}" must be a VALID calendar date',
|
|
15
|
+
'string.stripHTML': '"{{#label}}" must NOT contain any HTML tags',
|
|
14
16
|
},
|
|
15
17
|
rules: {
|
|
16
18
|
dateString: {
|
|
@@ -57,6 +59,7 @@ function stringExtensions(joi) {
|
|
|
57
59
|
err = 'string.dateStringMax';
|
|
58
60
|
}
|
|
59
61
|
else if (!(0, time_lib_1.dayjs)(v).isValid()) {
|
|
62
|
+
// todo: replace with another regex (from ajv-validators) for speed
|
|
60
63
|
err = 'string.dateStringCalendarAccuracy';
|
|
61
64
|
}
|
|
62
65
|
if (err) {
|
|
@@ -65,6 +68,37 @@ function stringExtensions(joi) {
|
|
|
65
68
|
return v; // validation passed
|
|
66
69
|
},
|
|
67
70
|
},
|
|
71
|
+
stripHTML: {
|
|
72
|
+
method(opt) {
|
|
73
|
+
return this.$_addRule({
|
|
74
|
+
name: 'stripHTML',
|
|
75
|
+
args: {
|
|
76
|
+
strict: false,
|
|
77
|
+
...opt,
|
|
78
|
+
},
|
|
79
|
+
});
|
|
80
|
+
},
|
|
81
|
+
args: [
|
|
82
|
+
{
|
|
83
|
+
name: 'strict',
|
|
84
|
+
ref: true,
|
|
85
|
+
assert: v => typeof v === 'boolean',
|
|
86
|
+
message: 'must be a boolean',
|
|
87
|
+
},
|
|
88
|
+
],
|
|
89
|
+
validate(v, helpers, args) {
|
|
90
|
+
console.log('!!! stripHTML', args, v);
|
|
91
|
+
const { strict = false } = args;
|
|
92
|
+
const r = sanitize(v, {
|
|
93
|
+
allowedTags: [], // no html tags allowed at all
|
|
94
|
+
// disallowedTagsMode: 'discard' // discard is default
|
|
95
|
+
});
|
|
96
|
+
if (strict && r !== v) {
|
|
97
|
+
return helpers.error('string.stripHTML', args);
|
|
98
|
+
}
|
|
99
|
+
return r; // return converted value (or the same, if there was nothing to sanitize)
|
|
100
|
+
},
|
|
101
|
+
},
|
|
68
102
|
},
|
|
69
103
|
};
|
|
70
104
|
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import * as sanitize from 'sanitize-html';
|
|
2
|
+
export declare type SanitizeHTMLOptions = sanitize.IOptions;
|
|
3
|
+
/**
|
|
4
|
+
* Simply a wrapper around `sanitize-html` library.
|
|
5
|
+
*
|
|
6
|
+
* @experimental
|
|
7
|
+
*/
|
|
8
|
+
export declare function sanitizeHTML(s: string, opt?: SanitizeHTMLOptions): string;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.sanitizeHTML = void 0;
|
|
4
|
+
const sanitize = require("sanitize-html");
|
|
5
|
+
/**
|
|
6
|
+
* Simply a wrapper around `sanitize-html` library.
|
|
7
|
+
*
|
|
8
|
+
* @experimental
|
|
9
|
+
*/
|
|
10
|
+
function sanitizeHTML(s, opt) {
|
|
11
|
+
return sanitize(s, opt);
|
|
12
|
+
}
|
|
13
|
+
exports.sanitizeHTML = sanitizeHTML;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@naturalcycles/nodejs-lib",
|
|
3
|
-
"version": "12.
|
|
3
|
+
"version": "12.47.0",
|
|
4
4
|
"scripts": {
|
|
5
5
|
"prepare": "husky install",
|
|
6
6
|
"docs-serve": "vuepress dev docs",
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
"@naturalcycles/js-lib": "^14.0.0",
|
|
18
18
|
"@naturalcycles/time-lib": "^3.0.1",
|
|
19
19
|
"@types/lru-cache": "^5.1.0",
|
|
20
|
+
"@types/sanitize-html": "^2.5.0",
|
|
20
21
|
"@types/through2-concurrent": "^2.0.0",
|
|
21
22
|
"ajv": "^8.6.2",
|
|
22
23
|
"ajv-formats": "^2.1.0",
|
|
@@ -35,6 +36,7 @@
|
|
|
35
36
|
"lru-cache": "^6.0.0",
|
|
36
37
|
"move-file": "^2.0.0",
|
|
37
38
|
"nanoid": "^3.0.0",
|
|
39
|
+
"sanitize-html": "^2.5.2",
|
|
38
40
|
"supports-color": "^8.0.0",
|
|
39
41
|
"through2-concurrent": "^2.0.0",
|
|
40
42
|
"yargs": "^17.0.0"
|
package/src/diff/tableDiff.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { StringMap, _truncate } from '@naturalcycles/js-lib'
|
|
1
|
+
import { StringMap, _truncate, AnyObject } from '@naturalcycles/js-lib'
|
|
2
2
|
|
|
3
3
|
export interface TableDiffOptions {
|
|
4
4
|
/**
|
|
@@ -35,11 +35,7 @@ export interface TableDiffOptions {
|
|
|
35
35
|
*
|
|
36
36
|
* Function is located in nodejs-lib (not js-lib), because there are plans to improve it in the future, e.g. by adding colors (via chalk).
|
|
37
37
|
*/
|
|
38
|
-
export function tableDiff(
|
|
39
|
-
a: Record<string, any>,
|
|
40
|
-
b: Record<string, any>,
|
|
41
|
-
opt: TableDiffOptions = {},
|
|
42
|
-
): void {
|
|
38
|
+
export function tableDiff(a: AnyObject, b: AnyObject, opt: TableDiffOptions = {}): void {
|
|
43
39
|
const { maxFieldLen, aTitle = 'a', bTitle = 'b' } = opt
|
|
44
40
|
const diff: StringMap<any> = {}
|
|
45
41
|
|
package/src/got/got.model.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { AnyObject } from '@naturalcycles/js-lib'
|
|
1
2
|
import type { Options } from 'got'
|
|
2
3
|
|
|
3
4
|
export interface GetGotOptions extends Options {
|
|
@@ -40,7 +41,7 @@ export interface GetGotOptions extends Options {
|
|
|
40
41
|
maxResponseLength?: number
|
|
41
42
|
}
|
|
42
43
|
|
|
43
|
-
export interface GotRequestContext extends
|
|
44
|
+
export interface GotRequestContext extends AnyObject {
|
|
44
45
|
/**
|
|
45
46
|
* Millisecond-timestamp of when the request was started. To be able to count "time spent".
|
|
46
47
|
*/
|
package/src/index.ts
CHANGED
|
@@ -50,6 +50,7 @@ import {
|
|
|
50
50
|
import { NDJsonStats } from './stream/ndjson/ndjson.model'
|
|
51
51
|
import { ndJsonFileRead } from './stream/ndjson/ndJsonFileRead'
|
|
52
52
|
import { ndJsonFileWrite } from './stream/ndjson/ndJsonFileWrite'
|
|
53
|
+
import { ndjsonMap } from './stream/ndjson/ndjsonMap'
|
|
53
54
|
import {
|
|
54
55
|
ndjsonStreamForEach,
|
|
55
56
|
NDJSONStreamForEachOptions,
|
|
@@ -171,6 +172,7 @@ import {
|
|
|
171
172
|
undefinedIfInvalid,
|
|
172
173
|
validate,
|
|
173
174
|
} from './validation/joi/joi.validation.util'
|
|
175
|
+
import { sanitizeHTML, SanitizeHTMLOptions } from './validation/sanitize.util'
|
|
174
176
|
|
|
175
177
|
export type {
|
|
176
178
|
JoiValidationErrorData,
|
|
@@ -218,6 +220,7 @@ export type {
|
|
|
218
220
|
AjvValidationOptions,
|
|
219
221
|
AjvSchemaCfg,
|
|
220
222
|
AjvValidationErrorData,
|
|
223
|
+
SanitizeHTMLOptions,
|
|
221
224
|
}
|
|
222
225
|
|
|
223
226
|
export {
|
|
@@ -306,6 +309,7 @@ export {
|
|
|
306
309
|
readableMapToArray,
|
|
307
310
|
_pipeline,
|
|
308
311
|
transformBuffer,
|
|
312
|
+
ndjsonMap,
|
|
309
313
|
ndJsonFileRead,
|
|
310
314
|
ndJsonFileWrite,
|
|
311
315
|
ndjsonStreamForEach,
|
|
@@ -347,4 +351,5 @@ export {
|
|
|
347
351
|
readJsonSchemas,
|
|
348
352
|
readAjvSchemas,
|
|
349
353
|
hasColors,
|
|
354
|
+
sanitizeHTML,
|
|
350
355
|
}
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { createReadStream, createWriteStream } from 'fs'
|
|
2
|
-
import * as path from 'path'
|
|
3
2
|
import { createGzip, createUnzip } from 'zlib'
|
|
4
3
|
import { AsyncMapper, ErrorMode } from '@naturalcycles/js-lib'
|
|
5
4
|
import {
|
|
@@ -12,28 +11,29 @@ import {
|
|
|
12
11
|
transformSplit,
|
|
13
12
|
transformToNDJson,
|
|
14
13
|
_pipeline,
|
|
14
|
+
TransformLogProgressOptions,
|
|
15
15
|
} from '../..'
|
|
16
16
|
|
|
17
|
-
interface
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
export interface NDJSONMapOptions<IN = any, OUT = IN> extends TransformMapOptions<IN, OUT> {
|
|
17
|
+
export interface NDJSONMapOptions<IN = any, OUT = IN>
|
|
18
|
+
extends TransformMapOptions<IN, OUT>,
|
|
19
|
+
TransformLogProgressOptions<IN> {
|
|
22
20
|
inputFilePath: string
|
|
23
21
|
outputFilePath: string
|
|
24
|
-
|
|
22
|
+
|
|
25
23
|
limitInput?: number
|
|
26
24
|
limitOutput?: number
|
|
27
25
|
|
|
28
26
|
/**
|
|
29
|
-
* @default
|
|
27
|
+
* @default 100_000
|
|
30
28
|
*/
|
|
31
|
-
|
|
29
|
+
logEveryOutput?: number
|
|
32
30
|
|
|
33
31
|
/**
|
|
34
|
-
*
|
|
32
|
+
* Defaults to `true` for ndjsonMap
|
|
33
|
+
*
|
|
34
|
+
* @default true
|
|
35
35
|
*/
|
|
36
|
-
|
|
36
|
+
flattenArrayOutput?: boolean
|
|
37
37
|
}
|
|
38
38
|
|
|
39
39
|
/**
|
|
@@ -41,42 +41,18 @@ export interface NDJSONMapOptions<IN = any, OUT = IN> extends TransformMapOption
|
|
|
41
41
|
* Zips output file automatically, if it ends with `.gz`.
|
|
42
42
|
*/
|
|
43
43
|
export async function ndjsonMap<IN = any, OUT = any>(
|
|
44
|
+
mapper: AsyncMapper<IN, OUT>,
|
|
44
45
|
opt: NDJSONMapOptions<IN, OUT>,
|
|
45
46
|
): Promise<void> {
|
|
46
|
-
const {
|
|
47
|
-
inputFilePath,
|
|
48
|
-
outputFilePath,
|
|
49
|
-
mapperFilePath,
|
|
50
|
-
logEveryInput = 1000,
|
|
51
|
-
logEveryOutput = 100_000,
|
|
52
|
-
limitInput,
|
|
53
|
-
limitOutput,
|
|
54
|
-
} = opt
|
|
47
|
+
const { inputFilePath, outputFilePath, logEveryOutput = 100_000, limitInput, limitOutput } = opt
|
|
55
48
|
|
|
56
49
|
requireFileToExist(inputFilePath)
|
|
57
|
-
requireFileToExist(mapperFilePath)
|
|
58
|
-
|
|
59
|
-
const resolvedMapperPath = path.resolve(mapperFilePath)
|
|
60
50
|
|
|
61
51
|
console.log({
|
|
62
52
|
inputFilePath,
|
|
63
53
|
outputFilePath,
|
|
64
|
-
mapperFilePath,
|
|
65
|
-
resolvedMapperPath,
|
|
66
54
|
})
|
|
67
55
|
|
|
68
|
-
// This is to allow importing *.ts mappers
|
|
69
|
-
try {
|
|
70
|
-
require('ts-node/register/transpile-only')
|
|
71
|
-
require('tsconfig-paths/register')
|
|
72
|
-
} catch {} // require if exists
|
|
73
|
-
|
|
74
|
-
const { mapper } = require(resolvedMapperPath) as NDJSONMapperFile<IN, OUT>
|
|
75
|
-
|
|
76
|
-
if (!mapper) {
|
|
77
|
-
throw new Error(`Mapper file should export "mapper" function`)
|
|
78
|
-
}
|
|
79
|
-
|
|
80
56
|
const transformUnzip = inputFilePath.endsWith('.gz') ? [createUnzip()] : []
|
|
81
57
|
const transformZip = outputFilePath.endsWith('.gz') ? [createGzip()] : []
|
|
82
58
|
|
|
@@ -86,7 +62,7 @@ export async function ndjsonMap<IN = any, OUT = any>(
|
|
|
86
62
|
transformSplit(), // splits by \n
|
|
87
63
|
transformJsonParse(),
|
|
88
64
|
transformLimit(limitInput),
|
|
89
|
-
transformLogProgress({ metric: 'read',
|
|
65
|
+
transformLogProgress({ metric: 'read', ...opt }),
|
|
90
66
|
transformMap(mapper, {
|
|
91
67
|
flattenArrayOutput: true,
|
|
92
68
|
errorMode: ErrorMode.SUPPRESS,
|
|
@@ -4,8 +4,9 @@ import { TransformTyped } from '../stream.model'
|
|
|
4
4
|
|
|
5
5
|
export interface TransformJsonParseOptions {
|
|
6
6
|
/**
|
|
7
|
-
* @default true
|
|
8
7
|
* If true - will throw an error on JSON.parse / stringify error
|
|
8
|
+
*
|
|
9
|
+
* @default true
|
|
9
10
|
*/
|
|
10
11
|
strict?: boolean
|
|
11
12
|
|
|
@@ -26,7 +27,7 @@ export interface TransformJsonParseOptions {
|
|
|
26
27
|
* consumeYourStream...
|
|
27
28
|
* [)
|
|
28
29
|
*/
|
|
29
|
-
export function transformJsonParse<OUT =
|
|
30
|
+
export function transformJsonParse<OUT = any>(
|
|
30
31
|
opt: TransformJsonParseOptions = {},
|
|
31
32
|
): TransformTyped<string | Buffer, OUT> {
|
|
32
33
|
const { strict = true, reviver } = opt
|
|
@@ -4,14 +4,16 @@ import { TransformTyped } from '../stream.model'
|
|
|
4
4
|
|
|
5
5
|
export interface TransformToNDJsonOptions {
|
|
6
6
|
/**
|
|
7
|
-
* @default true
|
|
8
7
|
* If true - will throw an error on JSON.parse / stringify error
|
|
8
|
+
*
|
|
9
|
+
* @default true
|
|
9
10
|
*/
|
|
10
11
|
strict?: boolean
|
|
11
12
|
|
|
12
13
|
/**
|
|
13
|
-
* @default false
|
|
14
14
|
* If true - will run `sortObjectDeep()` on each object to achieve deterministic sort
|
|
15
|
+
*
|
|
16
|
+
* @default false
|
|
15
17
|
*/
|
|
16
18
|
sortObjects?: boolean
|
|
17
19
|
|
|
@@ -10,9 +10,7 @@ export interface TransformBufferOptions extends TransformOptions {
|
|
|
10
10
|
*
|
|
11
11
|
* @default batchSize is 10
|
|
12
12
|
*/
|
|
13
|
-
export function transformBuffer<IN =
|
|
14
|
-
opt: TransformBufferOptions,
|
|
15
|
-
): TransformTyped<IN, IN[]> {
|
|
13
|
+
export function transformBuffer<IN = any>(opt: TransformBufferOptions): TransformTyped<IN, IN[]> {
|
|
16
14
|
const { batchSize } = opt
|
|
17
15
|
|
|
18
16
|
let buf: IN[] = []
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { Transform } from 'stream'
|
|
2
2
|
import { inspect, InspectOptions } from 'util'
|
|
3
|
-
import { SimpleMovingAverage, _mb, _since } from '@naturalcycles/js-lib'
|
|
3
|
+
import { SimpleMovingAverage, _mb, _since, AnyObject } from '@naturalcycles/js-lib'
|
|
4
4
|
import { dayjs } from '@naturalcycles/time-lib'
|
|
5
5
|
import { boldWhite, dimGrey, white, yellow } from '../../colors'
|
|
6
6
|
import { hasColors } from '../../colors/colors'
|
|
@@ -17,7 +17,7 @@ export interface TransformLogProgressOptions<IN = any> extends TransformOptions
|
|
|
17
17
|
/**
|
|
18
18
|
* Include `heapUsed` in log.
|
|
19
19
|
*
|
|
20
|
-
* @default
|
|
20
|
+
* @default false
|
|
21
21
|
*/
|
|
22
22
|
heapUsed?: boolean
|
|
23
23
|
|
|
@@ -38,7 +38,7 @@ export interface TransformLogProgressOptions<IN = any> extends TransformOptions
|
|
|
38
38
|
/**
|
|
39
39
|
* Include Peak RSS in log.
|
|
40
40
|
*
|
|
41
|
-
* @default
|
|
41
|
+
* @default true
|
|
42
42
|
*/
|
|
43
43
|
peakRSS?: boolean
|
|
44
44
|
|
|
@@ -91,7 +91,7 @@ export interface TransformLogProgressOptions<IN = any> extends TransformOptions
|
|
|
91
91
|
*
|
|
92
92
|
* chunk is undefined for "final" stats, otherwise is defined.
|
|
93
93
|
*/
|
|
94
|
-
extra?: (chunk: IN | undefined, index: number) =>
|
|
94
|
+
extra?: (chunk: IN | undefined, index: number) => AnyObject
|
|
95
95
|
}
|
|
96
96
|
|
|
97
97
|
const inspectOpt: InspectOptions = {
|
|
@@ -108,9 +108,9 @@ export function transformLogProgress<IN = any>(
|
|
|
108
108
|
const {
|
|
109
109
|
metric = 'progress',
|
|
110
110
|
heapTotal: logHeapTotal = false,
|
|
111
|
-
heapUsed: logHeapUsed =
|
|
111
|
+
heapUsed: logHeapUsed = false,
|
|
112
112
|
rss: logRss = true,
|
|
113
|
-
peakRSS: logPeakRSS =
|
|
113
|
+
peakRSS: logPeakRSS = true,
|
|
114
114
|
logRPS = true,
|
|
115
115
|
logEvery = 1000,
|
|
116
116
|
extra,
|
|
@@ -12,8 +12,9 @@ import { TransformTyped } from '../stream.model'
|
|
|
12
12
|
|
|
13
13
|
export interface TransformMapOptions<IN = any, OUT = IN> {
|
|
14
14
|
/**
|
|
15
|
-
* @default false
|
|
16
15
|
* Set true to support "multiMap" - possibility to return [] and emit 1 result for each item in the array.
|
|
16
|
+
*
|
|
17
|
+
* @default false
|
|
17
18
|
*/
|
|
18
19
|
flattenArrayOutput?: boolean
|
|
19
20
|
|
|
@@ -21,9 +22,9 @@ export interface TransformMapOptions<IN = any, OUT = IN> {
|
|
|
21
22
|
* Predicate to filter outgoing results (after mapper).
|
|
22
23
|
* Allows to not emit all results.
|
|
23
24
|
*
|
|
24
|
-
* @default to filter out undefined/null values, but pass anything else
|
|
25
|
-
*
|
|
26
25
|
* Set to `r => r` (passthrough predicate) to pass ANY value (including undefined/null)
|
|
26
|
+
*
|
|
27
|
+
* @default to filter out undefined/null values, but pass anything else
|
|
27
28
|
*/
|
|
28
29
|
predicate?: AsyncPredicate<OUT>
|
|
29
30
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { Worker } from 'worker_threads'
|
|
2
|
-
import { DeferredPromise, pDefer, _range } from '@naturalcycles/js-lib'
|
|
2
|
+
import { DeferredPromise, pDefer, _range, AnyObject } from '@naturalcycles/js-lib'
|
|
3
3
|
import through2Concurrent = require('through2-concurrent')
|
|
4
4
|
import { TransformTyped } from '../../stream.model'
|
|
5
5
|
import { WorkerInput, WorkerOutput } from './transformMultiThreaded.model'
|
|
@@ -30,7 +30,7 @@ export interface TransformMultiThreadedOptions {
|
|
|
30
30
|
/**
|
|
31
31
|
* Passed to the Worker as `workerData` property (initial data).
|
|
32
32
|
*/
|
|
33
|
-
workerData?:
|
|
33
|
+
workerData?: AnyObject
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
const workerProxyFilePath = `${__dirname}/workerClassProxy.js`
|
|
@@ -42,13 +42,10 @@ export const anyObjectSchema = Joi.object().options({ stripUnknown: false })
|
|
|
42
42
|
|
|
43
43
|
// 1g498efj5sder3324zer
|
|
44
44
|
/**
|
|
45
|
-
* [a-
|
|
45
|
+
* [a-zA-Z0-9_]*
|
|
46
46
|
* 6-64 length
|
|
47
47
|
*/
|
|
48
|
-
export const idSchema = stringSchema
|
|
49
|
-
.regex(/^[a-z0-9_]*$/)
|
|
50
|
-
.min(6)
|
|
51
|
-
.max(64)
|
|
48
|
+
export const idSchema = stringSchema.regex(/^[a-zA-Z0-9_]{6,64}$/)
|
|
52
49
|
|
|
53
50
|
/**
|
|
54
51
|
* `_` should NOT be allowed to be able to use slug-ids as part of natural ids with `_` separator.
|
|
@@ -58,7 +55,7 @@ export const SLUG_PATTERN = /^[a-z0-9-]*$/
|
|
|
58
55
|
/**
|
|
59
56
|
* "Slug" - a valid URL, filename, etc.
|
|
60
57
|
*/
|
|
61
|
-
export const slugSchema = stringSchema.regex(
|
|
58
|
+
export const slugSchema = stringSchema.regex(/^[a-z0-9-]{1,255}$/)
|
|
62
59
|
|
|
63
60
|
// 16725225600 is 2500-01-01
|
|
64
61
|
export const unixTimestampSchema = numberSchema.integer().min(0).max(16725225600)
|
|
@@ -18,6 +18,9 @@ export interface JoiValidationErrorData extends ErrorData {
|
|
|
18
18
|
/**
|
|
19
19
|
* Error "annotation" is stripped in Error.message.
|
|
20
20
|
* This field contains the "full" annotation.
|
|
21
|
+
*
|
|
22
|
+
* This field is non-enumerable, won't be printed or included in JSON by default,
|
|
23
|
+
* but still accessible programmatically (via `err.data.annotation`) when needed!
|
|
21
24
|
*/
|
|
22
25
|
annotation?: string
|
|
23
26
|
}
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
import { _hb, _isObject, _truncateMiddle } from '@naturalcycles/js-lib'
|
|
10
10
|
import { ValidationError, ValidationOptions } from 'joi'
|
|
11
11
|
import { AnySchemaTyped } from './joi.model'
|
|
12
|
-
import { JoiValidationError } from './joi.validation.error'
|
|
12
|
+
import { JoiValidationError, JoiValidationErrorData } from './joi.validation.error'
|
|
13
13
|
|
|
14
14
|
// todo: consider replacing with Tuple of [error, value]
|
|
15
15
|
export interface JoiValidationResult<T = any> {
|
|
@@ -163,10 +163,18 @@ function createError(value: any, err: ValidationError, objectName?: string): Joi
|
|
|
163
163
|
|
|
164
164
|
const msg = tokens.join('\n')
|
|
165
165
|
|
|
166
|
-
|
|
166
|
+
const data: JoiValidationErrorData = {
|
|
167
167
|
joiValidationErrorItems: err.details,
|
|
168
168
|
...(objectName && { joiValidationObjectName: objectName }),
|
|
169
169
|
...(objectId && { joiValidationObjectId: objectId }),
|
|
170
|
-
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
// Make annotation non-enumerable, to not get it automatically printed,
|
|
173
|
+
// but still accessible
|
|
174
|
+
Object.defineProperty(data, 'annotation', {
|
|
175
|
+
enumerable: false,
|
|
176
|
+
value: annotation,
|
|
171
177
|
})
|
|
178
|
+
|
|
179
|
+
return new JoiValidationError(msg, data)
|
|
172
180
|
}
|
|
@@ -1,17 +1,28 @@
|
|
|
1
1
|
import { dayjs } from '@naturalcycles/time-lib'
|
|
2
2
|
import { Extension, StringSchema } from 'joi'
|
|
3
3
|
import * as Joi from 'joi'
|
|
4
|
+
import * as sanitize from 'sanitize-html'
|
|
4
5
|
import { AnySchemaTyped } from './joi.model'
|
|
5
6
|
|
|
6
7
|
export interface ExtendedStringSchema extends StringSchema, AnySchemaTyped<string> {
|
|
7
8
|
dateString(min?: string, max?: string): this
|
|
9
|
+
stripHTML(opt?: JoiStripHTMLOptions): this
|
|
8
10
|
}
|
|
9
11
|
|
|
10
|
-
export interface
|
|
12
|
+
export interface JoiDateStringOptions {
|
|
11
13
|
min?: string
|
|
12
14
|
max?: string
|
|
13
15
|
}
|
|
14
16
|
|
|
17
|
+
export interface JoiStripHTMLOptions {
|
|
18
|
+
/**
|
|
19
|
+
* 'Strict' would throw an error if it detects any HTML.
|
|
20
|
+
* Non-strict (default) does not error, but DOES convert the string to the string without HTML.
|
|
21
|
+
* Internally uses `sanitize-html` library, with `allowedTags = []` and the default `disallowedTagsMode = 'discard'`.
|
|
22
|
+
*/
|
|
23
|
+
strict?: boolean
|
|
24
|
+
}
|
|
25
|
+
|
|
15
26
|
export function stringExtensions(joi: typeof Joi): Extension {
|
|
16
27
|
return {
|
|
17
28
|
type: 'string',
|
|
@@ -21,13 +32,14 @@ export function stringExtensions(joi: typeof Joi): Extension {
|
|
|
21
32
|
'string.dateStringMin': '"{{#label}}" must be not earlier than {{#min}}',
|
|
22
33
|
'string.dateStringMax': '"{{#label}}" must be not later than {{#max}}',
|
|
23
34
|
'string.dateStringCalendarAccuracy': '"{{#label}}" must be a VALID calendar date',
|
|
35
|
+
'string.stripHTML': '"{{#label}}" must NOT contain any HTML tags',
|
|
24
36
|
},
|
|
25
37
|
rules: {
|
|
26
38
|
dateString: {
|
|
27
39
|
method(min?: string, max?: string) {
|
|
28
40
|
return this.$_addRule({
|
|
29
41
|
name: 'dateString',
|
|
30
|
-
args: { min, max } as
|
|
42
|
+
args: { min, max } as JoiDateStringOptions,
|
|
31
43
|
})
|
|
32
44
|
},
|
|
33
45
|
args: [
|
|
@@ -44,7 +56,7 @@ export function stringExtensions(joi: typeof Joi): Extension {
|
|
|
44
56
|
message: 'must be a string',
|
|
45
57
|
},
|
|
46
58
|
],
|
|
47
|
-
validate(v: string, helpers, args:
|
|
59
|
+
validate(v: string, helpers, args: JoiDateStringOptions) {
|
|
48
60
|
// console.log('dateString validate called', {v, args})
|
|
49
61
|
|
|
50
62
|
let err: string | undefined
|
|
@@ -67,6 +79,7 @@ export function stringExtensions(joi: typeof Joi): Extension {
|
|
|
67
79
|
} else if (max && v > max) {
|
|
68
80
|
err = 'string.dateStringMax'
|
|
69
81
|
} else if (!dayjs(v).isValid()) {
|
|
82
|
+
// todo: replace with another regex (from ajv-validators) for speed
|
|
70
83
|
err = 'string.dateStringCalendarAccuracy'
|
|
71
84
|
}
|
|
72
85
|
|
|
@@ -77,6 +90,40 @@ export function stringExtensions(joi: typeof Joi): Extension {
|
|
|
77
90
|
return v // validation passed
|
|
78
91
|
},
|
|
79
92
|
},
|
|
93
|
+
stripHTML: {
  /**
   * Adds the `stripHTML` rule to the schema.
   * `strict` is `false` unless `opt` overrides it.
   */
  method(opt?: JoiStripHTMLOptions) {
    return this.$_addRule({
      name: 'stripHTML',
      args: {
        strict: false,
        ...opt,
      },
    })
  },
  // Declares the single rule argument (`strict`) and asserts that it is a boolean.
  args: [
    {
      name: 'strict',
      ref: true,
      assert: v => typeof v === 'boolean',
      message: 'must be a boolean',
    },
  ],
  /**
   * Runs the incoming string through `sanitize` with an empty `allowedTags` list.
   *
   * When `args.strict` is truthy and the sanitized string differs from the input,
   * a `string.stripHTML` error is returned; otherwise the sanitized string is
   * returned as the converted value.
   *
   * No logging is done here: this runs for every validated value, and the value
   * may contain user-provided data that should not end up in stdout.
   */
  validate(v: string, helpers, args: JoiStripHTMLOptions) {
    const { strict = false } = args

    const r = sanitize(v, {
      allowedTags: [], // no html tags allowed at all
      // disallowedTagsMode: 'discard' // discard is default
    })

    // NOTE(review): ANY difference between input and output counts as "contains HTML" in strict mode,
    // not only removed tags - confirm whether `sanitize-html` also rewrites entities.
    if (strict && r !== v) {
      return helpers.error('string.stripHTML', args)
    }

    return r // return converted value (or the same, if there was nothing to sanitize)
  },
},
|
|
80
127
|
},
|
|
81
128
|
}
|
|
82
129
|
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import * as sanitize from 'sanitize-html'
|
|
2
|
+
|
|
3
|
+
export type SanitizeHTMLOptions = sanitize.IOptions
|
|
4
|
+
|
|
5
|
+
/**
 * Thin pass-through to the `sanitize-html` library: hands the input string and
 * the optional options object over unchanged and returns whatever the library returns.
 *
 * @param s string to sanitize
 * @param opt options passed to `sanitize-html` as-is (may be omitted)
 * @returns the string produced by `sanitize-html`
 * @experimental
 */
export function sanitizeHTML(s: string, opt?: SanitizeHTMLOptions): string {
  const sanitized = sanitize(s, opt)
  return sanitized
}
|