iscc-core-ts 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -0
- package/lib/cjs/codec.d.ts +7 -0
- package/lib/cjs/codec.js +121 -0
- package/lib/cjs/codec.js.map +1 -0
- package/lib/cjs/constants.d.ts +44 -0
- package/lib/cjs/constants.js +67 -0
- package/lib/cjs/constants.js.map +1 -0
- package/lib/cjs/content-normalization.d.ts +20 -0
- package/lib/cjs/content-normalization.js +151 -0
- package/lib/cjs/content-normalization.js.map +1 -0
- package/lib/cjs/metacode.d.ts +23 -0
- package/lib/cjs/metacode.js +146 -0
- package/lib/cjs/metacode.js.map +1 -0
- package/lib/cjs/simhash.d.ts +1 -0
- package/lib/cjs/simhash.js +32 -0
- package/lib/cjs/simhash.js.map +1 -0
- package/lib/cjs/utils.d.ts +10 -0
- package/lib/cjs/utils.js +106 -0
- package/lib/cjs/utils.js.map +1 -0
- package/lib/esm/codec.d.ts +7 -0
- package/lib/esm/codec.js +112 -0
- package/lib/esm/codec.js.map +1 -0
- package/lib/esm/constants.d.ts +44 -0
- package/lib/esm/constants.js +64 -0
- package/lib/esm/constants.js.map +1 -0
- package/lib/esm/content-normalization.d.ts +20 -0
- package/lib/esm/content-normalization.js +140 -0
- package/lib/esm/content-normalization.js.map +1 -0
- package/lib/esm/metacode.d.ts +23 -0
- package/lib/esm/metacode.js +139 -0
- package/lib/esm/metacode.js.map +1 -0
- package/lib/esm/simhash.d.ts +1 -0
- package/lib/esm/simhash.js +28 -0
- package/lib/esm/simhash.js.map +1 -0
- package/lib/esm/utils.d.ts +10 -0
- package/lib/esm/utils.js +93 -0
- package/lib/esm/utils.js.map +1 -0
- package/lib/tsconfig-cjs.tsbuildinfo +1 -1
- package/lib/tsconfig.tsbuildinfo +1 -1
- package/package.json +16 -3
package/README.md
CHANGED
|
@@ -1,2 +1,44 @@
|
|
|
1
1
|
# iscc-core-ts
|
|
2
2
|
Iscc core TypeScript implementation of iscc-core reference
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Prerequisites
|
|
7
|
+
|
|
8
|
+
We recommend using [nvm](https://github.com/nvm-sh/nvm) to install and select the Node.js and npm versions. [Here](https://www.freecodecamp.org/news/node-version-manager-nvm-install-guide/) is an nvm installation guide.
|
|
9
|
+
|
|
10
|
+
Nvm version used:
|
|
11
|
+
```sh
|
|
12
|
+
nvm --version
|
|
13
|
+
0.39.3
|
|
14
|
+
```
|
|
15
|
+
Install node 21
|
|
16
|
+
|
|
17
|
+
```sh
|
|
18
|
+
nvm install 21
|
|
19
|
+
v21.7.1 is already installed.
|
|
20
|
+
Now using node v21.7.1 (npm v10.5.0)
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
# Install and build
|
|
24
|
+
|
|
25
|
+
```sh
|
|
26
|
+
npm i
|
|
27
|
+
npm run build
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
# Tests
|
|
31
|
+
|
|
32
|
+
```sh
|
|
33
|
+
npm run test
|
|
34
|
+
npm run test-isolated
|
|
35
|
+
npm run test-esm
|
|
36
|
+
npm run test-esm-isolated
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { MainTypes, SubTypes, Version } from './constants';
|
|
2
|
+
export declare function toHexString(bytes: Uint8Array): string;
|
|
3
|
+
export declare function encode_length(mtype: MainTypes, length: number): number;
|
|
4
|
+
export declare function encode_header(mtype: MainTypes, stype: SubTypes, version: Version, length: number): string;
|
|
5
|
+
export declare function encode_varnibble(n: number): string;
|
|
6
|
+
export declare function encode_component(mtype: MainTypes, stype: SubTypes, version: Version, bit_length: number, digest: string): string;
|
|
7
|
+
export declare function encode_base32(data: string): string;
|
package/lib/cjs/codec.js
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.encode_base32 = exports.encode_component = exports.encode_varnibble = exports.encode_header = exports.encode_length = exports.toHexString = void 0;
|
|
4
|
+
const rfc4648_1 = require("rfc4648");
|
|
5
|
+
const utils_1 = require("./utils");
|
|
6
|
+
const constants_1 = require("./constants");
|
|
7
|
+
/**
 * Render a sequence of byte values as one lowercase hexadecimal string.
 *
 * Each value is masked to its low 8 bits before formatting; values of 15 or
 * less get a leading '0' so that every byte of a Uint8Array yields two digits.
 *
 * @param bytes iterable of byte values (declared as Uint8Array in the typings)
 * @returns the concatenated hex digits, in input order
 */
function toHexString(bytes) {
    const pieces = [];
    for (const value of bytes) {
        const digits = (value & 0xff).toString(16);
        pieces.push(value > 15 ? digits : '0' + digits);
    }
    return pieces.join('');
}
|
|
19
|
+
exports.toHexString = toHexString;
|
|
20
|
+
/**
 * Convert a length into the value stored in the length field of an ISCC header.
 *
 * - META, SEMANTIC, CONTENT, DATA, INSTANCE, FLAKE: `length` must be a
 *   multiple of 32 that is at least 32; the result is `length / 32 - 1`.
 * - ISCC: `length` must lie in 0..7 and is returned unchanged.
 * - ID: `length` must lie in 64..96; the result is `floor((length - 64) / 8)`.
 *
 * @param mtype MainTypes value of the code being encoded
 * @param length length to encode (interpretation depends on `mtype`, see above)
 * @returns the encoded length value
 * @throws Error when `length` is not valid for `mtype`, or `mtype` is unknown
 */
function encode_length(mtype, length) {
    const error = 'Invalid length [' + length + '] for MainType [' + mtype + ']';
    if (mtype == constants_1.MainTypes.META ||
        mtype == constants_1.MainTypes.SEMANTIC ||
        mtype == constants_1.MainTypes.CONTENT ||
        mtype == constants_1.MainTypes.DATA ||
        mtype == constants_1.MainTypes.INSTANCE ||
        mtype == constants_1.MainTypes.FLAKE) {
        if (length >= 32 && length % 32 == 0) {
            return Math.floor(length / 32) - 1;
        }
        throw Error(error);
    }
    if (mtype == constants_1.MainTypes.ISCC) {
        if (0 <= length && length <= 7) {
            return length;
        }
        throw Error(error);
    }
    if (mtype == constants_1.MainTypes.ID) {
        if (64 <= length && length <= 96) {
            return Math.floor((length - 64) / 8);
        }
        // An out-of-range length is a length problem, not a "not a unit"
        // problem: report it the same way as every other branch.
        throw Error(error);
    }
    throw Error(error);
}
|
|
55
|
+
exports.encode_length = encode_length;
|
|
56
|
+
/**
 * Build the hex-encoded header of an ISCC unit.
 *
 * The four header fields (main type, sub type, version, encoded length) are
 * each turned into a bit string by encode_varnibble and concatenated in that
 * order. The combined bit string is handed to utils.binaryArrayToUint8Array
 * (presumably packing the bits into bytes -- confirm in utils) and the
 * result is rendered with toHexString.
 *
 * @param mtype MainTypes value
 * @param stype SubTypes value
 * @param version Version value
 * @param length already-encoded length value (see encode_length)
 * @returns the header as a hexadecimal string
 */
function encode_header(mtype, stype, version, length) {
    const fields = [mtype.valueOf(), stype.valueOf(), version.valueOf(), length];
    const bits = fields.map((field) => encode_varnibble(field)).join('');
    const packed = (0, utils_1.binaryArrayToUint8Array)(bits);
    return toHexString(packed);
}
|
|
68
|
+
exports.encode_header = encode_header;
|
|
69
|
+
/**
 * Encode an integer as a variable-length bit string ("varnibble").
 *
 * The value range selects a prefix and a payload width:
 *
 *   0..7       -> 4 bits:  the value itself, zero-padded (always starts with '0')
 *   8..71      -> '10'   + 6-bit  (n - 8)
 *   72..583    -> '110'  + 9-bit  (n - 72)
 *   584..4679  -> '1110' + 12-bit (n - 584)
 *
 * @param n integer in the range 0..4679
 * @returns string of '0'/'1' characters, 4, 8, 12 or 16 characters long
 * @throws Error when `n` is outside 0..4679 or is not an integer
 */
function encode_varnibble(n) {
    const tiers = [
        { end: 8, base: 0, prefix: '', width: 4 },
        { end: 72, base: 8, prefix: '10', width: 6 },
        { end: 584, base: 72, prefix: '110', width: 9 },
        { end: 4680, base: 584, prefix: '1110', width: 12 }
    ];
    const tier = tiers.find((t) => t.base <= n && n < t.end);
    if (tier === undefined) {
        throw Error('encode_varnibble - Value must be between 0 and 4679');
    }
    // A fractional number (or a numeric string) passes the range comparison
    // but would serialise to something that is not a pure bit string
    // (e.g. "1.1"), so reject it explicitly.
    if (!Number.isInteger(n)) {
        throw Error('encode_varnibble - Value must be an integer');
    }
    return tier.prefix + (n - tier.base).toString(2).padStart(tier.width, '0');
}
|
|
90
|
+
exports.encode_varnibble = encode_varnibble;
|
|
91
|
+
/**
 * Encode a single ISCC unit: header plus truncated digest, base32-encoded.
 *
 * @param mtype MainTypes value; must be a unit type (anything but ISCC)
 * @param stype SubTypes value
 * @param version Version value
 * @param bit_length length of the unit body in bits (validated by encode_length)
 * @param digest hexadecimal digest string; only its first `bit_length / 8`
 *               bytes (two hex characters each) are used
 * @returns the base32-encoded component code
 * @throws Error when `mtype` is ISCC (a composite, not a unit), when `mtype`
 *         is not a known MainTypes value, or when encode_length rejects
 *         `bit_length`
 */
function encode_component(mtype, stype, version, bit_length, digest) {
    if (mtype == constants_1.MainTypes.ISCC) {
        // The previous message contained a literal, uninterpolated "{mtype}"
        // placeholder; report the actual main type instead.
        throw Error('MainType [' + mtype + '] is not a unit');
    }
    const isUnit = mtype == constants_1.MainTypes.META ||
        mtype == constants_1.MainTypes.SEMANTIC ||
        mtype == constants_1.MainTypes.CONTENT ||
        mtype == constants_1.MainTypes.DATA ||
        mtype == constants_1.MainTypes.INSTANCE ||
        mtype == constants_1.MainTypes.ID ||
        mtype == constants_1.MainTypes.FLAKE;
    if (!isUnit) {
        throw Error('Illegal MainType');
    }
    const encoded_length = encode_length(mtype, bit_length);
    const nbytes = Math.floor(bit_length / 8);
    const header = encode_header(mtype, stype, version, encoded_length);
    // The digest is a hex string, so one byte corresponds to two characters.
    const body = digest.substring(0, nbytes * 2);
    return encode_base32(header + body);
}
|
|
113
|
+
exports.encode_component = encode_component;
|
|
114
|
+
/**
 * Base32-encode a hexadecimal string using the rfc4648 package, with the
 * trailing '=' characters removed via utils.rtrim.
 *
 * @param data hexadecimal string to encode
 * @returns the base32 text without '=' padding
 */
function encode_base32(data) {
    const raw = Buffer.from(data, 'hex');
    const padded = rfc4648_1.base32.stringify(raw);
    return (0, utils_1.rtrim)(padded, '=');
}
|
|
120
|
+
exports.encode_base32 = encode_base32;
|
|
121
|
+
//# sourceMappingURL=codec.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"codec.js","sourceRoot":"","sources":["../../src/codec.ts"],"names":[],"mappings":";;;AAAA,qCAAiC;AACjC,mCAAyD;AACzD,2CAA2D;AAE3D,SAAgB,WAAW,CAAC,KAAiB;IACzC,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACvB,IAAI,IAAI,GAAG,EAAE,EAAE,CAAC;YACZ,MAAM,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QACzC,CAAC;aAAM,CAAC;YACJ,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QAC/C,CAAC;IACL,CAAC;IACD,OAAO,MAAM,CAAC;AAClB,CAAC;AAVD,kCAUC;AAED,SAAgB,aAAa,CAAC,KAAgB,EAAE,MAAc;IAC1D,MAAM,KAAK,GACP,kBAAkB,GAAG,MAAM,GAAG,kBAAkB,GAAG,KAAK,GAAG,GAAG,CAAC;IACnE,IACI,KAAK,IAAI,qBAAS,CAAC,IAAI;QACvB,KAAK,IAAI,qBAAS,CAAC,QAAQ;QAC3B,KAAK,IAAI,qBAAS,CAAC,OAAO;QAC1B,KAAK,IAAI,qBAAS,CAAC,IAAI;QACvB,KAAK,IAAI,qBAAS,CAAC,QAAQ;QAC3B,KAAK,IAAI,qBAAS,CAAC,KAAK,EAC1B,CAAC;QACC,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,GAAG,EAAE,IAAI,CAAC,EAAE,CAAC;YACnC,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC;QACvC,CAAC;aAAM,CAAC;YACJ,MAAM,KAAK,CAAC,KAAK,CAAC,CAAC;QACvB,CAAC;IACL,CAAC;SAAM,IAAI,KAAK,IAAI,qBAAS,CAAC,IAAI,EAAE,CAAC;QACjC,IAAI,CAAC,IAAI,MAAM,IAAI,MAAM,IAAI,CAAC,EAAE,CAAC;YAC7B,OAAO,MAAM,CAAC;QAClB,CAAC;aAAM,CAAC;YACJ,MAAM,KAAK,CAAC,KAAK,CAAC,CAAC;QACvB,CAAC;IACL,CAAC;SAAM,IAAI,KAAK,IAAI,qBAAS,CAAC,EAAE,EAAE,CAAC;QAC/B,IAAI,EAAE,IAAI,MAAM,IAAI,MAAM,IAAI,EAAE,EAAE,CAAC;YAC/B,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;QACzC,CAAC;aAAM,CAAC;YACJ,MAAM,KAAK,CAAC,YAAY,GAAG,KAAK,GAAG,iBAAiB,CAAC,CAAC;QAC1D,CAAC;IACL,CAAC;SAAM,CAAC;QACJ,MAAM,KAAK,CAAC,KAAK,CAAC,CAAC;IACvB,CAAC;AACL,CAAC;AA/BD,sCA+BC;AAED,SAAgB,aAAa,CACzB,KAAgB,EAChB,KAAe,EACf,OAAgB,EAChB,MAAc;IAEd,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,KAAK,MAAM,CAAC,IAAI;QACZ,KAAK,CAAC,OAAO,EAAE;QACf,KAAK,CAAC,OAAO,EAAE;QACf,OAAO,CAAC,OAAO,EAAE;QACjB,MAAM;KACT,EAAE,CAAC;QACA,MAAM,IAAI,gBAAgB,CAAC,CAAC,CAAC,CAAC;IAClC,CAAC;IACD,OAAO,WAAW,CAAC,IAAA,+BAAuB,EAAC,MAAM,CAAC,CAAC,CAAC;AACxD,CAAC;AAhBD,sCAgBC;AAED,SAAgB,gBAAgB,CAAC,CAAS;IACtC,IAAI,CAAC,IAAI,C
AAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QAClB,OAAO,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QACtC,oDAAoD;IACxD,CAAC;SAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;QAC1B,oFAAoF;QACpF,OAAO,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACvD,CAAC;SAAM,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,GAAG,GAAG,EAAE,CAAC;QAC5B,OAAO,KAAK,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QACrD,sFAAsF;IAC1F,CAAC;SAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC;QAC9B,OAAO,MAAM,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;QACxD,wFAAwF;IAC5F,CAAC;SAAM,CAAC;QACJ,MAAM,KAAK,CAAC,qDAAqD,CAAC,CAAC;IACvE,CAAC;AACL,CAAC;AAhBD,4CAgBC;AAED,SAAgB,gBAAgB,CAC5B,KAAgB,EAChB,KAAe,EACf,OAAgB,EAChB,UAAkB,EAClB,MAAc;IAEd,IACI,KAAK,IAAI,qBAAS,CAAC,IAAI;QACvB,KAAK,IAAI,qBAAS,CAAC,QAAQ;QAC3B,KAAK,IAAI,qBAAS,CAAC,OAAO;QAC1B,KAAK,IAAI,qBAAS,CAAC,IAAI;QACvB,KAAK,IAAI,qBAAS,CAAC,QAAQ;QAC3B,KAAK,IAAI,qBAAS,CAAC,EAAE;QACrB,KAAK,IAAI,qBAAS,CAAC,KAAK,EAC1B,CAAC;QACC,MAAM,cAAc,GAAW,aAAa,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;QAChE,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC;QAC1C,MAAM,MAAM,GAAG,aAAa,CAAC,KAAK,EAAE,KAAK,EAAE,OAAO,EAAE,cAAc,CAAC,CAAC;QACpE,MAAM,IAAI,GAAG,MAAM,CAAC,SAAS,CAAC,CAAC,EAAE,MAAM,GAAG,CAAC,CAAC,CAAC;QAC7C,MAAM,cAAc,GAAG,aAAa,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;QACpD,OAAO,cAAc,CAAC;IAC1B,CAAC;SAAM,IAAI,KAAK,IAAI,qBAAS,CAAC,IAAI,EAAE,CAAC;QACjC,MAAM,KAAK,CAAC,4BAA4B,CAAC,CAAC;IAC9C,CAAC;SAAM,CAAC;QACJ,MAAM,KAAK,CAAC,kBAAkB,CAAC,CAAC;IACpC,CAAC;AACL,CAAC;AA3BD,4CA2BC;AAED,SAAgB,aAAa,CAAC,IAAY;IACtC;;IAEA;IACA,OAAO,IAAA,aAAK,EAAC,gBAAM,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AAClE,CAAC;AALD,sCAKC","sourcesContent":["import { base32 } from 'rfc4648';\nimport { binaryArrayToUint8Array, rtrim } from './utils';\nimport { MainTypes, SubTypes, Version } from './constants';\n\nexport 
function toHexString(bytes: Uint8Array): string {\n let result = '';\n for (const byte of bytes) {\n if (byte > 15) {\n result += (byte & 0xff).toString(16);\n } else {\n result += '0' + (byte & 0xff).toString(16);\n }\n }\n return result;\n}\n\nexport function encode_length(mtype: MainTypes, length: number): number {\n const error =\n 'Invalid length [' + length + '] for MainType [' + mtype + ']';\n if (\n mtype == MainTypes.META ||\n mtype == MainTypes.SEMANTIC ||\n mtype == MainTypes.CONTENT ||\n mtype == MainTypes.DATA ||\n mtype == MainTypes.INSTANCE ||\n mtype == MainTypes.FLAKE\n ) {\n if (length >= 32 && length % 32 == 0) {\n return Math.floor(length / 32) - 1;\n } else {\n throw Error(error);\n }\n } else if (mtype == MainTypes.ISCC) {\n if (0 <= length && length <= 7) {\n return length;\n } else {\n throw Error(error);\n }\n } else if (mtype == MainTypes.ID) {\n if (64 <= length && length <= 96) {\n return Math.floor((length - 64) / 8);\n } else {\n throw Error('MainType [' + mtype + '] is not a unit');\n }\n } else {\n throw Error(error);\n }\n}\n\nexport function encode_header(\n mtype: MainTypes,\n stype: SubTypes,\n version: Version,\n length: number\n): string {\n let header = '';\n for (const n of [\n mtype.valueOf(),\n stype.valueOf(),\n version.valueOf(),\n length\n ]) {\n header += encode_varnibble(n);\n }\n return toHexString(binaryArrayToUint8Array(header));\n}\n\nexport function encode_varnibble(n: number): string {\n if (0 <= n && n < 8) {\n return n.toString(2).padStart(4, '0');\n // return new BitSet(n).toString().padStart(4,\"0\");\n } else if (8 <= n && n < 72) {\n // return new BitSet(\"10\").toString() + new BitSet(n-8).toString().padStart(6,\"0\");\n return '10' + (n - 8).toString(2).padStart(6, '0');\n } else if (72 <= n && n < 584) {\n return '110' + (n - 72).toString(2).padStart(9, '0');\n // return new BitSet(\"110\").toString() + new BitSet(n-72).toString().padStart(9,\"0\");\n } else if (584 <= n && n < 4680) {\n return '1110' + 
(n - 584).toString(2).padStart(12, '0');\n // return new BitSet(\"1110\").toString() + new BitSet(n-584).toString().padStart(12,\"0\");\n } else {\n throw Error('encode_varnibble - Value must be between 0 and 4679');\n }\n}\n\nexport function encode_component(\n mtype: MainTypes,\n stype: SubTypes,\n version: Version,\n bit_length: number,\n digest: string\n): string {\n if (\n mtype == MainTypes.META ||\n mtype == MainTypes.SEMANTIC ||\n mtype == MainTypes.CONTENT ||\n mtype == MainTypes.DATA ||\n mtype == MainTypes.INSTANCE ||\n mtype == MainTypes.ID ||\n mtype == MainTypes.FLAKE\n ) {\n const encoded_length: number = encode_length(mtype, bit_length);\n const nbytes = Math.floor(bit_length / 8);\n const header = encode_header(mtype, stype, version, encoded_length);\n const body = digest.substring(0, nbytes * 2);\n const component_code = encode_base32(header + body);\n return component_code;\n } else if (mtype == MainTypes.ISCC) {\n throw Error('{mtype} ISCC is not a unit');\n } else {\n throw Error('Illegal MainType');\n }\n}\n\nexport function encode_base32(data: string) {\n /*\n Standard RFC4648 base32 encoding without padding.\n */\n return rtrim(base32.stringify(Buffer.from(data, 'hex')), '=');\n}\n"]}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
export declare const METACODE_BITS: number;
|
|
2
|
+
export declare const META_TRIM_NAME: number;
|
|
3
|
+
export declare const META_NGRAM_SIZE_TEXT: number;
|
|
4
|
+
export declare const META_NGRAM_SIZE_BYTES: number;
|
|
5
|
+
/**
|
|
6
|
+
*
|
|
7
|
+
* """
|
|
8
|
+
## MT - MainTypes
|
|
9
|
+
|
|
10
|
+
| Uint | Symbol | Bits | Purpose |
|
|
11
|
+
|----- |:---------|------|---------------------------------------------------------|
|
|
12
|
+
| 0 | META | 0000 | Match on metadata similarity |
|
|
13
|
+
| 1 | SEMANTIC | 0001 | Match on semantic content similarity |
|
|
14
|
+
| 2 | CONTENT | 0010 | Match on perceptual content similarity |
|
|
15
|
+
| 3 | DATA | 0011 | Match on data similarity |
|
|
16
|
+
| 4 | INSTANCE | 0100 | Match on data identity |
|
|
17
|
+
| 5 | ISCC | 0101 | Composite of two or more ISCC-UNITs with common header |
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
*/
|
|
21
|
+
export declare enum MainTypes {
|
|
22
|
+
META = 0,
|
|
23
|
+
SEMANTIC = 1,
|
|
24
|
+
CONTENT = 2,
|
|
25
|
+
DATA = 3,
|
|
26
|
+
INSTANCE = 4,
|
|
27
|
+
ISCC = 5,
|
|
28
|
+
ID = 6,
|
|
29
|
+
FLAKE = 7
|
|
30
|
+
}
|
|
31
|
+
export declare enum SubTypes {
|
|
32
|
+
NONE = 0
|
|
33
|
+
}
|
|
34
|
+
export declare enum Version {
|
|
35
|
+
V0 = 0
|
|
36
|
+
}
|
|
37
|
+
export interface IMetaCodeResult {
|
|
38
|
+
iscc: string;
|
|
39
|
+
metahash: string;
|
|
40
|
+
name: string;
|
|
41
|
+
description?: string;
|
|
42
|
+
meta?: string;
|
|
43
|
+
version: number;
|
|
44
|
+
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.Version = exports.SubTypes = exports.MainTypes = exports.META_NGRAM_SIZE_BYTES = exports.META_NGRAM_SIZE_TEXT = exports.META_TRIM_NAME = exports.METACODE_BITS = void 0;
|
|
4
|
+
exports.METACODE_BITS = 64;
|
|
5
|
+
exports.META_TRIM_NAME = 128;
|
|
6
|
+
exports.META_NGRAM_SIZE_TEXT = 3;
|
|
7
|
+
exports.META_NGRAM_SIZE_BYTES = 4;
|
|
8
|
+
/**
|
|
9
|
+
*
|
|
10
|
+
* """
|
|
11
|
+
## MT - MainTypes
|
|
12
|
+
|
|
13
|
+
| Uint | Symbol | Bits | Purpose |
|
|
14
|
+
|----- |:---------|------|---------------------------------------------------------|
|
|
15
|
+
| 0 | META | 0000 | Match on metadata similarity |
|
|
16
|
+
| 1 | SEMANTIC | 0001 | Match on semantic content similarity |
|
|
17
|
+
| 2 | CONTENT | 0010 | Match on perceptual content similarity |
|
|
18
|
+
| 3 | DATA | 0011 | Match on data similarity |
|
|
19
|
+
| 4 | INSTANCE | 0100 | Match on data identity |
|
|
20
|
+
| 5 | ISCC | 0101 | Composite of two or more ISCC-UNITs with common header |
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
*/
|
|
24
|
+
var MainTypes;
|
|
25
|
+
(function (MainTypes) {
|
|
26
|
+
MainTypes[MainTypes["META"] = 0] = "META";
|
|
27
|
+
MainTypes[MainTypes["SEMANTIC"] = 1] = "SEMANTIC";
|
|
28
|
+
MainTypes[MainTypes["CONTENT"] = 2] = "CONTENT";
|
|
29
|
+
MainTypes[MainTypes["DATA"] = 3] = "DATA";
|
|
30
|
+
MainTypes[MainTypes["INSTANCE"] = 4] = "INSTANCE";
|
|
31
|
+
MainTypes[MainTypes["ISCC"] = 5] = "ISCC";
|
|
32
|
+
MainTypes[MainTypes["ID"] = 6] = "ID";
|
|
33
|
+
MainTypes[MainTypes["FLAKE"] = 7] = "FLAKE";
|
|
34
|
+
})(MainTypes || (exports.MainTypes = MainTypes = {}));
|
|
35
|
+
/*
|
|
36
|
+
## ST - SubTypes
|
|
37
|
+
|
|
38
|
+
| Uint | Symbol | Bits | Purpose |
|
|
39
|
+
|----- |:---------|------|---------------------------------------------------------|
|
|
40
|
+
| 0 | NONE | 0000 | For MainTypes that do not specify SubTypes |
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
NONE = 0
|
|
44
|
+
*/
|
|
45
|
+
var SubTypes;
|
|
46
|
+
(function (SubTypes) {
|
|
47
|
+
SubTypes[SubTypes["NONE"] = 0] = "NONE";
|
|
48
|
+
})(SubTypes || (exports.SubTypes = SubTypes = {}));
|
|
49
|
+
/*
|
|
50
|
+
## VS - Version
|
|
51
|
+
|
|
52
|
+
Code Version
|
|
53
|
+
|
|
54
|
+
| Uint | Symbol | Bits | Purpose |
|
|
55
|
+
|----- |:---------|------|---------------------------------------------------------|
|
|
56
|
+
| 0 | V0 | 0000 | Initial Version of Code without breaking changes |
|
|
57
|
+
|
|
58
|
+
"""
|
|
59
|
+
|
|
60
|
+
V0 = 0
|
|
61
|
+
|
|
62
|
+
*/
|
|
63
|
+
var Version;
|
|
64
|
+
(function (Version) {
|
|
65
|
+
Version[Version["V0"] = 0] = "V0";
|
|
66
|
+
})(Version || (exports.Version = Version = {}));
|
|
67
|
+
//# sourceMappingURL=constants.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"constants.js","sourceRoot":"","sources":["../../src/constants.ts"],"names":[],"mappings":";;;AAAa,QAAA,aAAa,GAAW,EAAE,CAAC;AAC3B,QAAA,cAAc,GAAW,GAAG,CAAC;AAC7B,QAAA,oBAAoB,GAAW,CAAC,CAAC;AACjC,QAAA,qBAAqB,GAAW,CAAC,CAAC;AAE/C;;;;;;;;;;;;;;;GAeG;AACH,IAAY,SASX;AATD,WAAY,SAAS;IACjB,yCAAQ,CAAA;IACR,iDAAY,CAAA;IACZ,+CAAW,CAAA;IACX,yCAAQ,CAAA;IACR,iDAAY,CAAA;IACZ,yCAAQ,CAAA;IACR,qCAAM,CAAA;IACN,2CAAS,CAAA;AACb,CAAC,EATW,SAAS,yBAAT,SAAS,QASpB;AAED;;;;;;;;;MASM;AAEN,IAAY,QAEX;AAFD,WAAY,QAAQ;IAChB,uCAAQ,CAAA;AACZ,CAAC,EAFW,QAAQ,wBAAR,QAAQ,QAEnB;AAED;;;;;;;;;;;;;MAaM;AAEN,IAAY,OAEX;AAFD,WAAY,OAAO;IACf,iCAAM,CAAA;AACV,CAAC,EAFW,OAAO,uBAAP,OAAO,QAElB","sourcesContent":["export const METACODE_BITS: number = 64;\nexport const META_TRIM_NAME: number = 128;\nexport const META_NGRAM_SIZE_TEXT: number = 3;\nexport const META_NGRAM_SIZE_BYTES: number = 4;\n\n/**\n * \n * \"\"\"\n ## MT - MainTypes\n\n | Uint | Symbol | Bits | Purpose |\n |----- |:---------|------|---------------------------------------------------------|\n | 0 | META | 0000 | Match on metadata similarity |\n | 1 | SEMANTIC | 0001 | Match on semantic content similarity |\n | 2 | CONTENT | 0010 | Match on perceptual content similarity |\n | 3 | DATA | 0011 | Match on data similarity |\n | 4 | INSTANCE | 0100 | Match on data identity |\n | 5 | ISCC | 0101 | Composite of two or more ISCC-UNITs with common header |\n \"\"\"\n\n */\nexport enum MainTypes {\n META = 0,\n SEMANTIC = 1,\n CONTENT = 2,\n DATA = 3,\n INSTANCE = 4,\n ISCC = 5,\n ID = 6,\n FLAKE = 7\n}\n\n/*\n ## ST - SubTypes\n \n | Uint | Symbol | Bits | Purpose |\n |----- |:---------|------|---------------------------------------------------------|\n | 0 | NONE | 0000 | For MainTypes that do not specify SubTypes |\n \"\"\"\n \n NONE = 0\n */\n\nexport enum SubTypes {\n NONE = 0\n}\n\n/*\n ## VS - Version\n \n Code Version\n \n | Uint | Symbol | Bits | Purpose |\n |----- 
|:---------|------|---------------------------------------------------------|\n | 0 | V0 | 0000 | Initial Version of Code without breaking changes |\n \n \"\"\"\n \n V0 = 0\n \n */\n\nexport enum Version {\n V0 = 0\n}\n\nexport interface IMetaCodeResult {\n iscc: string;\n metahash: string;\n name: string;\n description?: string;\n meta?: string;\n version: number;\n}\n"]}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
*
|
|
3
|
+
*
|
|
4
|
+
 * Specification implementation of https://iscc.codes/specification/#text_trim
|
|
5
|
+
*
|
|
6
|
+
* @param text
|
|
7
|
+
* @returns
|
|
8
|
+
*/
|
|
9
|
+
export declare function text_trim(text: string, limit?: number): string;
|
|
10
|
+
export declare function text_encodeUTF8(text: string, limit?: number): string;
|
|
11
|
+
export declare function text_remove_newlines(text: string): string;
|
|
12
|
+
/**
|
|
13
|
+
*
|
|
14
|
+
* For category name unicode filtering, the reference is https://unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt
|
|
15
|
+
*
|
|
16
|
+
* @param text
|
|
17
|
+
* @returns text normalized
|
|
18
|
+
*/
|
|
19
|
+
export declare function text_clean(text: string): string;
|
|
20
|
+
export declare function text_collapse(text: string): string;
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
*
|
|
4
|
+
*
|
|
5
|
+
 * Specification implementation of https://iscc.codes/specification/#text_trim
|
|
6
|
+
*
|
|
7
|
+
* @param text
|
|
8
|
+
* @returns
|
|
9
|
+
*/
|
|
10
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
11
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
12
|
+
};
|
|
13
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
14
|
+
exports.text_collapse = exports.text_clean = exports.text_remove_newlines = exports.text_encodeUTF8 = exports.text_trim = void 0;
|
|
15
|
+
const xregexp_1 = __importDefault(require("xregexp"));
|
|
16
|
+
/**
 * Limit `text` via text_encodeUTF8 and strip leading/trailing whitespace
 * from the result.
 *
 * @param text input string
 * @param limit optional limit forwarded unchanged to text_encodeUTF8
 * @returns the limited, trimmed string
 */
function text_trim(text, limit) {
    const limited = text_encodeUTF8(text, limit);
    return limited.trim();
}
|
|
19
|
+
exports.text_trim = text_trim;
|
|
20
|
+
/**
 * Round-trip `text` through UTF-8, optionally keeping at most `limit` bytes.
 *
 * When a positive `limit` is given, the encoded byte sequence is cut at
 * `limit` bytes. If that cut falls inside a multi-byte character, the
 * incomplete character is dropped entirely, so the result never ends in a
 * U+FFFD replacement character produced by a partial sequence.
 *
 * @param text input string
 * @param limit optional maximum number of UTF-8 bytes to keep; when it is
 *              omitted, zero, negative or NaN the whole text is kept
 * @returns the decoded (and possibly shortened) string
 */
function text_encodeUTF8(text, limit) {
    const bytes = new TextEncoder().encode(text);
    const decoder = new TextDecoder('utf-8');
    if (!(limit && limit > 0)) {
        return decoder.decode(bytes);
    }
    let end = Math.min(Math.floor(limit), bytes.length);
    // If the first excluded byte is a continuation byte (0b10xxxxxx), the cut
    // is in the middle of a character: back up to that character's lead byte
    // so the partial sequence is left out.
    while (end > 0 && end < bytes.length && (bytes[end] & 0xc0) === 0x80) {
        end -= 1;
    }
    return decoder.decode(bytes.subarray(0, end));
}
|
|
32
|
+
exports.text_encodeUTF8 = text_encodeUTF8;
|
|
33
|
+
/**
 * Collapse every run of whitespace (newlines included) into a single space
 * and drop leading/trailing whitespace.
 *
 * @param text input string
 * @returns the non-whitespace tokens of `text` joined by single spaces
 */
function text_remove_newlines(text) {
    const tokens = text.match(/\S+/g);
    return tokens === null ? '' : tokens.join(' ');
}
|
|
39
|
+
exports.text_remove_newlines = text_remove_newlines;
|
|
40
|
+
/**
 * Tell whether `s` consists solely of characters from one of the Unicode
 * "Other" general categories: Control (Cc), Format (Cf), Surrogate (Cs),
 * Private_Use (Co) or Unassigned (Cn). As before, all characters of `s` must
 * belong to the same one of these categories, and an empty string yields
 * false.
 *
 * A native regular expression with the `u` flag is used so that matching
 * works on code points. The earlier XRegExp patterns were built without
 * XRegExp's astral flag and therefore matched UTF-16 code units, which made
 * every character outside the Basic Multilingual Plane (emoji, for example)
 * look like a run of surrogates and be reported as a control character.
 *
 * @param s string to classify (callers in this file pass single characters)
 * @returns true when `s` is non-empty and entirely Cc, Cf, Cs, Co or Cn
 */
function isCharControlCategoryUnicode(s) {
    return /^(?:\p{Control}+|\p{Format}+|\p{Surrogate}+|\p{Private_Use}+|\p{Unassigned}+)$/u.test(s);
}
|
|
53
|
+
/**
 * Tell whether `s` contains at least one of the newline characters
 * U+000A, U+000B, U+000C, U+000D, U+0085, U+2028 or U+2029.
 *
 * The match is not anchored, so the character may appear anywhere in `s`.
 *
 * @param s string to inspect
 * @returns true when one of the listed characters occurs in `s`
 */
function isCharNewLinesUnicode(s) {
    return /[\u000A-\u000D\u0085\u2028\u2029]/.test(s);
}
|
|
62
|
+
/**
|
|
63
|
+
*
|
|
64
|
+
* For category name unicode filtering, the reference is https://unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt
|
|
65
|
+
*
|
|
66
|
+
* @param text
|
|
67
|
+
* @returns text normalized
|
|
68
|
+
*/
|
|
69
|
+
function text_clean(text) {
    const LINE_FEED = '\u{000A}';
    const kept = [];
    // Number of newlines emitted since the last kept non-newline character.
    let newlineRun = 0;
    // Walk the NFKC-normalized text one code point at a time.
    for (const ch of text.normalize('NFKC')) {
        if (isCharNewLinesUnicode(ch)) {
            // Every newline variant becomes a line feed; at most two in a row
            // are kept, further ones are dropped.
            if (newlineRun < 2) {
                kept.push(LINE_FEED);
                newlineRun += 1;
            }
        }
        else if (!isCharControlCategoryUnicode(ch)) {
            kept.push(ch);
            newlineRun = 0;
        }
        // Remaining case: a non-newline control character. It is dropped and
        // deliberately does not interrupt the current run of newlines.
    }
    return kept.join('').trim();
}
|
|
113
|
+
exports.text_clean = text_clean;
|
|
114
|
+
/**
 * Reduce text to a collapsed form.
 *
 * Steps, in order: NFD-decompose, pass through text_trim, delete all
 * whitespace, lower-case, delete every character whose Unicode general
 * category is Other (Cc, Cf, Cn, Co, Cs), Mark (Mc, Me, Mn) or Punctuation
 * (Pc, Pd, Pe, Pf, Pi, Po, Ps), and finally NFKC-normalize.
 *
 * Category names follow
 * https://unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt
 *
 * @param text input string
 * @returns the collapsed text
 */
function text_collapse(text) {
    const decomposed = text_trim(text.normalize('NFD'));
    const lowered = decomposed.replace(/\s/g, '').toLowerCase();
    // One pass removes the union of the three filtered category groups.
    const filtered = lowered.replace(/[\p{Other}\p{Mark}\p{Punctuation}]/gu, '');
    return filtered.normalize('NFKC');
}
|
|
150
|
+
exports.text_collapse = text_collapse;
|
|
151
|
+
//# sourceMappingURL=content-normalization.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content-normalization.js","sourceRoot":"","sources":["../../src/content-normalization.ts"],"names":[],"mappings":";AAAA;;;;;;;GAOG;;;;;;AAEH,sDAA8B;AAE9B,SAAgB,SAAS,CAAC,IAAY,EAAE,KAAc;IAClD,OAAO,eAAe,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;AAC/C,CAAC;AAFD,8BAEC;AAED,SAAgB,eAAe,CAAC,IAAY,EAAE,KAAc;IACxD,MAAM,OAAO,GAAG,IAAI,WAAW,EAAE,CAAC;IAClC,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACrC,MAAM,OAAO,GAAG,IAAI,WAAW,CAAC,OAAO,CAAC,CAAC;IACzC,IAAI,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;QACrB,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;IACnD,CAAC;SAAM,CAAC;QACJ,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IACnC,CAAC;IACD,OAAO,IAAI,CAAC;AAChB,CAAC;AAVD,0CAUC;AAED,SAAgB,oBAAoB,CAAC,IAAY;IAC7C,OAAO,IAAI;SACN,KAAK,CAAC,OAAO,CAAC;SACd,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;SAClC,IAAI,CAAC,GAAG,CAAC,CAAC;AACnB,CAAC;AALD,oDAKC;AAED,SAAS,4BAA4B,CAAC,CAAS;IAC3C,MAAM,EAAE,GAAG,IAAA,iBAAO,EAAC,iBAAiB,CAAC,CAAC;IACtC,MAAM,EAAE,GAAG,IAAA,iBAAO,EAAC,gBAAgB,CAAC,CAAC;IACrC,MAAM,EAAE,GAAG,IAAA,iBAAO,EAAC,mBAAmB,CAAC,CAAC;IACxC,MAAM,EAAE,GAAG,IAAA,iBAAO,EAAC,qBAAqB,CAAC,CAAC;IAC1C,MAAM,EAAE,GAAG,IAAA,iBAAO,EAAC,oBAAoB,CAAC,CAAC;IACzC,IAAI,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;QACrE,OAAO,IAAI,CAAC;IAChB,CAAC;SAAM,CAAC;QACJ,OAAO,KAAK,CAAC;IACjB,CAAC;AACL,CAAC;AAED,SAAS,qBAAqB,CAAC,CAAS;IACpC,MAAM,KAAK,GAAG,kDAAkD,CAAC;IACjE,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;QAChB,OAAO,IAAI,CAAC;IAChB,CAAC;SAAM,CAAC;QACJ,OAAO,KAAK,CAAC;IACjB,CAAC;AACL,CAAC;AACD;;;;;;GAMG;AAEH,SAAgB,UAAU,CAAC,IAAY;IACnC,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;IAE9B,IAAI,aAAa,GAAG,EAAE,CAAC;IACvB,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACrC,4BAA4B;IAC5B,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;QAC1B,IAAI,CAAC,4BAA4B,CAAC,CAAC,CAAC,IAAI,qBAAqB,CAAC,CAAC,CAAC,E
AAE,CAAC;YAC/D,aAAa,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAC5C,CAAC;IACL,CAAC;IAED,IAAI,YAAY,GAAG,EAAE,CAAC;IACtB,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IACxC,IAAI,aAAa,GAAW,CAAC,CAAC;IAC9B,2BAA2B;IAC3B,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACpB,IAAI,qBAAqB,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3B,0BAA0B;YAC1B,IAAI,aAAa,GAAG,CAAC,EAAE,CAAC;gBACpB,6BAA6B;gBAC7B,YAAY,GAAG,YAAY,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;gBAC/C,aAAa,IAAI,CAAC,CAAC;YACvB,CAAC;YACD,SAAS;QACb,CAAC;aAAM,CAAC;YACJ,aAAa,GAAG,CAAC,CAAC;QACtB,CAAC;QACD,YAAY,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAC1C,CAAC;IACD,IAAI,GAAG,YAAY,CAAC;IAEpB;OACG;IAEH,iCAAiC;IACjC,uCAAuC;IAEvC,qBAAqB;IACrB,uCAAuC;IAEvC,iCAAiC;IACjC,uCAAuC;IAEvC,kFAAkF;IAClF,iBAAiB;IACjB,yCAAyC;IACzC,IAAI;IAEJ,OAAO,IAAI,CAAC,IAAI,EAAE,CAAC;AACvB,CAAC;AAlDD,gCAkDC;AAED,SAAgB,aAAa,CAAC,IAAY;IACtC,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;IAC7B,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IACvB,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAC/B,IAAI,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IAE1B,kMAAkM;IAClM;;;;;;;MAOE;IACF,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;IAEvC;;;;;MAKE;IACF,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;IAEtC;;;;;;;;;MASE;IACF,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC,CAAC;IAE7C,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;IAE9B,OAAO,IAAI,CAAC;AAChB,CAAC;AAxCD,sCAwCC","sourcesContent":["/**\n *\n *\n * Specification implementaton of https://iscc.codes/specification/#text_trim\n *\n * @param text\n * @returns\n */\n\nimport XRegExp from 'xregexp';\n\nexport function text_trim(text: string, limit?: number): string {\n return text_encodeUTF8(text, limit).trim();\n}\n\nexport function text_encodeUTF8(text: string, limit?: number): string {\n const encoder = new TextEncoder();\n const utf8Arr = encoder.encode(text);\n const decoder = new TextDecoder('utf-8');\n if (limit && limit > 0) {\n text = decoder.decode(utf8Arr.slice(0, limit));\n } else {\n text = decoder.decode(utf8Arr);\n }\n return 
text;\n}\n\nexport function text_remove_newlines(text: string): string {\n return text\n .split(/(\\s+)/)\n .filter((x) => x.trim().length > 0)\n .join(' ');\n}\n\nfunction isCharControlCategoryUnicode(s: string) {\n const cc = XRegExp('^\\\\p{Control}+$');\n const cf = XRegExp('^\\\\p{Format}+$');\n const cs = XRegExp('^\\\\p{Surrogate}+$');\n const co = XRegExp('^\\\\p{Private_Use}+$');\n const cn = XRegExp('^\\\\p{Unassigned}+$');\n if (cc.test(s) || cf.test(s) || cs.test(s) || co.test(s) || cn.test(s)) {\n return true;\n } else {\n return false;\n }\n}\n\nfunction isCharNewLinesUnicode(s: string) {\n const regex = /\\u000A|\\u000B|\\u000C|\\u000D|\\u0085|\\u2028|\\u2029/;\n if (regex.test(s)) {\n return true;\n } else {\n return false;\n }\n}\n/**\n *\n * For category name unicode filtering, the reference is https://unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt\n *\n * @param text\n * @returns text normalized\n */\n\nexport function text_clean(text: string): string {\n text = text.normalize('NFKC');\n\n let textWithoutCC = '';\n const charsWithCC = Array.from(text);\n // Remove control characters\n for (const c of charsWithCC) {\n if (!isCharControlCategoryUnicode(c) || isCharNewLinesUnicode(c)) {\n textWithoutCC = textWithoutCC.concat(c);\n }\n }\n\n let textFiltered = '';\n const chars = Array.from(textWithoutCC);\n let newline_count: number = 0;\n // console.log(\"text\"+text)\n for (const c of chars) {\n if (isCharNewLinesUnicode(c)) {\n //console.log(\"new line!\")\n if (newline_count < 2) {\n // console.log(\"new line <2\")\n textFiltered = textFiltered.concat('\\u{000A}');\n newline_count += 1;\n }\n continue;\n } else {\n newline_count = 0;\n }\n textFiltered = textFiltered.concat(c);\n }\n text = textFiltered;\n\n /* Keep these control characters (Cc) that are commonly considered white-space:\n */\n\n // \\u0009, # Horizontal Tab (TAB)\n //text = text.replace(/\\u0009/gu, ' ');\n\n // \\u000A, # Linefeed\n //text = 
text.replace(/\\u000A/gu, ' ');\n\n // \\u000D, # Carriage Return (CR)\n //text = text.replace(/\\u000D/gu, ' ');\n\n // 6. Keep or remove whitespace (remove duplicate whitespace) according to keep_ws\n // if(!keep_ws){\n // text = text.replace(/\\s+/g, ' ');\n // }\n\n return text.trim();\n}\n\nexport function text_collapse(text: string): string {\n text = text.normalize('NFD');\n text = text_trim(text);\n text = text.replace(/\\s/g, '');\n text = text.toLowerCase();\n\n // Filter out all characters that fall into the Unicode categories listed in the constant UNICODE_FILTER: \"Cc\", \"Cf\", \"Cn\", \"Co\", \"Cs\", \"Mc\", \"Me\", \"Mn\", \"Pc\", \"Pd\", \"Pe\", \"Pf\", \"Pi\", \"Po\", \"Ps\"\n /*\n gc ; C ; Other # Cc | Cf | Cn | Co | Cs\n gc ; Cc ; Control ; cntrl\n gc ; Cf ; Format\n gc ; Cn ; Unassigned\n gc ; Co ; Private_Use\n gc ; Cs ; Surrogate\n */\n text = text.replace(/\\p{Other}/gu, '');\n\n /*\n gc ; M ; Mark ; Combining_Mark # Mc | Me | Mn\n gc ; Mc ; Spacing_Mark\n gc ; Me ; Enclosing_Mark\n gc ; Mn ; Nonspacing_Mark\n */\n text = text.replace(/\\p{Mark}/gu, '');\n\n /*\n gc ; P ; Punctuation ; punct # Pc | Pd | Pe | Pf | Pi | Po | Ps\n gc ; Pc ; Connector_Punctuation\n gc ; Pd ; Dash_Punctuation\n gc ; Pe ; Close_Punctuation\n gc ; Pf ; Final_Punctuation\n gc ; Pi ; Initial_Punctuation\n gc ; Po ; Other_Punctuation\n gc ; Ps ; Open_Punctuation\n */\n text = text.replace(/\\p{Punctuation}/gu, '');\n\n text = text.normalize('NFKC');\n\n return text;\n}\n"]}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { IMetaCodeResult } from './constants';
|
|
2
|
+
/**
|
|
3
|
+
*
|
|
4
|
+
* Implementation of
|
|
5
|
+
* https://github.com/iscc/iscc-specs/blob/version-1.0/docs/specification.md#generate-meta-code
|
|
6
|
+
*
|
|
7
|
+
* 1 - Verify the requested ISCC version is supported by your implementation.
|
|
8
|
+
*
|
|
9
|
+
* @param name
|
|
10
|
+
* @param description
|
|
11
|
+
* @param version
|
|
12
|
+
* @returns IMetaCodeResult
|
|
13
|
+
*/
|
|
14
|
+
// NOTE(review): the JSDoc block preceding this declaration documents only `name`,
// `description` and `version`; the `meta` and `bits` parameters are undocumented.
// All parameters except `name` are optional; their defaults are not visible in this
// declaration file — confirm them against the implementation before documenting.
export declare function gen_meta_code(name: string, description?: string, meta?: string, bits?: number, version?: number): Promise<IMetaCodeResult>;
|
|
15
|
+
/**
|
|
16
|
+
*
|
|
17
|
+
* @param name
|
|
18
|
+
* @param description
|
|
19
|
+
* @returns
|
|
20
|
+
*/
|
|
21
|
+
// NOTE(review): the JSDoc block preceding this declaration documents only `name` and
// `description` and leaves `@returns` empty; `meta` and `bits` are undocumented.
// The function resolves to an `IMetaCodeResult` (imported from './constants').
// Presumably this is the version-0 variant dispatched to by `gen_meta_code` — confirm.
export declare function gen_meta_code_v0(name: string, description?: string, meta?: string, bits?: number): Promise<IMetaCodeResult>;
|
|
22
|
+
/**
 * Asynchronously maps a string to a string.
 *
 * NOTE(review): presumably computes a BLAKE3-based multihash of `data`, judging by
 * the name alone — the digest length and output encoding are not visible in this
 * declaration file; confirm against the implementation.
 *
 * @param data - input string to hash
 * @returns a promise resolving to the resulting string
 */
export declare function multi_hash_blake3(data: string): Promise<string>;
|
|
23
|
+
/**
 * Asynchronously derives a string from `name` and, optionally, `extra`.
 *
 * NOTE(review): presumably the version-0 similarity ("soft") hash over metadata,
 * judging by the name alone — confirm against the implementation. `extra` and
 * `descJsonFormat` are optional; neither their defaults nor the way
 * `descJsonFormat` changes the handling of `extra` is visible in this
 * declaration file.
 *
 * @param name - primary input string
 * @param extra - optional additional input string
 * @param descJsonFormat - optional flag; semantics to be confirmed
 * @returns a promise resolving to the resulting string
 */
export declare function soft_hash_meta_v0(name: string, extra?: string, descJsonFormat?: boolean): Promise<string>;
|