functionalscript 0.0.382 → 0.0.385
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/doc/vm.md +136 -0
- package/package.json +1 -1
- package/text/encoding/README.md +42 -28
package/doc/vm.md
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# VM
|
|
2
|
+
|
|
3
|
+
## Tagged Pointers
|
|
4
|
+
|
|
5
|
+
https://en.wikipedia.org/wiki/Tagged_pointer
|
|
6
|
+
|
|
7
|
+
### Common (3 bit)
|
|
8
|
+
|
|
9
|
+
- `false`
|
|
10
|
+
- `true`
|
|
11
|
+
- `undefined`
|
|
12
|
+
- `""`
|
|
13
|
+
- `-infinity`
|
|
14
|
+
- `+infinity`
|
|
15
|
+
- `-0`
|
|
16
|
+
- `NaN`
|
|
17
|
+
|
|
18
|
+
### 32 bit platform
|
|
19
|
+
|
|
20
|
+
- `31`: bigInt31 (-1_073_741_824..1_073_741_823)
|
|
21
|
+
- `31`:
|
|
22
|
+
- `30`: pointer + null, alignment - 4 bytes.
|
|
23
|
+
- `30`:
|
|
24
|
+
- `29`:
|
|
25
|
+
- `28`: 4 x 7 bit string
|
|
26
|
+
- `28`: 2 x 14 bit string
|
|
27
|
+
- `29`:
|
|
28
|
+
- `28`: int28 (-134_217_728..134_217_727)
|
|
29
|
+
- `28`:
|
|
30
|
+
- `27`: 3 x 9 bit string
|
|
31
|
+
- `20`: a UTF-16 surrogate pair
|
|
32
|
+
- `16`: 1 x 16 bit string
|
|
33
|
+
- `3`: common
|
|
34
|
+
|
|
35
|
+
### 64 bit platform
|
|
36
|
+
|
|
37
|
+
- `63`: 9 x 7 bit string
|
|
38
|
+
- `63`:
|
|
39
|
+
- `62`:
|
|
40
|
+
- `61`: pointer + null, alignment - 8 bytes
|
|
41
|
+
- `61`: float61
|
|
42
|
+
- `62`:
|
|
43
|
+
- `60`: 6 x 10 bit string
|
|
44
|
+
- `60`: 5 x 12 bit string
|
|
45
|
+
- `60`: 4 x 15 bit string
|
|
46
|
+
- `60`:
|
|
47
|
+
- `59`: bigInt59 (-288_230_376_151_711_744..288_230_376_151_711_743)
|
|
48
|
+
- `59`:
|
|
49
|
+
- `56`: 8 x 7-bit string
|
|
50
|
+
- `56`: 7 x 8-bit string
|
|
51
|
+
- `48`: 3 x 16 bit string
|
|
52
|
+
- `32`: 2 x 16 bit string
|
|
53
|
+
- `32`: int32
|
|
54
|
+
- `16`: 1 x UTF16 string
|
|
55
|
+
- `3`: common
|
|
56
|
+
|
|
57
|
+
### Float64
|
|
58
|
+
|
|
59
|
+
https://en.wikipedia.org/wiki/Double-precision_floating-point_format
|
|
60
|
+
|
|
61
|
+
- 1 bit - sign (S)
|
|
62
|
+
- 11 bit - exponent (E)
|
|
63
|
+
- 52 bit - fraction (F)
|
|
64
|
+
|
|
65
|
+
|E |Description |
|
|
66
|
+
|-------------|------------------------|
|
|
67
|
+
|000_0000_0000|F = 0: signed zeros |
|
|
68
|
+
| |F != 0: subnormals |
|
|
69
|
+
|000_0000_0001|E = 2^-1022 |
|
|
70
|
+
|... | |
|
|
71
|
+
|011_1111_1111|E = 2^0 |
|
|
72
|
+
|100_0000_0000|E = 2^1 |
|
|
73
|
+
|... | |
|
|
74
|
+
|111_1111_1110|E = 2^1023 |
|
|
75
|
+
|111_1111_1111|F = 0: signed infinities|
|
|
76
|
+
| |F != 0: NaN |
|
|
77
|
+
|
|
78
|
+
### Float61
|
|
79
|
+
|
|
80
|
+
- 1 bit - sign
|
|
81
|
+
- 8 bit - exponent
|
|
82
|
+
- 52 bit - fraction
|
|
83
|
+
|
|
84
|
+
|E |Description|
|
|
85
|
+
|---------|-----------|
|
|
86
|
+
|0000_0000|E = 2^-127 |
|
|
87
|
+
|... | |
|
|
88
|
+
|0111_1111|E = 2^0 |
|
|
89
|
+
|1000_0000|E = 2^1 |
|
|
90
|
+
|... | |
|
|
91
|
+
|1111_1111|E = 2^128 |
|
|
92
|
+
|
|
93
|
+
## Object Structure
|
|
94
|
+
|
|
95
|
+
- type&counter: 32 + float64: 64
|
|
96
|
+
- type&counter: 32 + int32: 32
|
|
97
|
+
- type&counter: 32 + object: len: 32 + payload
|
|
98
|
+
- type&counter: 32 + array: len: 32 + payload
|
|
99
|
+
- type&counter: 32 + function: 32 + 32
|
|
100
|
+
- type&counter: 32 + string: len: 32 + payload
|
|
101
|
+
- type&counter: 32 + bigint: len: 32 + payload
|
|
102
|
+
|
|
103
|
+
Minimal size: 64 bit = 8 bytes. This means that the number of objects can't be more than 2^(32-3) on a 32-bit platform and 2^(64-3) on a 64-bit platform.
|
|
104
|
+
counter:
|
|
105
|
+
- 32 - 3 = 29 bit,
|
|
106
|
+
- 64 - 3 = 61 bit.
|
|
107
|
+
|
|
108
|
+
Max length of a JS string/array/object can't be bigger than 2^53-1.
|
|
109
|
+
|
|
110
|
+
## Type
|
|
111
|
+
|
|
112
|
+
- 000 float64
|
|
113
|
+
- 001 string
|
|
114
|
+
- 010 object
|
|
115
|
+
- 011 array
|
|
116
|
+
- 100 function
|
|
117
|
+
- 101 bigint
|
|
118
|
+
- 110 int32
|
|
119
|
+
- 111 ...
|
|
120
|
+
|
|
121
|
+
## Type & counter
|
|
122
|
+
|
|
123
|
+
|field |x32 |x64 |
|
|
124
|
+
|-------|------|------|
|
|
125
|
+
|type |29..31|61..63|
|
|
126
|
+
|counter|0..28 |0..60 |
|
|
127
|
+
|
|
128
|
+
## Using Pools for small objects
|
|
129
|
+
|
|
130
|
+
|type |size on x32|size on x64|
|
|
131
|
+
|------------|-----------|-----------|
|
|
132
|
+
|float64 |4+8 = 12 |8+8 = 16 |
|
|
133
|
+
|function |4+4 = 8 |8+8 = 16 |
|
|
134
|
+
|int32 |4+4 = 8 |8+4 = 12 |
|
|
135
|
+
|empty object|4+4 = 8 |8+8 = 16 |
|
|
136
|
+
|empty array |4+4 = 8 |8+8 = 16 |
|
package/package.json
CHANGED
package/text/encoding/README.md
CHANGED
|
@@ -4,34 +4,14 @@
|
|
|
4
4
|
|
|
5
5
|
Requirement: no loss for UTF8 => codepoint => UTF8
|
|
6
6
|
|
|
7
|
-
|utf8 |codepoint |size |
|
|
8
|
-
|
|
9
|
-
|[a] |0xxx_xxxx |7 bit |
|
|
10
|
-
|[b,a] |110x_xxxx 10xx_xxxx |11 bit |
|
|
11
|
-
|[c,b,a] |1110_xxxx 10xx_xxxx 10xx_xxxx |16 bit |
|
|
12
|
-
|[d,c,b,a]|1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx|21 bit |
|
|
13
|
-
|
|
14
|
-
|utf8 error|
|
|
15
|
-
|----------|-----------------------------|------|
|
|
16
|
-
|[e] |10xx_xxxx |6 bit |
|
|
17
|
-
|[e] |1111_1xxx |3 bit |
|
|
18
|
-
|[b,] |110x_xxxx |5 bit |
|
|
19
|
-
|[c,] |1110_xxxx |4 bit |
|
|
20
|
-
|[c,b,] |1110_xxxx 10xx_xxxx |10 bit|
|
|
21
|
-
|[d,] |1111_0xxx |3 bit |
|
|
22
|
-
|[d,c,] |1111_0xxx 10xx_xxxx |9 bit |
|
|
23
|
-
|[d,c,b,] |1111_0xxx 10xx_xxxx 10xx_xxxx|15 bit|
|
|
24
|
-
|
|
25
|
-
Total error states:
|
|
26
|
-
|
|
27
|
-
- 2^6 + 2^3 + 2^5 + 2^4 + 2^10 + 2^3 + + 2^9 + 2^15
|
|
28
|
-
- 2^4 + 2^6 + 2^5 + 2^4 + 2^10 + 2^9 + 2^15
|
|
29
|
-
- 2^5 + 2^6 + 2^5 + 2^10 + 2^9 + 2^15
|
|
30
|
-
- 2^6 + 2^6 + 2^10 + 2^9 + 2^15
|
|
31
|
-
- 2^7 + 2^9 + 2^10 + 2^15
|
|
32
|
-
- < 2^16
|
|
33
|
-
|
|
34
|
-
|utf8 error|codepoint |size |map |
|
|
7
|
+
|utf8 |codepoint |size |codepoint |
|
|
8
|
+
|---------|---------------------------------------|---------|--------------------------------------------------|
|
|
9
|
+
|[a] |0xxx_xxxx |7 bit |0_0000_0000_0000_0xxx_xxxx |
|
|
10
|
+
|[b,a] |110x_xxxx 10xx_xxxx |11 bit |0_0000_0000_0xxx_xxxx_xxxx + 0_0000_0000_1000_0000|
|
|
11
|
+
|[c,b,a] |1110_xxxx 10xx_xxxx 10xx_xxxx |16 bit |0_0000_xxxx_xxxx_xxxx_xxxx + 0_0000_1000_0000_0000|
|
|
12
|
+
|[d,c,b,a]|1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx|21 bit |x_xxxx_xxxx_xxxx_xxxx_xxxx + 1_0000_0000_0000_0000|
|
|
13
|
+
|
|
14
|
+
|utf8 error| |size |codepoint |
|
|
35
15
|
|----------|-----------------------------|------|-------------------|
|
|
36
16
|
|[e] |1111_1xxx | 3 bit| |
|
|
37
17
|
|[d,] |1111_0xxx | 3 bit| |
|
|
@@ -42,6 +22,18 @@ Total error states:
|
|
|
42
22
|
|[c,b,] |1110_xxxx 10xx_xxxx |10 bit|0000_01xx xxxx_xxxx|
|
|
43
23
|
|[d,c,b,] |1111_0xxx 10xx_xxxx 10xx_xxxx|15 bit|1xxx_xxxx xxxx_xxxx|
|
|
44
24
|
|
|
25
|
+
Total error states:
|
|
26
|
+
|
|
27
|
+
- 2^6 + 2^3 + 2^5 + 2^4 + 2^10 + 2^3 + 2^9 + 2^15
|
|
28
|
+
- 2^4 + 2^6 + 2^5 + 2^4 + 2^10 + 2^9 + 2^15
|
|
29
|
+
- 2^5 + 2^6 + 2^5 + 2^10 + 2^9 + 2^15
|
|
30
|
+
- 2^6 + 2^6 + 2^10 + 2^9 + 2^15
|
|
31
|
+
- 2^7 + 2^9 + 2^10 + 2^15
|
|
32
|
+
- 0b1000_0110_1000_0000
|
|
33
|
+
- 128 + 512 + 1024 + 32_768
|
|
34
|
+
- 34_432
|
|
35
|
+
- < 2^16
|
|
36
|
+
|
|
45
37
|
```js
|
|
46
38
|
/** @type {(input: List<u8|undefined>) => List<i32>} */
|
|
47
39
|
const utf8ToCodePoint
|
|
@@ -90,3 +82,25 @@ const utf16ListToString
|
|
|
90
82
|
```
|
|
91
83
|
|
|
92
84
|
UTF-16 => CP => UTF-8 => CP => UTF-16
|
|
85
|
+
|
|
86
|
+
## Example
|
|
87
|
+
|
|
88
|
+
- UTF-16:
|
|
89
|
+
- 1101_11xx_xxxx_xxxx
|
|
90
|
+
- 1101_11xx_xxxx_xxxx
|
|
91
|
+
- CP:
|
|
92
|
+
- 1000_0000_0000_0000_1101_11xx_xxxx_xxxx
|
|
93
|
+
- 1000_0000_0000_0000_1101_11xx_xxxx_xxxx
|
|
94
|
+
- UTF-8:
|
|
95
|
+
- 1111_0.101
|
|
96
|
+
- 10.11_xxxx
|
|
97
|
+
- 10xx_xxxx
|
|
98
|
+
- 1111_0.101
|
|
99
|
+
- 10.11_xxxx
|
|
100
|
+
- 10xx_xxxx
|
|
101
|
+
- CP:
|
|
102
|
+
- 1000_0000_0000_0000_1101_11xx_xxxx_xxxx
|
|
103
|
+
- 1000_0000_0000_0000_1101_11xx_xxxx_xxxx
|
|
104
|
+
- UTF-16:
|
|
105
|
+
- 1101_11xx_xxxx_xxxx
|
|
106
|
+
- 1101_11xx_xxxx_xxxx
|