functionalscript 0.0.383 → 0.0.386
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/doc/vm.md +165 -0
- package/package.json +1 -1
- package/text/encoding/README.md +39 -29
package/doc/vm.md
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# VM
|
|
2
|
+
|
|
3
|
+
[Tagged Pointer](https://en.wikipedia.org/wiki/Tagged_pointer).
|
|
4
|
+
|
|
5
|
+
## Common (3 bit)
|
|
6
|
+
|
|
7
|
+
- `false`
|
|
8
|
+
- `true`
|
|
9
|
+
- `undefined`
|
|
10
|
+
- `""`
|
|
11
|
+
- `+infinity`
|
|
12
|
+
- `-infinity`
|
|
13
|
+
- `-0`
|
|
14
|
+
- `NaN`
|
|
15
|
+
|
|
16
|
+
## 6-bit Id String
|
|
17
|
+
|
|
18
|
+
|symbol |code |# |sum|
|
|
19
|
+
|--------|--------------|--|---|
|
|
20
|
+
|`$` |`\x24` | 1| 1|
|
|
21
|
+
|`0`..`9`|`\x30`..`\x39`| A| B|
|
|
22
|
+
|`A`..`Z`|`\x41`..`\x5A`|1A| 25|
|
|
23
|
+
|`_` |`\x5F` | 1| 26|
|
|
24
|
+
|`a`..`z`|`\x61`..`\x7A`|1A| 40|
|
|
25
|
+
|
|
26
|
+
## 64 bit platform
|
|
27
|
+
|
|
28
|
+
Alignment: 8 bytes.
|
|
29
|
+
|
|
30
|
+
Pointer: 2^64 / 2^3 = 2^61 distinct values, i.e. 61 bits
|
|
31
|
+
|
|
32
|
+
### Value
|
|
33
|
+
|
|
34
|
+
- `63`: 9 x 7 bit string
|
|
35
|
+
- `63`:
|
|
36
|
+
- `61`: pointer + null, alignment - 8 bytes
|
|
37
|
+
- `61`:
|
|
38
|
+
- `60`: 4 x 15 bit string
|
|
39
|
+
- `60`: 10 x 6 bit string
|
|
40
|
+
- `61`:
|
|
41
|
+
- `60`: 6 x 10 bit string
|
|
42
|
+
- `60`: 5 x 12 bit string
|
|
43
|
+
- `61`
|
|
44
|
+
- `60`: float60
|
|
45
|
+
- `60`:
|
|
46
|
+
- `59`: bigInt59 (-288_230_376_151_711_744..288_230_376_151_711_743)
|
|
47
|
+
- `59`:
|
|
48
|
+
- `56`: 8 x 7-bit string
|
|
49
|
+
- `56`: 7 x 8-bit string
|
|
50
|
+
- `53`: int53
|
|
51
|
+
- `53`: stringUInt53
|
|
52
|
+
- `48`: 3 x 16 bit string
|
|
53
|
+
- `32`: 2 x 16 bit string
|
|
54
|
+
- `16`: 1 x UTF16 string
|
|
55
|
+
- `3`: common
|
|
56
|
+
|
|
57
|
+
## Float64
|
|
58
|
+
|
|
59
|
+
https://en.wikipedia.org/wiki/Double-precision_floating-point_format
|
|
60
|
+
|
|
61
|
+
- 1 bit - sign (S)
|
|
62
|
+
- 11 bit - exponent (E)
|
|
63
|
+
- 52 bit - fraction (F)
|
|
64
|
+
|
|
65
|
+
|E |Description |
|
|
66
|
+
|-------------|------------------------|
|
|
67
|
+
|000_0000_0000|F = 0: signed zeros |
|
|
68
|
+
| |F != 0: subnormals |
|
|
69
|
+
|000_0000_0001|E = 2^-1022 |
|
|
70
|
+
|... | |
|
|
71
|
+
|011_1111_1111|E = 2^0 |
|
|
72
|
+
|100_0000_0000|E = 2^1 |
|
|
73
|
+
|... | |
|
|
74
|
+
|111_1111_1110|E = 2^1023 |
|
|
75
|
+
|111_1111_1111|F = 0: signed infinities|
|
|
76
|
+
| |F != 0: NaN |
|
|
77
|
+
|
|
78
|
+
## Float60
|
|
79
|
+
|
|
80
|
+
- 1 bit - sign
|
|
81
|
+
- 7 bit - exponent
|
|
82
|
+
- 52 bit - fraction
|
|
83
|
+
|
|
84
|
+
|E |Description|
|
|
85
|
+
|--------|-----------|
|
|
86
|
+
|000_0000|E = 2^-63 |
|
|
87
|
+
|... | |
|
|
88
|
+
|011_1111|E = 2^0 |
|
|
89
|
+
|100_0000|E = 2^1 |
|
|
90
|
+
|... | |
|
|
91
|
+
|111_1111|E = 2^64 |
|
|
92
|
+
|
|
93
|
+
Note: the type has no `+0`, `-0`, `+inf`, `-inf`, `NaN`.
|
|
94
|
+
|
|
95
|
+
## Object Structure
|
|
96
|
+
|
|
97
|
+
Value Size = 8
|
|
98
|
+
Counter size = max_memory_size / value_size.
|
|
99
|
+
|
|
100
|
+
### Type
|
|
101
|
+
|
|
102
|
+
- `000`: double
|
|
103
|
+
- `001`: string
|
|
104
|
+
- `010`: array
|
|
105
|
+
- `011`: object
|
|
106
|
+
- `100`: bigInt
|
|
107
|
+
- `101`: function
|
|
108
|
+
- `110`:
|
|
109
|
+
- `111`:
|
|
110
|
+
|
|
111
|
+
### Type & Counter
|
|
112
|
+
|
|
113
|
+
- AtomicUSize:
|
|
114
|
+
- `3`: type
|
|
115
|
+
- `...`: counter
|
|
116
|
+
|
|
117
|
+
### String
|
|
118
|
+
|
|
119
|
+
```rust
|
|
120
|
+
struct String {
|
|
121
|
+
length: u32,
|
|
122
|
+
array: [u16; self.length],
|
|
123
|
+
}
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### Function
|
|
127
|
+
|
|
128
|
+
```rust
|
|
129
|
+
struct Function<length: u32> {
|
|
130
|
+
func: pointer,
|
|
131
|
+
array: [Value; length]
|
|
132
|
+
}
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### BigInt
|
|
136
|
+
|
|
137
|
+
```rust
|
|
138
|
+
struct BigInt {
|
|
139
|
+
length: u32,
|
|
140
|
+
array: [u64; self.length],
|
|
141
|
+
}
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Array
|
|
145
|
+
|
|
146
|
+
```rust
|
|
147
|
+
struct Array {
|
|
148
|
+
length: u32,
|
|
149
|
+
array: [Value; self.length],
|
|
150
|
+
}
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### Object
|
|
154
|
+
|
|
155
|
+
```rust
|
|
156
|
+
struct Object {
|
|
157
|
+
length: u32,
|
|
158
|
+
array: [(Value, Value); self.length],
|
|
159
|
+
indexArray: [u32; (self.length * log2(self.length) + 31) / 32],
|
|
160
|
+
}
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
Note: see https://262.ecma-international.org/6.0/#sec-ordinary-object-internal-methods-and-internal-slots-ownpropertykeys and https://262.ecma-international.org/6.0/#sec-object-type
|
|
164
|
+
|
|
165
|
+
An integer index in Node.js, Deno and Bun is a value from `0` to `4294967294` inclusive (4_294_967_294 = 0xFFFF_FFFE), whereas an integer index in the ES6 standard ranges over +0..2^53-1.
|
package/package.json
CHANGED
package/text/encoding/README.md
CHANGED
|
@@ -4,34 +4,14 @@
|
|
|
4
4
|
|
|
5
5
|
Requirement: no loss for UTF8 => codepoint => UTF8
|
|
6
6
|
|
|
7
|
-
|utf8 |codepoint |size |
|
|
8
|
-
|
|
9
|
-
|[a] |0xxx_xxxx |7 bit |
|
|
10
|
-
|[b,a] |110x_xxxx 10xx_xxxx |11 bit |
|
|
11
|
-
|[c,b,a] |1110_xxxx 10xx_xxxx 10xx_xxxx |16 bit |
|
|
12
|
-
|[d,c,b,a]|1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx|21 bit |
|
|
13
|
-
|
|
14
|
-
|utf8 error|
|
|
15
|
-
|----------|-----------------------------|------|
|
|
16
|
-
|[e] |10xx_xxxx |6 bit |
|
|
17
|
-
|[e] |1111_1xxx |3 bit |
|
|
18
|
-
|[b,] |110x_xxxx |5 bit |
|
|
19
|
-
|[c,] |1110_xxxx |4 bit |
|
|
20
|
-
|[c,b,] |1110_xxxx 10xx_xxxx |10 bit|
|
|
21
|
-
|[d,] |1111_0xxx |3 bit |
|
|
22
|
-
|[d,c,] |1111_0xxx 10xx_xxxx |9 bit |
|
|
23
|
-
|[d,c,b,] |1111_0xxx 10xx_xxxx 10xx_xxxx|15 bit|
|
|
24
|
-
|
|
25
|
-
Total error states:
|
|
26
|
-
|
|
27
|
-
- 2^6 + 2^3 + 2^5 + 2^4 + 2^10 + 2^3 + + 2^9 + 2^15
|
|
28
|
-
- 2^4 + 2^6 + 2^5 + 2^4 + 2^10 + 2^9 + 2^15
|
|
29
|
-
- 2^5 + 2^6 + 2^5 + 2^10 + 2^9 + 2^15
|
|
30
|
-
- 2^6 + 2^6 + 2^10 + 2^9 + 2^15
|
|
31
|
-
- 2^7 + 2^9 + 2^10 + 2^15
|
|
32
|
-
- < 2^16
|
|
33
|
-
|
|
34
|
-
|utf8 error|codepoint |size |map |
|
|
7
|
+
|utf8 |codepoint |size |codepoint |
|
|
8
|
+
|---------|---------------------------------------|---------|--------------------------------------------------|
|
|
9
|
+
|[a] |0xxx_xxxx |7 bit |0_0000_0000_0000_0xxx_xxxx |
|
|
10
|
+
|[b,a] |110x_xxxx 10xx_xxxx |11 bit |0_0000_0000_0xxx_xxxx_xxxx + 0_0000_0000_1000_0000|
|
|
11
|
+
|[c,b,a] |1110_xxxx 10xx_xxxx 10xx_xxxx |16 bit |0_0000_xxxx_xxxx_xxxx_xxxx + 0_0000_1000_0000_0000|
|
|
12
|
+
|[d,c,b,a]|1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx|21 bit |x_xxxx_xxxx_xxxx_xxxx_xxxx + 1_0000_0000_0000_0000|
|
|
13
|
+
|
|
14
|
+
|utf8 error| |size |codepoint |
|
|
35
15
|
|----------|-----------------------------|------|-------------------|
|
|
36
16
|
|[e] |1111_1xxx | 3 bit| |
|
|
37
17
|
|[d,] |1111_0xxx | 3 bit| |
|
|
@@ -42,6 +22,18 @@ Total error states:
|
|
|
42
22
|
|[c,b,] |1110_xxxx 10xx_xxxx |10 bit|0000_01xx xxxx_xxxx|
|
|
43
23
|
|[d,c,b,] |1111_0xxx 10xx_xxxx 10xx_xxxx|15 bit|1xxx_xxxx xxxx_xxxx|
|
|
44
24
|
|
|
25
|
+
Total error states:
|
|
26
|
+
|
|
27
|
+
- 2^6 + 2^3 + 2^5 + 2^4 + 2^10 + 2^3 + 2^9 + 2^15
|
|
28
|
+
- 2^4 + 2^6 + 2^5 + 2^4 + 2^10 + 2^9 + 2^15
|
|
29
|
+
- 2^5 + 2^6 + 2^5 + 2^10 + 2^9 + 2^15
|
|
30
|
+
- 2^6 + 2^6 + 2^10 + 2^9 + 2^15
|
|
31
|
+
- 2^7 + 2^9 + 2^10 + 2^15
|
|
32
|
+
- 0b1000_0110_1000_0000
|
|
33
|
+
- 128 + 512 + 1024 + 32_768
|
|
34
|
+
- 34_432
|
|
35
|
+
- < 2^16
|
|
36
|
+
|
|
45
37
|
```js
|
|
46
38
|
/** @type {(input: List<u8|undefined>) => List<i32>} */
|
|
47
39
|
const utf8ToCodePoint
|
|
@@ -93,4 +85,22 @@ UTF-16 => CP => UTF-8 => CP = UTF-16
|
|
|
93
85
|
|
|
94
86
|
## Example
|
|
95
87
|
|
|
96
|
-
UTF-
|
|
88
|
+
- UTF-16:
|
|
89
|
+
- 1101_11xx_xxxx_xxxx
|
|
90
|
+
- 1101_11xx_xxxx_xxxx
|
|
91
|
+
- CP:
|
|
92
|
+
- 1000_0000_0000_0000_1101_11xx_xxxx_xxxx
|
|
93
|
+
- 1000_0000_0000_0000_1101_11xx_xxxx_xxxx
|
|
94
|
+
- UTF-8:
|
|
95
|
+
- 1111_0.101
|
|
96
|
+
- 10.11_xxxx
|
|
97
|
+
- 10xx_xxxx
|
|
98
|
+
- 1111_0.101
|
|
99
|
+
- 10.11_xxxx
|
|
100
|
+
- 10xx_xxxx
|
|
101
|
+
- CP:
|
|
102
|
+
- 1000_0000_0000_0000_1101_11xx_xxxx_xxxx
|
|
103
|
+
- 1000_0000_0000_0000_1101_11xx_xxxx_xxxx
|
|
104
|
+
- UTF-16:
|
|
105
|
+
- 1101_11xx_xxxx_xxxx
|
|
106
|
+
- 1101_11xx_xxxx_xxxx
|