lzy-codec 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,77 @@
1
# Release workflow: runs when a tag matching `v*` is pushed.
#   build   - builds the bundle, creates a GitHub release and attaches the bundle to it
#   publish - publishes the package to npm once `build` has succeeded
name: build

on:
  push:
    tags:
      - 'v*'

jobs:
  build:
    runs-on: ubuntu-latest

    # Creating a release and uploading an asset needs write access to contents.
    permissions:
      contents: write

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'

      - name: Install pnpm
        run: npm install -g pnpm

      - name: Install dependencies
        run: pnpm install

      - name: Compress JavaScript
        run: |
          pnpm run build

      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          # `github.ref` is the full ref (refs/tags/v1.2.3); `github.ref_name`
          # is the short tag name, which is what belongs in the release title.
          tag_name: ${{ github.ref_name }}
          release_name: Release ${{ github.ref_name }}
          draft: false
          prerelease: false

      - name: Upload Release Asset
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ steps.create_release.outputs.upload_url }}
          asset_path: ./dist/assets/main.js
          asset_name: lzy-codec-${{ github.ref_name }}.min.js
          asset_content_type: application/javascript

  publish:
    runs-on: ubuntu-latest
    needs: build
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'

      - name: Install pnpm
        run: npm install -g pnpm

      - name: Install dependencies
        run: pnpm install

      - name: Login to npm
        # The token is handed over through the environment instead of being
        # expanded into the script text before the shell runs it.
        env:
          NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
        run: npm config set "//registry.npmjs.org/:_authToken=${NPM_TOKEN}"

      - name: Publish to npm
        run: npm publish --access public
package/.prettierrc ADDED
@@ -0,0 +1,5 @@
1
+ {
2
+ "tabWidth": 4,
3
+ "semi": false,
4
+ "singleQuote": true
5
+ }
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2021 李宗英
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,59 @@
1
+ # LZY Codec
2
+
3
+ 一種變長文本編解碼方案,支持對Unicode進行編解碼。編解碼效率、存儲空間全面優於UTF-8,未來會替代UTF-8成為新的世界通用編解碼標準。
4
+
5
+ [lzy-codec-js](https://github.com/lizongying/lzy-codec-js)
6
+
7
+ [npm](https://www.npmjs.com/package/lzy-codec-js)
8
+
9
+ ## Other languages
10
+
11
+ - [lzy-codec-go](https://github.com/lizongying/lzy-codec-go)
12
+ - [lzy-codec-py](https://github.com/lizongying/lzy-codec-py)
13
+
14
+ ## 引用
15
+
16
+ ### node
17
+
18
+ install
19
+
20
+ ```sh
21
+ npm i lzy-codec-js
22
+ ```
23
+
24
+ package.json
25
+
26
+ ```json
27
+ {
28
+ "type": "module",
29
+ "dependencies": {
30
+ "lzy-codec-js": "^0.1.0"
31
+ }
32
+ }
33
+ ```
34
+
35
+ example
36
+
37
+ ```js
38
+ import {
39
+ encodeFromString,
40
+ decodeToString,
41
+ encodeFromBytes,
42
+ decodeToBytes,
43
+ } from 'lzy-codec-js'
44
+
45
+ const testStr = 'Hello 世界!LZY编码测试😀' // 包含emoji(大于0xFFFF的字符)
46
+ console.log(`原始字符串: ${testStr}`)
47
+
48
+ // 编码流程
49
+ const lzyBytes = encodeFromString(testStr)
50
+ console.log(`LZY编码字节: `, lzyBytes)
51
+
52
+ // 解码流程
53
+ const decodedStr = decodeToString(lzyBytes)
54
+ console.log(`解码后字符串: ${decodedStr}`)
55
+ ```
56
+
57
+ ## 讚賞
58
+
59
+ ![image](./screenshots/appreciate.png)
@@ -0,0 +1 @@
1
/*
 * Minified bundle of the LZY codec.
 *
 * The codec is wrapped in a lazily evaluated CommonJS-style module function
 * (`T`), and the module object passed to it always has an `exports` property,
 * so the first branch of the final conditional runs and the six functions
 * (encode, encodeFromString, encodeFromBytes, decode, decodeToString,
 * decodeToBytes) are attached to that `exports` object.
 *
 * The only ES export of this file is the default export, which is that
 * `exports` object; no named ES exports are emitted here.
 *
 * NOTE(review): presumably generated by `vite build` from src/main.js -
 * confirm, and change the source rather than this line.
 */
+ var T=(n,o)=>()=>(o||n((o={exports:{}}).exports,o),o.exports);var a=T((c,l)=>{const s=new Error("invalid unicode");function R(n){return 0<=n&&n<55296||57343<n&&n<=1114111}function u(n){const o=[];for(const e of n)e<128?o.push(e&255):e<16384?(o.push(e>>7&255),o.push((128|e&127)&255)):(o.push(e>>14&255),o.push((128|e>>7&127)&255),o.push((128|e&127)&255));return new Uint8Array(o)}function x(n){const o=[];for(let e=0;e<n.length;e++){const r=n.charCodeAt(e);if(r>=55296&&r<=57343&&e+1<n.length){const d=n.charCodeAt(e+1),t=(r-55296<<10)+(d-56320)+65536;o.push(t),e++}else o.push(r)}return u(o)}function A(n){const e=new TextDecoder("utf-8").decode(n);return x(e)}function h(n){const o=n.length;if(o===0)throw s;let e=-1;for(let f=0;f<o;f++)if((n[f]&128)===0){e=f;break}if(e===-1||o-e===0)throw s;const d=[];let t=0;for(let f=e;f<o;f++){const i=n[f];if(i>>7===0){if(f>e){if(!R(t))throw s;d.push(t)}t=i}else{if(t>8703)throw s;t=t<<7|i&127}}if(!R(t))throw s;return d.push(t),d}function w(n){const o=h(n);let e="";for(const r of o)if(r<=65535)e+=String.fromCharCode(r);else{const d=r-65536,t=55296+(d>>10),f=56320+(d&1023);e+=String.fromCharCode(t,f)}return e}function S(n){const o=w(n);return new TextEncoder("utf-8").encode(o)}typeof l<"u"&&l.exports?(c.encode=u,c.encodeFromString=x,c.encodeFromBytes=A,c.decode=h,c.decodeToString=w,c.decodeToBytes=S):typeof window<"u"&&(window.encode=u,window.encodeFromString=x,window.encodeFromBytes=A,window.decode=h,window.decodeToString=w,window.decodeToBytes=S)});export default a();
@@ -0,0 +1,47 @@
1
+ import js from "@eslint/js";
2
+ import globals from "globals";
3
+ import prettier from "eslint-config-prettier";
4
+ import eslintPluginImport from "eslint-plugin-import";
5
+
6
// ESLint flat configuration: recommended JS rules plus import ordering for
// the sources under src/, with Prettier's config applied last so that it can
// switch off formatting rules that would conflict with it.
export default [
    {
        // Paths that are never linted.
        ignores: ["dist/**", "node_modules/**", "src/tests/**", "vite.config.js"],
    },
    {
        // A brace group needs at least two comma-separated alternatives to be
        // expanded; "*.{js}" is matched literally and therefore selects no
        // file, so the plain extension is used.
        files: ["src/**/*.js"],
        ...js.configs.recommended,
        languageOptions: {
            ecmaVersion: 'latest',
            sourceType: 'module',
            // The codec is meant to run in browsers, Node.js and workers.
            globals: {
                ...globals.browser,
                ...globals.node,
                ...globals.worker,
            },
        },
        plugins: {
            import: eslintPluginImport,
        },
        rules: {
            // Imports are grouped (builtin/external, internal, relative),
            // separated by blank lines and sorted alphabetically per group.
            'import/order': [
                'error',
                {
                    groups: [
                        ['builtin', 'external'],
                        ['internal'],
                        ['parent', 'sibling', 'index'],
                    ],
                    'newlines-between': 'always',
                    alphabetize: {
                        order: 'asc',
                        caseInsensitive: true,
                    },
                },
            ],
            // 'no-console': 'warn',
            // 'no-debugger': 'error',
            // 'no-unused-vars': ['warn', {argsIgnorePattern: '^_'}],
        },
    },
    prettier,
];
package/package.json ADDED
@@ -0,0 +1,35 @@
1
+ {
2
+ "name": "lzy-codec",
3
+ "version": "0.1.0",
4
+ "description": "一種變長文本編解碼方案,支持對Unicode進行編解碼。編解碼效率、存儲空間全面優於UTF-8,未來會替代UTF-8成為新的世界通用編解碼標準。",
5
+ "main": "dist/assets/main.js",
6
+ "type": "module",
7
+ "scripts": {
8
+ "test": "bun ./src/test.js",
9
+ "check": "npx eslint \"src/**/*.js\"",
10
+ "fix": "npx prettier \"src/**/*.js\" --write",
11
+ "build": "vite build",
12
+ "prepublishOnly": "pnpm run build"
13
+ },
14
+ "repository": {
15
+ "type": "git",
16
+ "url": "git+https://github.com/lizongying/lzy-codec-js.git"
17
+ },
18
+ "keywords": [
19
+ "lzy",
20
+ "lzy-codec",
21
+ "lzy codec"
22
+ ],
23
+ "author": "Li ZongYing",
24
+ "license": "MIT",
25
+ "bugs": {
26
+ "url": "https://github.com/lizongying/lzy-codec-js/issues"
27
+ },
28
+ "homepage": "https://github.com/lizongying/lzy-codec-js#readme",
29
+ "devDependencies": {
30
+ "terser": "^5.30.3"
31
+ },
32
+ "dependencies": {
33
+ "vite": "^7.3.0"
34
+ }
35
+ }
Binary file
package/src/main.js ADDED
@@ -0,0 +1,220 @@
1
+ /*
2
+ https://github.com/lizongying/lzy-codec-js
3
+ */
4
+
5
+ // Codec constants: UTF-16 surrogate range bounds, the highest Unicode code point, and the shared Error thrown for invalid input
6
+ const SURROGATE_MIN = 0xd800
7
+ const SURROGATE_MAX = 0xdfff
8
+ const UNICODE_MAX = 0x10ffff
9
+ const ERROR_UNICODE = new Error('invalid unicode')
10
+
11
/**
 * Reports whether a number is a usable Unicode code point: inside
 * [0, UNICODE_MAX] and outside the surrogate range
 * [SURROGATE_MIN, SURROGATE_MAX].
 * @param {number} r - Candidate code point.
 * @returns {boolean} `true` when `r` is in range and not a surrogate.
 */
function validUnicode(r) {
    // Reject everything outside the Unicode range first.
    if (r < 0 || r > UNICODE_MAX) {
        return false
    }
    // Inside the range, only the surrogate block is excluded.
    return r < SURROGATE_MIN || r > SURROGATE_MAX
}
21
+
22
/**
 * Encodes a sequence of Unicode code points as LZY bytes.
 *
 * Each code point takes one, two or three bytes. The first byte of a code
 * point has its high bit clear and carries the most significant bits; every
 * following byte has the high bit set and carries seven more bits.
 *
 * @param {number[]} inputRunes - Code points to encode (not validated here).
 * @returns {Uint8Array} The LZY-encoded bytes.
 */
function encode(inputRunes) {
    const bytes = []

    for (const rune of inputRunes) {
        // Below 0x80: a single byte holding the value itself.
        if (rune < 0x80) {
            bytes.push(rune & 0xff)
            continue
        }

        // The last byte of any multi-byte form: marker bit plus the low 7 bits.
        const tail = 0x80 | (rune & 0x7f)

        // Below 0x4000: the bits above the low 7, then the tail byte.
        if (rune < 0x4000) {
            bytes.push((rune >> 7) & 0xff, tail)
            continue
        }

        // Everything else: top bits, marked middle 7 bits, then the tail byte.
        bytes.push((rune >> 14) & 0xff, 0x80 | ((rune >> 7) & 0x7f), tail)
    }

    return Uint8Array.from(bytes)
}
50
+
51
/**
 * Encodes a JavaScript string as LZY bytes.
 *
 * The string is walked by code point, so a well-formed surrogate pair becomes
 * one code point above 0xFFFF. A surrogate with no partner is passed on
 * unchanged as its own code unit and never consumes the character after it.
 * (The earlier implementation combined any surrogate with whatever code unit
 * followed, producing a bogus code point and dropping that character.)
 *
 * @param {string} inputStr - The string to encode.
 * @returns {Uint8Array} The LZY-encoded bytes, as produced by `encode`.
 */
function encodeFromString(inputStr) {
    const runes = []
    // String iteration yields whole code points; surrogate pairs are joined.
    for (const ch of inputStr) {
        runes.push(ch.codePointAt(0))
    }
    return encode(runes)
}
82
+
83
/**
 * Re-encodes UTF-8 bytes as LZY bytes.
 *
 * The input is decoded to a string with a `TextDecoder('utf-8')` created
 * without options (so the decoder's default settings apply) and the string
 * is then handed to `encodeFromString`.
 *
 * @param {Uint8Array} inputBytes - UTF-8 encoded bytes.
 * @returns {Uint8Array} The LZY-encoded bytes.
 */
function encodeFromBytes(inputBytes) {
    return encodeFromString(new TextDecoder('utf-8').decode(inputBytes))
}
95
+
96
/**
 * Decodes LZY bytes into an array of Unicode code points.
 *
 * A byte with its high bit clear starts a new code point and supplies its
 * most significant bits; each following byte with the high bit set appends
 * seven more bits. Bytes with the high bit set that come before the first
 * start byte are skipped.
 *
 * @param {Uint8Array} inputBytes - LZY-encoded bytes.
 * @returns {number[]} The decoded code points, in order.
 * @throws {Error} The shared `ERROR_UNICODE` instance when the input is
 *     empty, contains no start byte, or yields a value that is a surrogate
 *     or lies beyond `UNICODE_MAX`.
 */
function decode(inputBytes) {
    const l = inputBytes.length

    // Find the first byte whose high bit is clear. The scan also covers empty
    // input: the loop never runs and startIdx stays -1.
    let startIdx = -1
    for (let i = 0; i < l; i++) {
        if ((inputBytes[i] & 0x80) === 0) {
            startIdx = i
            break
        }
    }
    if (startIdx === -1) {
        throw ERROR_UNICODE
    }

    const output = []
    let r = 0
    for (let i = startIdx; i < l; i++) {
        const b = inputBytes[i]
        if (b >> 7 === 0) {
            // A start byte closes the code point accumulated so far; at
            // startIdx there is nothing accumulated yet.
            if (i > startIdx) {
                if (!validUnicode(r)) {
                    throw ERROR_UNICODE
                }
                output.push(r)
            }
            r = b
        } else {
            // Shifting in 7 more bits would exceed UNICODE_MAX: reject before
            // the value grows any further.
            if (r > UNICODE_MAX >> 7) {
                throw ERROR_UNICODE
            }
            r = (r << 7) | (b & 0x7f)
        }
    }

    // The final code point has no start byte after it to flush it.
    if (!validUnicode(r)) {
        throw ERROR_UNICODE
    }
    output.push(r)

    return output
}
159
+
160
/**
 * Decodes LZY bytes into a JavaScript (UTF-16) string.
 *
 * @param {Uint8Array} inputBytes - LZY-encoded bytes.
 * @returns {string} The decoded text.
 * @throws {Error} Whatever `decode` throws for invalid input.
 */
function decodeToString(inputBytes) {
    // Turns one code point into its UTF-16 form.
    const toUtf16 = (rune) => {
        if (rune <= 0xffff) {
            return String.fromCharCode(rune)
        }
        // Above 0xFFFF the code point is split into a high and a low
        // surrogate of 10 bits each.
        const offset = rune - 0x10000
        return String.fromCharCode(
            SURROGATE_MIN + (offset >> 10),
            0xdc00 + (offset & 0x3ff)
        )
    }

    return decode(inputBytes).map(toUtf16).join('')
}
184
+
185
/**
 * Decodes LZY bytes and re-encodes the resulting text as UTF-8.
 *
 * @param {Uint8Array} inputBytes - LZY-encoded bytes.
 * @returns {Uint8Array} The same text as UTF-8 bytes.
 * @throws {Error} Whatever `decodeToString` throws for invalid input.
 */
function decodeToBytes(inputBytes) {
    const text = decodeToString(inputBytes)
    return new TextEncoder('utf-8').encode(text)
}
196
+
197
// Besides the ES exports, the codec functions are published for non-ESM
// consumers: on `module.exports` when a CommonJS-style `module` object is
// present, otherwise as globals on `window`.
const legacyApi = {
    encode,
    encodeFromString,
    encodeFromBytes,
    decode,
    decodeToString,
    decodeToBytes,
}

if (typeof module !== 'undefined' && module.exports) {
    // Write to the object the guard actually tested, instead of the free
    // `exports` identifier, which is not guaranteed to exist alongside it.
    Object.assign(module.exports, legacyApi)
} else if (typeof window !== 'undefined') {
    Object.assign(window, legacyApi)
}
212
+
213
+ export {
214
+ encode,
215
+ encodeFromString,
216
+ encodeFromBytes,
217
+ decode,
218
+ decodeToString,
219
+ decodeToBytes,
220
+ }
package/src/test.js ADDED
@@ -0,0 +1,54 @@
1
+ import {
2
+ encodeFromString,
3
+ decodeToString,
4
+ encodeFromBytes,
5
+ decodeToBytes,
6
+ } from './main.js'
7
+
8
// Round-trip smoke test for the codec. It runs wherever `window` or `process`
// exists. Each check logs its outcome; a failed check additionally sets a
// non-zero exit code (when `process` is available) so that the test command
// does not report success after a failure.
if (typeof window !== 'undefined' || typeof process !== 'undefined') {
    ;(function testLZY() {
        // Logs one check result and records a failure in the exit code.
        const report = (passed, passMessage, failMessage) => {
            if (passed) {
                console.log(passMessage)
                return
            }
            console.error(failMessage)
            if (typeof process !== 'undefined') {
                process.exitCode = 1
            }
        }

        // Includes an emoji, i.e. a character above 0xFFFF.
        const testStr = 'Hello 世界!LZY编码测试😀'
        console.log(`原始字符串: ${testStr}`)

        // String -> LZY bytes
        const lzyBytes = encodeFromString(testStr)
        console.log(`LZY编码字节: `, lzyBytes)

        // LZY bytes -> string
        const decodedStr = decodeToString(lzyBytes)
        console.log(`解码后字符串: ${decodedStr}`)

        report(
            testStr === decodedStr,
            '✅ 编码解码一致性验证通过',
            '❌ 编码解码一致性验证失败'
        )

        // UTF-8 bytes -> LZY bytes -> UTF-8 bytes
        const utf8Bytes = new TextEncoder().encode(testStr)
        const lzyBytes2 = encodeFromBytes(utf8Bytes)
        const decodedUtf8Bytes = decodeToBytes(lzyBytes2)

        // Typed arrays have no value equality, so compare length and each byte.
        const isEqual =
            utf8Bytes.length === decodedUtf8Bytes.length &&
            utf8Bytes.every((byte, i) => byte === decodedUtf8Bytes[i])

        report(
            isEqual,
            '✅ 字节流编码解码一致性验证通过',
            '❌ 字节流编码解码一致性验证失败'
        )
    })()
}
package/vite.config.js ADDED
@@ -0,0 +1,20 @@
1
+ import {resolve} from 'path'
2
+ import {defineConfig} from 'vite'
3
+
4
// Output file name for an entry chunk: the `main` entry gets a fixed name,
// any other entry gets a hashed name.
const entryFileNames = (chunkInfo) => {
    if (chunkInfo.name === 'main') {
        return 'assets/main.js'
    }
    return 'assets/[name]-[hash].js'
}

// Vite build configuration: bundles src/main.js into the dist directory.
export default defineConfig({
    build: {
        outDir: 'dist',
        rollupOptions: {
            input: {
                main: resolve(__dirname, './src/main.js'),
            },
            output: {
                entryFileNames,
            },
        },
    },
})