telo 0.0.0 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +70 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.js +21 -0
- package/dist/index.test.d.mts +1 -0
- package/dist/index.test.mjs +21 -0
- package/dist/types.d.ts +11 -0
- package/dist/types.js +12 -0
- package/package.json +42 -4
- package/index.js +0 -1
package/README.md
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
Telo is a compact library for creating, serializing, and compressing WARC (Web ARChive) records from HTML pages or text content. It supports WARC 1.1 and produces gzip-compressed output ready for storage or transfer.
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Features
|
|
5
|
+
|
|
6
|
+
* Create WARC records (RESPONSE by default) with URL, HTTP headers, and custom metadata.
|
|
7
|
+
* Serialize WARCInfo records with metadata.
|
|
8
|
+
* Asynchronous content streaming for large pages.
|
|
9
|
+
* Gzip compression of combined WARCInfo and record.
|
|
10
|
+
* Fully compatible with WARC 1.1.
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
### Installation
|
|
14
|
+
|
|
15
|
+
```shell
|
|
16
|
+
npm install telo
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
### Usage
|
|
21
|
+
|
|
22
|
+
```js
|
|
23
|
+
import {gunzipSync} from 'node:zlib';
|
|
24
|
+
import {Headers} from 'http-directives'; import {Telo, TeloTypes} from 'telo';
|
|
25
|
+
|
|
26
|
+
const telo = new Telo();
|
|
27
|
+
|
|
28
|
+
const record = await telo.createRecord(
|
|
29
|
+
TeloTypes.RESPONSE,
|
|
30
|
+
'http://example.com',
|
|
31
|
+
'<body />',
|
|
32
|
+
{
|
|
33
|
+
[Headers.CONTENT_TYPE]: 'text/html; charset=UTF-8'
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
Browser: 'Firefox'
|
|
37
|
+
}
|
|
38
|
+
);
|
|
39
|
+
|
|
40
|
+
const uncompressed = gunzipSync(record);
|
|
41
|
+
const text = uncompressed.toString('utf-8');
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
### API
|
|
46
|
+
|
|
47
|
+
```ts
|
|
48
|
+
new Telo().createRecord(type, url, page, httpHeaders?, info?);
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
* **type** — WARC record type; pass `undefined` to use the default (`TeloTypes.RESPONSE`)
|
|
52
|
+
* **url** — URL of the page
|
|
53
|
+
* **page** — HTML or text content
|
|
54
|
+
* **httpHeaders** — optional HTTP headers object
|
|
55
|
+
* **info** — optional metadata object
|
|
56
|
+
|
|
57
|
+
Returns: `Promise<Buffer>` — gzip-compressed buffer containing a WARCInfo record followed by the WARC record.
|
|
58
|
+
|
|
59
|
+
```js
|
|
60
|
+
Telo.serializeInfo(filename, info);
|
|
61
|
+
```
|
|
62
|
+
* **filename** — WARC filename
|
|
63
|
+
* **info** — metadata object
|
|
64
|
+
|
|
65
|
+
Returns: `Promise<Uint8Array>` — serialized WARCInfo record.
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
### License
|
|
69
|
+
|
|
70
|
+
MIT
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { TeloTypes } from '#types.ts';
|
|
2
|
+
import { type WARCType } from 'warcio';
|
|
3
|
+
type Info = Record<string, any>;
|
|
4
|
+
declare class Telo {
|
|
5
|
+
private content;
|
|
6
|
+
static serializeInfo(filename: string, info: Record<string, string>): Promise<Uint8Array>;
|
|
7
|
+
createRecord(type: WARCType | undefined, url: string, page: string, httpHeaders?: HeadersInit, info?: Info): Promise<Buffer>;
|
|
8
|
+
}
|
|
9
|
+
export { Telo, TeloTypes };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { gzipSync } from 'node:zlib';
|
|
2
|
+
import { TeloTypes } from '#types.ts';
|
|
3
|
+
import { WARC_1_1, WARCRecord, WARCSerializer } from 'warcio';
|
|
4
|
+
class Telo {
|
|
5
|
+
async *content(page) {
|
|
6
|
+
const textEncoder = new TextEncoder();
|
|
7
|
+
yield textEncoder.encode(page);
|
|
8
|
+
}
|
|
9
|
+
static async serializeInfo(filename, info) {
|
|
10
|
+
const warcInfo = await WARCRecord.createWARCInfo({ warcVersion: WARC_1_1, filename }, info);
|
|
11
|
+
return WARCSerializer.serialize(warcInfo);
|
|
12
|
+
}
|
|
13
|
+
async createRecord(type = TeloTypes.RESPONSE, url, page, httpHeaders = {}, info = {}) {
|
|
14
|
+
const record = await WARCRecord.create({ url, httpHeaders, type }, this.content(page));
|
|
15
|
+
const serializedRecord = await WARCSerializer.serialize(record);
|
|
16
|
+
const serializedInfo = await Telo.serializeInfo(url, info);
|
|
17
|
+
const buffer = Buffer.concat([serializedInfo, serializedRecord]);
|
|
18
|
+
return gzipSync(buffer);
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
export { Telo, TeloTypes };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { gunzipSync } from 'node:zlib';
|
|
2
|
+
import { Headers } from 'http-directives';
|
|
3
|
+
import { describe, it, expect } from 'vitest';
|
|
4
|
+
import { Telo, TeloTypes } from '#index.ts';
|
|
5
|
+
describe('Telo', () => {
|
|
6
|
+
it('Should create a valid gzip WARC record', async () => {
|
|
7
|
+
const warc = new Telo();
|
|
8
|
+
const record = await warc.createRecord(TeloTypes.RESPONSE, 'http://example.com', '<body />', {
|
|
9
|
+
[Headers.CONTENT_TYPE]: 'text/html; charset=UTF-8'
|
|
10
|
+
}, {
|
|
11
|
+
Browser: 'Firefox'
|
|
12
|
+
});
|
|
13
|
+
const uncompressed = gunzipSync(record);
|
|
14
|
+
const text = uncompressed.toString('utf-8');
|
|
15
|
+
expect(text).toContain('WARC/1.1');
|
|
16
|
+
expect(text).toContain('http://example.com');
|
|
17
|
+
expect(text).toContain('Content-Type: text/html; charset=UTF-8');
|
|
18
|
+
expect(text).toContain('Browser: Firefox');
|
|
19
|
+
expect(text).toContain('<body />');
|
|
20
|
+
});
|
|
21
|
+
});
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
declare enum TeloTypes {
|
|
2
|
+
WARCINFO = "warcinfo",
|
|
3
|
+
RESPONSE = "response",
|
|
4
|
+
RESOURCE = "resource",
|
|
5
|
+
REQUEST = "request",
|
|
6
|
+
METADATA = "metadata",
|
|
7
|
+
REVISIT = "revisit",
|
|
8
|
+
CONVERSION = "conversion",
|
|
9
|
+
CONTINUATION = "continuation"
|
|
10
|
+
}
|
|
11
|
+
export { TeloTypes };
|
package/dist/types.js
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
var TeloTypes;
|
|
2
|
+
(function (TeloTypes) {
|
|
3
|
+
TeloTypes["WARCINFO"] = "warcinfo";
|
|
4
|
+
TeloTypes["RESPONSE"] = "response";
|
|
5
|
+
TeloTypes["RESOURCE"] = "resource";
|
|
6
|
+
TeloTypes["REQUEST"] = "request";
|
|
7
|
+
TeloTypes["METADATA"] = "metadata";
|
|
8
|
+
TeloTypes["REVISIT"] = "revisit";
|
|
9
|
+
TeloTypes["CONVERSION"] = "conversion";
|
|
10
|
+
TeloTypes["CONTINUATION"] = "continuation";
|
|
11
|
+
})(TeloTypes || (TeloTypes = {}));
|
|
12
|
+
export { TeloTypes };
|
package/package.json
CHANGED
|
@@ -1,6 +1,44 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
2
|
+
"name": "telo",
|
|
3
|
+
"version": "0.0.2",
|
|
4
|
+
"description": "Create, serialize, and gzip WARC 1.1 records from HTML or text content.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"author": "Telo, Inc.",
|
|
7
|
+
"license": "",
|
|
8
|
+
"keywords": [
|
|
9
|
+
"warc",
|
|
10
|
+
"web-archive"
|
|
11
|
+
],
|
|
12
|
+
"files": [
|
|
13
|
+
"dist"
|
|
14
|
+
],
|
|
15
|
+
"scripts": {
|
|
16
|
+
"build": "tsc",
|
|
17
|
+
"prepublishOnly": "npm run build && npm test",
|
|
18
|
+
"test": "vitest run"
|
|
19
|
+
},
|
|
20
|
+
"imports": {
|
|
21
|
+
"#*.ts": {
|
|
22
|
+
"telo": "./src/*.ts",
|
|
23
|
+
"default": "./dist/*.js"
|
|
24
|
+
}
|
|
25
|
+
},
|
|
26
|
+
"exports": {
|
|
27
|
+
".": "./dist/index.js",
|
|
28
|
+
"./*": "./dist/*"
|
|
29
|
+
},
|
|
30
|
+
"simple-git-hooks": {
|
|
31
|
+
"pre-commit": "npm test"
|
|
32
|
+
},
|
|
33
|
+
"dependencies": {
|
|
34
|
+
"warcio": "2.4.7"
|
|
35
|
+
},
|
|
36
|
+
"devDependencies": {
|
|
37
|
+
"@types/cli-progress": "3.11.6",
|
|
38
|
+
"@types/node": "24.2.1",
|
|
39
|
+
"http-directives": "^1.0.6",
|
|
40
|
+
"simple-git-hooks": "2.13.1",
|
|
41
|
+
"typescript": "5.9.2",
|
|
42
|
+
"vitest": "3.2.4"
|
|
43
|
+
}
|
|
6
44
|
}
|
package/index.js
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
|