@magicpages/ghost-typesense-core 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +146 -1
- package/dist/index.d.mts +4 -2
- package/dist/index.d.ts +4 -2
- package/dist/index.js +21 -4
- package/dist/index.mjs +21 -4
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -1 +1,146 @@
|
|
|
1
|
-
|
|
1
|
+
# @magicpages/ghost-typesense-core
|
|
2
|
+
|
|
3
|
+
Core functionality for Ghost-Typesense integration. This package provides the essential services for indexing Ghost CMS content in Typesense.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- 🔄 Seamless synchronization between Ghost CMS and Typesense
|
|
8
|
+
- 🔍 Automatic content transformation and indexing
|
|
9
|
+
- ⚙️ Flexible configuration for custom fields and schema
|
|
10
|
+
- 🚀 Efficient pagination handling for large Ghost sites
|
|
11
|
+
- 🧩 TypeScript interfaces for type safety
|
|
12
|
+
- 📝 Automatic plaintext generation from HTML content to make sure the most relevant content is indexed
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
npm install @magicpages/ghost-typesense-core
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Usage
|
|
21
|
+
|
|
22
|
+
The core package provides the `GhostTypesenseManager` class which handles all interactions between Ghost and Typesense.
|
|
23
|
+
|
|
24
|
+
```typescript
|
|
25
|
+
import { GhostTypesenseManager } from '@magicpages/ghost-typesense-core';
|
|
26
|
+
import { config } from './config';
|
|
27
|
+
|
|
28
|
+
async function main() {
|
|
29
|
+
// Initialize the manager with your configuration
|
|
30
|
+
const manager = new GhostTypesenseManager(config);
|
|
31
|
+
|
|
32
|
+
// Create or recreate the Typesense collection with proper schema
|
|
33
|
+
await manager.initializeCollection();
|
|
34
|
+
|
|
35
|
+
// Index all posts from Ghost to Typesense
|
|
36
|
+
await manager.indexAllPosts();
|
|
37
|
+
|
|
38
|
+
// You can also index or delete individual posts
|
|
39
|
+
await manager.indexPost('post-id');
|
|
40
|
+
await manager.deletePost('post-id');
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
main().catch(console.error);
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Configuration
|
|
47
|
+
|
|
48
|
+
This package requires a configuration object that follows the schema defined in `@magicpages/ghost-typesense-config`. The minimal configuration includes:
|
|
49
|
+
|
|
50
|
+
```typescript
|
|
51
|
+
const config = {
|
|
52
|
+
ghost: {
|
|
53
|
+
url: 'https://your-ghost-blog.com',
|
|
54
|
+
key: 'your-content-api-key'
|
|
55
|
+
},
|
|
56
|
+
typesense: {
|
|
57
|
+
nodes: [{
|
|
58
|
+
host: 'your-typesense-host',
|
|
59
|
+
port: 443,
|
|
60
|
+
protocol: 'https'
|
|
61
|
+
}],
|
|
62
|
+
apiKey: 'your-admin-api-key',
|
|
63
|
+
connectionTimeoutSeconds: 10,
|
|
64
|
+
retryIntervalSeconds: 0.1
|
|
65
|
+
},
|
|
66
|
+
collection: {
|
|
67
|
+
name: 'ghost',
|
|
68
|
+
fields: [
|
|
69
|
+
{ name: 'id', type: 'string', index: true },
|
|
70
|
+
{ name: 'title', type: 'string', index: true, sort: true },
|
|
71
|
+
{ name: 'slug', type: 'string', index: true },
|
|
72
|
+
{ name: 'html', type: 'string', index: true },
|
|
73
|
+
{ name: 'plaintext', type: 'string', index: true },
|
|
74
|
+
{ name: 'excerpt', type: 'string', index: true },
|
|
75
|
+
{ name: 'feature_image', type: 'string', index: false, optional: true },
|
|
76
|
+
{ name: 'published_at', type: 'int64', sort: true },
|
|
77
|
+
{ name: 'updated_at', type: 'int64', sort: true },
|
|
78
|
+
{ name: 'tags', type: 'string[]', facet: true, optional: true },
|
|
79
|
+
{ name: 'authors', type: 'string[]', facet: true, optional: true }
|
|
80
|
+
]
|
|
81
|
+
}
|
|
82
|
+
};
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## API Reference
|
|
86
|
+
|
|
87
|
+
### `GhostTypesenseManager`
|
|
88
|
+
|
|
89
|
+
The main class for managing Ghost content in Typesense.
|
|
90
|
+
|
|
91
|
+
#### Constructor
|
|
92
|
+
|
|
93
|
+
```typescript
|
|
94
|
+
constructor(config: Config)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Creates a new instance with the provided configuration.
|
|
98
|
+
|
|
99
|
+
#### Methods
|
|
100
|
+
|
|
101
|
+
- **`async initializeCollection(): Promise<void>`**
|
|
102
|
+
Creates or recreates the Typesense collection with the schema defined in the configuration.
|
|
103
|
+
|
|
104
|
+
- **`async indexAllPosts(): Promise<void>`**
|
|
105
|
+
Fetches all posts from Ghost and indexes them in Typesense. Handles pagination automatically.
|
|
106
|
+
|
|
107
|
+
- **`async indexPost(postId: string): Promise<void>`**
|
|
108
|
+
Fetches a specific post from Ghost and indexes it in Typesense.
|
|
109
|
+
|
|
110
|
+
- **`async deletePost(postId: string): Promise<void>`**
|
|
111
|
+
Deletes a post from the Typesense collection.
|
|
112
|
+
|
|
113
|
+
- **`async clearCollection(): Promise<void>`**
|
|
114
|
+
Removes all documents from the collection and recreates it with the same schema.
|
|
115
|
+
|
|
116
|
+
## Content Transformation
|
|
117
|
+
|
|
118
|
+
The package automatically handles content transformation from Ghost to Typesense, including:
|
|
119
|
+
|
|
120
|
+
- Converting timestamps to numeric formats for sorting
|
|
121
|
+
- Extracting tags and authors as arrays
|
|
122
|
+
- Generating plaintext content from HTML for improved search relevance
|
|
123
|
+
- Ensuring all required fields are properly formatted
|
|
124
|
+
|
|
125
|
+
### Plaintext Generation
|
|
126
|
+
|
|
127
|
+
The plaintext generation process is particularly important for search quality:
|
|
128
|
+
|
|
129
|
+
- Removes script tags and their content to eliminate JavaScript
|
|
130
|
+
- Removes style tags and their content to eliminate CSS
|
|
131
|
+
- Replaces all HTML tags with spaces to preserve word boundaries
|
|
132
|
+
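- Decodes common HTML entities (non-breaking space, `&`, `<`, `>`, and quotes) to their characters and replaces any remaining named entities with spaces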
|
|
133
|
+
- Normalizes whitespace by collapsing multiple spaces to single spaces
|
|
134
|
+
- Trims leading and trailing whitespace
|
|
135
|
+
|
|
136
|
+
Manual search tests have shown that this approach is more accurate than using the HTML content alone or Ghost's default plaintext field.
|
|
137
|
+
|
|
138
|
+
## Related Packages
|
|
139
|
+
|
|
140
|
+
- [@magicpages/ghost-typesense-config](https://github.com/magicpages/ghost-typesense/tree/main/packages/config) - Configuration schema and utilities
|
|
141
|
+
- [@magicpages/ghost-typesense-cli](https://github.com/magicpages/ghost-typesense/tree/main/apps/cli) - Command-line tool for managing Ghost content in Typesense
|
|
142
|
+
- [@magicpages/ghost-typesense-search-ui](https://github.com/magicpages/ghost-typesense/tree/main/packages/search-ui) - Search UI component for Ghost themes
|
|
143
|
+
|
|
144
|
+
## License
|
|
145
|
+
|
|
146
|
+
MIT
|
package/dist/index.d.mts
CHANGED
|
@@ -4,12 +4,14 @@ interface Post {
|
|
|
4
4
|
id: string;
|
|
5
5
|
title: string;
|
|
6
6
|
slug: string;
|
|
7
|
-
html
|
|
7
|
+
html?: string;
|
|
8
|
+
plaintext: string;
|
|
8
9
|
excerpt: string;
|
|
9
10
|
feature_image?: string;
|
|
10
11
|
published_at: number;
|
|
11
12
|
updated_at: number;
|
|
12
|
-
tags?: string[];
|
|
13
|
+
'tags.name'?: string[];
|
|
14
|
+
'tags.slug'?: string[];
|
|
13
15
|
authors?: string[];
|
|
14
16
|
[key: string]: unknown;
|
|
15
17
|
}
|
package/dist/index.d.ts
CHANGED
|
@@ -4,12 +4,14 @@ interface Post {
|
|
|
4
4
|
id: string;
|
|
5
5
|
title: string;
|
|
6
6
|
slug: string;
|
|
7
|
-
html
|
|
7
|
+
html?: string;
|
|
8
|
+
plaintext: string;
|
|
8
9
|
excerpt: string;
|
|
9
10
|
feature_image?: string;
|
|
10
11
|
published_at: number;
|
|
11
12
|
updated_at: number;
|
|
12
|
-
tags?: string[];
|
|
13
|
+
'tags.name'?: string[];
|
|
14
|
+
'tags.slug'?: string[];
|
|
13
15
|
authors?: string[];
|
|
14
16
|
[key: string]: unknown;
|
|
15
17
|
}
|
package/dist/index.js
CHANGED
|
@@ -65,7 +65,9 @@ var GhostTypesenseManager = class {
|
|
|
65
65
|
index: field.index,
|
|
66
66
|
optional: field.optional,
|
|
67
67
|
sort: field.sort
|
|
68
|
-
}))
|
|
68
|
+
})),
|
|
69
|
+
enable_nested_fields: true
|
|
70
|
+
// Enable nested fields support
|
|
69
71
|
};
|
|
70
72
|
await this.typesense.collections().create(schema);
|
|
71
73
|
}
|
|
@@ -74,11 +76,23 @@ var GhostTypesenseManager = class {
|
|
|
74
76
|
*/
|
|
75
77
|
transformPost(post) {
|
|
76
78
|
console.log("Transforming post:", post.id, post.title);
|
|
79
|
+
let plaintext = post.plaintext || "";
|
|
80
|
+
if (post.html) {
|
|
81
|
+
let cleanHtml = post.html.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, "").replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, "");
|
|
82
|
+
cleanHtml = cleanHtml.replace(/<a[^>]*>([^<]*)<\/a>/gi, " $1 ");
|
|
83
|
+
cleanHtml = cleanHtml.replace(/<(strong|b|em|i|mark|span)[^>]*>([^<]*)<\/(strong|b|em|i|mark|span)>/gi, " $2 ");
|
|
84
|
+
cleanHtml = cleanHtml.replace(/<[^>]*>/g, " ");
|
|
85
|
+
cleanHtml = cleanHtml.replace(/&nbsp;/gi, " ").replace(/&amp;/gi, "&").replace(/&lt;/gi, "<").replace(/&gt;/gi, ">").replace(/&quot;/gi, '"').replace(/&#39;/gi, "'").replace(/&[a-z]+;/gi, " ");
|
|
86
|
+
cleanHtml = cleanHtml.replace(/\s+/g, " ").trim();
|
|
87
|
+
if (!plaintext || cleanHtml.length > plaintext.length) {
|
|
88
|
+
plaintext = cleanHtml;
|
|
89
|
+
}
|
|
90
|
+
}
|
|
77
91
|
const transformed = {
|
|
78
92
|
id: post.id,
|
|
79
93
|
title: post.title,
|
|
80
94
|
slug: post.slug,
|
|
81
|
-
|
|
95
|
+
plaintext,
|
|
82
96
|
excerpt: post.excerpt || "",
|
|
83
97
|
published_at: new Date(post.published_at || Date.now()).getTime(),
|
|
84
98
|
updated_at: new Date(post.updated_at || Date.now()).getTime()
|
|
@@ -88,7 +102,8 @@ var GhostTypesenseManager = class {
|
|
|
88
102
|
}
|
|
89
103
|
const tags = post.tags;
|
|
90
104
|
if (tags && Array.isArray(tags) && tags.length > 0) {
|
|
91
|
-
transformed.
|
|
105
|
+
transformed["tags.name"] = tags.map((tag) => tag.name);
|
|
106
|
+
transformed["tags.slug"] = tags.map((tag) => tag.slug);
|
|
92
107
|
}
|
|
93
108
|
const authors = post.authors;
|
|
94
109
|
if (authors && Array.isArray(authors) && authors.length > 0) {
|
|
@@ -154,7 +169,9 @@ var GhostTypesenseManager = class {
|
|
|
154
169
|
* Index a single post in Typesense
|
|
155
170
|
*/
|
|
156
171
|
async indexPost(postId) {
|
|
157
|
-
const post = await this.ghost.posts.read({
|
|
172
|
+
const post = await this.ghost.posts.read({
|
|
173
|
+
id: postId
|
|
174
|
+
}).include({ tags: true, authors: true }).fetch();
|
|
158
175
|
if (!post.success) {
|
|
159
176
|
throw new Error(`Failed to fetch post ${postId} from Ghost`);
|
|
160
177
|
}
|
package/dist/index.mjs
CHANGED
|
@@ -41,7 +41,9 @@ var GhostTypesenseManager = class {
|
|
|
41
41
|
index: field.index,
|
|
42
42
|
optional: field.optional,
|
|
43
43
|
sort: field.sort
|
|
44
|
-
}))
|
|
44
|
+
})),
|
|
45
|
+
enable_nested_fields: true
|
|
46
|
+
// Enable nested fields support
|
|
45
47
|
};
|
|
46
48
|
await this.typesense.collections().create(schema);
|
|
47
49
|
}
|
|
@@ -50,11 +52,23 @@ var GhostTypesenseManager = class {
|
|
|
50
52
|
*/
|
|
51
53
|
transformPost(post) {
|
|
52
54
|
console.log("Transforming post:", post.id, post.title);
|
|
55
|
+
let plaintext = post.plaintext || "";
|
|
56
|
+
if (post.html) {
|
|
57
|
+
let cleanHtml = post.html.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, "").replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, "");
|
|
58
|
+
cleanHtml = cleanHtml.replace(/<a[^>]*>([^<]*)<\/a>/gi, " $1 ");
|
|
59
|
+
cleanHtml = cleanHtml.replace(/<(strong|b|em|i|mark|span)[^>]*>([^<]*)<\/(strong|b|em|i|mark|span)>/gi, " $2 ");
|
|
60
|
+
cleanHtml = cleanHtml.replace(/<[^>]*>/g, " ");
|
|
61
|
+
cleanHtml = cleanHtml.replace(/&nbsp;/gi, " ").replace(/&amp;/gi, "&").replace(/&lt;/gi, "<").replace(/&gt;/gi, ">").replace(/&quot;/gi, '"').replace(/&#39;/gi, "'").replace(/&[a-z]+;/gi, " ");
|
|
62
|
+
cleanHtml = cleanHtml.replace(/\s+/g, " ").trim();
|
|
63
|
+
if (!plaintext || cleanHtml.length > plaintext.length) {
|
|
64
|
+
plaintext = cleanHtml;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
53
67
|
const transformed = {
|
|
54
68
|
id: post.id,
|
|
55
69
|
title: post.title,
|
|
56
70
|
slug: post.slug,
|
|
57
|
-
|
|
71
|
+
plaintext,
|
|
58
72
|
excerpt: post.excerpt || "",
|
|
59
73
|
published_at: new Date(post.published_at || Date.now()).getTime(),
|
|
60
74
|
updated_at: new Date(post.updated_at || Date.now()).getTime()
|
|
@@ -64,7 +78,8 @@ var GhostTypesenseManager = class {
|
|
|
64
78
|
}
|
|
65
79
|
const tags = post.tags;
|
|
66
80
|
if (tags && Array.isArray(tags) && tags.length > 0) {
|
|
67
|
-
transformed.
|
|
81
|
+
transformed["tags.name"] = tags.map((tag) => tag.name);
|
|
82
|
+
transformed["tags.slug"] = tags.map((tag) => tag.slug);
|
|
68
83
|
}
|
|
69
84
|
const authors = post.authors;
|
|
70
85
|
if (authors && Array.isArray(authors) && authors.length > 0) {
|
|
@@ -130,7 +145,9 @@ var GhostTypesenseManager = class {
|
|
|
130
145
|
* Index a single post in Typesense
|
|
131
146
|
*/
|
|
132
147
|
async indexPost(postId) {
|
|
133
|
-
const post = await this.ghost.posts.read({
|
|
148
|
+
const post = await this.ghost.posts.read({
|
|
149
|
+
id: postId
|
|
150
|
+
}).include({ tags: true, authors: true }).fetch();
|
|
134
151
|
if (!post.success) {
|
|
135
152
|
throw new Error(`Failed to fetch post ${postId} from Ghost`);
|
|
136
153
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@magicpages/ghost-typesense-core",
|
|
3
|
-
"version": "1.5.0",
|
|
3
|
+
"version": "1.6.0",
|
|
4
4
|
"description": "Core functionality for Ghost-Typesense integration",
|
|
5
5
|
"author": "MagicPages",
|
|
6
6
|
"license": "MIT",
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
"typecheck": "tsc --noEmit"
|
|
25
25
|
},
|
|
26
26
|
"dependencies": {
|
|
27
|
-
"@magicpages/ghost-typesense-config": "^1.
|
|
27
|
+
"@magicpages/ghost-typesense-config": "^1.6.0",
|
|
28
28
|
"@ts-ghost/content-api": "^4.0.6",
|
|
29
29
|
"@ts-ghost/core-api": "^4.0.6",
|
|
30
30
|
"typesense": "^1.7.2",
|