@magicpages/ghost-typesense-core 1.5.1 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1 +1,146 @@
1
-
1
+ # @magicpages/ghost-typesense-core
2
+
3
+ Core functionality for Ghost-Typesense integration. This package provides the essential services for indexing Ghost CMS content in Typesense.
4
+
5
+ ## Features
6
+
7
+ - 🔄 Seamless synchronization between Ghost CMS and Typesense
8
+ - 🔍 Automatic content transformation and indexing
9
+ - ⚙️ Flexible configuration for custom fields and schema
10
+ - 🚀 Efficient pagination handling for large Ghost sites
11
+ - 🧩 TypeScript interfaces for type safety
12
+ - 📝 Automatic plaintext generation from HTML content to make sure the most relevant content is indexed
13
+
14
+ ## Installation
15
+
16
+ ```bash
17
+ npm install @magicpages/ghost-typesense-core
18
+ ```
19
+
20
+ ## Usage
21
+
22
+ The core package provides the `GhostTypesenseManager` class which handles all interactions between Ghost and Typesense.
23
+
24
+ ```typescript
25
+ import { GhostTypesenseManager } from '@magicpages/ghost-typesense-core';
26
+ import { config } from './config';
27
+
28
+ async function main() {
29
+ // Initialize the manager with your configuration
30
+ const manager = new GhostTypesenseManager(config);
31
+
32
+ // Create or recreate the Typesense collection with proper schema
33
+ await manager.initializeCollection();
34
+
35
+ // Index all posts from Ghost to Typesense
36
+ await manager.indexAllPosts();
37
+
38
+ // You can also index or delete individual posts
39
+ await manager.indexPost('post-id');
40
+ await manager.deletePost('post-id');
41
+ }
42
+
43
+ main().catch(console.error);
44
+ ```
45
+
46
+ ## Configuration
47
+
48
+ This package requires a configuration object that follows the schema defined in `@magicpages/ghost-typesense-config`. The minimal configuration includes:
49
+
50
+ ```typescript
51
+ const config = {
52
+ ghost: {
53
+ url: 'https://your-ghost-blog.com',
54
+ key: 'your-content-api-key'
55
+ },
56
+ typesense: {
57
+ nodes: [{
58
+ host: 'your-typesense-host',
59
+ port: 443,
60
+ protocol: 'https'
61
+ }],
62
+ apiKey: 'your-admin-api-key',
63
+ connectionTimeoutSeconds: 10,
64
+ retryIntervalSeconds: 0.1
65
+ },
66
+ collection: {
67
+ name: 'ghost',
68
+ fields: [
69
+ { name: 'id', type: 'string', index: true },
70
+ { name: 'title', type: 'string', index: true, sort: true },
71
+ { name: 'slug', type: 'string', index: true },
72
+ { name: 'html', type: 'string', index: true },
73
+ { name: 'plaintext', type: 'string', index: true },
74
+ { name: 'excerpt', type: 'string', index: true },
75
+ { name: 'feature_image', type: 'string', index: false, optional: true },
76
+ { name: 'published_at', type: 'int64', sort: true },
77
+ { name: 'updated_at', type: 'int64', sort: true },
78
+ { name: 'tags', type: 'string[]', facet: true, optional: true },
79
+ { name: 'authors', type: 'string[]', facet: true, optional: true }
80
+ ]
81
+ }
82
+ };
83
+ ```
84
+
85
+ ## API Reference
86
+
87
+ ### `GhostTypesenseManager`
88
+
89
+ The main class for managing Ghost content in Typesense.
90
+
91
+ #### Constructor
92
+
93
+ ```typescript
94
+ constructor(config: Config)
95
+ ```
96
+
97
+ Creates a new instance with the provided configuration.
98
+
99
+ #### Methods
100
+
101
+ - **`async initializeCollection(): Promise<void>`**
102
+ Creates or recreates the Typesense collection with the schema defined in the configuration.
103
+
104
+ - **`async indexAllPosts(): Promise<void>`**
105
+ Fetches all posts from Ghost and indexes them in Typesense. Handles pagination automatically.
106
+
107
+ - **`async indexPost(postId: string): Promise<void>`**
108
+ Fetches a specific post from Ghost and indexes it in Typesense.
109
+
110
+ - **`async deletePost(postId: string): Promise<void>`**
111
+ Deletes a post from the Typesense collection.
112
+
113
+ - **`async clearCollection(): Promise<void>`**
114
+ Removes all documents from the collection and recreates it with the same schema.
115
+
116
+ ## Content Transformation
117
+
118
+ The package automatically handles content transformation from Ghost to Typesense, including:
119
+
120
+ - Converting the `published_at` and `updated_at` timestamps to numeric values (Unix epoch milliseconds) for sorting
121
+ - Extracting tag names and slugs (indexed as `tags.name` and `tags.slug`) and authors as arrays
122
+ - Generating plaintext content from HTML for improved search relevance
123
+ - Ensuring all required fields are properly formatted
124
+
125
+ ### Plaintext Generation
126
+
127
+ Plaintext generation is particularly important for search quality. When a post has HTML content, the process performs the following steps:
128
+
129
+ - Removes script tags and their content to eliminate JavaScript
130
+ - Removes style tags and their content to eliminate CSS
131
+ - Replaces all HTML tags with spaces to preserve word boundaries
132
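+ - Decodes common HTML entities (`&amp;`, `&lt;`, `&gt;`, `&quot;`, `&#39;`) and replaces `&nbsp;` and any other named entities with spaces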
133
+ - Normalizes whitespace by collapsing any run of whitespace (spaces, tabs, newlines) into a single space
134
+ - Trims leading and trailing whitespace
135
+
136
+ Manual search tests have shown that this approach is more accurate than using the HTML content alone or Ghost's default plaintext field.
137
+
138
+ ## Related Packages
139
+
140
+ - [@magicpages/ghost-typesense-config](https://github.com/magicpages/ghost-typesense/tree/main/packages/config) - Configuration schema and utilities
141
+ - [@magicpages/ghost-typesense-cli](https://github.com/magicpages/ghost-typesense/tree/main/apps/cli) - Command-line tool for managing Ghost content in Typesense
142
+ - [@magicpages/ghost-typesense-search-ui](https://github.com/magicpages/ghost-typesense/tree/main/packages/search-ui) - Search UI component for Ghost themes
143
+
144
+ ## License
145
+
146
+ MIT
package/dist/index.d.mts CHANGED
@@ -4,12 +4,14 @@ interface Post {
4
4
  id: string;
5
5
  title: string;
6
6
  slug: string;
7
- html: string;
7
+ html?: string;
8
+ plaintext: string;
8
9
  excerpt: string;
9
10
  feature_image?: string;
10
11
  published_at: number;
11
12
  updated_at: number;
12
- tags?: string[];
13
+ 'tags.name'?: string[];
14
+ 'tags.slug'?: string[];
13
15
  authors?: string[];
14
16
  [key: string]: unknown;
15
17
  }
package/dist/index.d.ts CHANGED
@@ -4,12 +4,14 @@ interface Post {
4
4
  id: string;
5
5
  title: string;
6
6
  slug: string;
7
- html: string;
7
+ html?: string;
8
+ plaintext: string;
8
9
  excerpt: string;
9
10
  feature_image?: string;
10
11
  published_at: number;
11
12
  updated_at: number;
12
- tags?: string[];
13
+ 'tags.name'?: string[];
14
+ 'tags.slug'?: string[];
13
15
  authors?: string[];
14
16
  [key: string]: unknown;
15
17
  }
package/dist/index.js CHANGED
@@ -65,7 +65,9 @@ var GhostTypesenseManager = class {
65
65
  index: field.index,
66
66
  optional: field.optional,
67
67
  sort: field.sort
68
- }))
68
+ })),
69
+ enable_nested_fields: true
70
+ // Enable nested fields support
69
71
  };
70
72
  await this.typesense.collections().create(schema);
71
73
  }
@@ -74,11 +76,23 @@ var GhostTypesenseManager = class {
74
76
  */
75
77
  transformPost(post) {
76
78
  console.log("Transforming post:", post.id, post.title);
79
+ let plaintext = post.plaintext || "";
80
+ if (post.html) {
81
+ let cleanHtml = post.html.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, "").replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, "");
82
+ cleanHtml = cleanHtml.replace(/<a[^>]*>([^<]*)<\/a>/gi, " $1 ");
83
+ cleanHtml = cleanHtml.replace(/<(strong|b|em|i|mark|span)[^>]*>([^<]*)<\/(strong|b|em|i|mark|span)>/gi, " $2 ");
84
+ cleanHtml = cleanHtml.replace(/<[^>]*>/g, " ");
85
+ cleanHtml = cleanHtml.replace(/&nbsp;/gi, " ").replace(/&amp;/gi, "&").replace(/&lt;/gi, "<").replace(/&gt;/gi, ">").replace(/&quot;/gi, '"').replace(/&#39;/gi, "'").replace(/&[a-z]+;/gi, " ");
86
+ cleanHtml = cleanHtml.replace(/\s+/g, " ").trim();
87
+ if (!plaintext || cleanHtml.length > plaintext.length) {
88
+ plaintext = cleanHtml;
89
+ }
90
+ }
77
91
  const transformed = {
78
92
  id: post.id,
79
93
  title: post.title,
80
94
  slug: post.slug,
81
- html: post.html,
95
+ plaintext,
82
96
  excerpt: post.excerpt || "",
83
97
  published_at: new Date(post.published_at || Date.now()).getTime(),
84
98
  updated_at: new Date(post.updated_at || Date.now()).getTime()
@@ -88,7 +102,8 @@ var GhostTypesenseManager = class {
88
102
  }
89
103
  const tags = post.tags;
90
104
  if (tags && Array.isArray(tags) && tags.length > 0) {
91
- transformed.tags = tags.map((tag) => tag.name);
105
+ transformed["tags.name"] = tags.map((tag) => tag.name);
106
+ transformed["tags.slug"] = tags.map((tag) => tag.slug);
92
107
  }
93
108
  const authors = post.authors;
94
109
  if (authors && Array.isArray(authors) && authors.length > 0) {
@@ -154,7 +169,9 @@ var GhostTypesenseManager = class {
154
169
  * Index a single post in Typesense
155
170
  */
156
171
  async indexPost(postId) {
157
- const post = await this.ghost.posts.read({ id: postId }).include({ tags: true, authors: true }).fetch();
172
+ const post = await this.ghost.posts.read({
173
+ id: postId
174
+ }).include({ tags: true, authors: true }).fetch();
158
175
  if (!post.success) {
159
176
  throw new Error(`Failed to fetch post ${postId} from Ghost`);
160
177
  }
package/dist/index.mjs CHANGED
@@ -41,7 +41,9 @@ var GhostTypesenseManager = class {
41
41
  index: field.index,
42
42
  optional: field.optional,
43
43
  sort: field.sort
44
- }))
44
+ })),
45
+ enable_nested_fields: true
46
+ // Enable nested fields support
45
47
  };
46
48
  await this.typesense.collections().create(schema);
47
49
  }
@@ -50,11 +52,23 @@ var GhostTypesenseManager = class {
50
52
  */
51
53
  transformPost(post) {
52
54
  console.log("Transforming post:", post.id, post.title);
55
+ let plaintext = post.plaintext || "";
56
+ if (post.html) {
57
+ let cleanHtml = post.html.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, "").replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, "");
58
+ cleanHtml = cleanHtml.replace(/<a[^>]*>([^<]*)<\/a>/gi, " $1 ");
59
+ cleanHtml = cleanHtml.replace(/<(strong|b|em|i|mark|span)[^>]*>([^<]*)<\/(strong|b|em|i|mark|span)>/gi, " $2 ");
60
+ cleanHtml = cleanHtml.replace(/<[^>]*>/g, " ");
61
+ cleanHtml = cleanHtml.replace(/&nbsp;/gi, " ").replace(/&amp;/gi, "&").replace(/&lt;/gi, "<").replace(/&gt;/gi, ">").replace(/&quot;/gi, '"').replace(/&#39;/gi, "'").replace(/&[a-z]+;/gi, " ");
62
+ cleanHtml = cleanHtml.replace(/\s+/g, " ").trim();
63
+ if (!plaintext || cleanHtml.length > plaintext.length) {
64
+ plaintext = cleanHtml;
65
+ }
66
+ }
53
67
  const transformed = {
54
68
  id: post.id,
55
69
  title: post.title,
56
70
  slug: post.slug,
57
- html: post.html,
71
+ plaintext,
58
72
  excerpt: post.excerpt || "",
59
73
  published_at: new Date(post.published_at || Date.now()).getTime(),
60
74
  updated_at: new Date(post.updated_at || Date.now()).getTime()
@@ -64,7 +78,8 @@ var GhostTypesenseManager = class {
64
78
  }
65
79
  const tags = post.tags;
66
80
  if (tags && Array.isArray(tags) && tags.length > 0) {
67
- transformed.tags = tags.map((tag) => tag.name);
81
+ transformed["tags.name"] = tags.map((tag) => tag.name);
82
+ transformed["tags.slug"] = tags.map((tag) => tag.slug);
68
83
  }
69
84
  const authors = post.authors;
70
85
  if (authors && Array.isArray(authors) && authors.length > 0) {
@@ -130,7 +145,9 @@ var GhostTypesenseManager = class {
130
145
  * Index a single post in Typesense
131
146
  */
132
147
  async indexPost(postId) {
133
- const post = await this.ghost.posts.read({ id: postId }).include({ tags: true, authors: true }).fetch();
148
+ const post = await this.ghost.posts.read({
149
+ id: postId
150
+ }).include({ tags: true, authors: true }).fetch();
134
151
  if (!post.success) {
135
152
  throw new Error(`Failed to fetch post ${postId} from Ghost`);
136
153
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@magicpages/ghost-typesense-core",
3
- "version": "1.5.1",
3
+ "version": "1.6.1",
4
4
  "description": "Core functionality for Ghost-Typesense integration",
5
5
  "author": "MagicPages",
6
6
  "license": "MIT",
@@ -24,7 +24,7 @@
24
24
  "typecheck": "tsc --noEmit"
25
25
  },
26
26
  "dependencies": {
27
- "@magicpages/ghost-typesense-config": "^1.5.1",
27
+ "@magicpages/ghost-typesense-config": "^1.6.1",
28
28
  "@ts-ghost/content-api": "^4.0.6",
29
29
  "@ts-ghost/core-api": "^4.0.6",
30
30
  "typesense": "^1.7.2",