@yuji-min/google-docs-parser 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +153 -19
- package/dist/index.d.cts +7 -4
- package/dist/index.d.ts +7 -4
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
|
|
12
12
|
**Turn your Google Docs into a Headless CMS.**
|
|
13
13
|
|
|
14
|
-
`google-docs-parser` is a TypeScript library that transforms raw Google Docs content into structured JSON data based on a user-defined schema. Stop wrestling with the raw Google Docs API structure—define your schema and get clean data instantly.
|
|
14
|
+
`google-docs-parser` is a TypeScript library that transforms raw Google Docs content into structured JSON data based on a user-defined schema. Stop wrestling with the raw Google Docs API structure—define your schema and get clean, **fully-typed** data instantly.
|
|
15
15
|
|
|
16
16
|
---
|
|
17
17
|
|
|
@@ -23,11 +23,12 @@ This library solves that complexity by allowing you to define a **Schema** that
|
|
|
23
23
|
|
|
24
24
|
### ✨ Key Features
|
|
25
25
|
|
|
26
|
-
- **Type-Safe:**
|
|
26
|
+
- **100% Type-Safe:** Use `GetParsedType<typeof schema>` to infer the exact return type from your schema—no manual type definitions needed.
|
|
27
27
|
- **Hierarchical Parsing:** Supports nested tree structures (e.g., _Heading 2_ containing _Heading 3_ children).
|
|
28
|
+
- **Consistent Structure:** Tree nodes always follow a `{ title, content }` pattern for predictable data access.
|
|
28
29
|
- **Smart Text Parsing:** Built-in parsers for:
|
|
29
|
-
- Key-Value pairs (e.g., `
|
|
30
|
-
- Delimited fields (e.g., `
|
|
30
|
+
- Key-Value pairs (e.g., `Skills: React, TypeScript`)
|
|
31
|
+
- Delimited fields (e.g., `Engineer | Google | 2024`)
|
|
31
32
|
- Flattened lists or grouped arrays.
|
|
32
33
|
- **Auth Ready:** Seamless integration with `google-auth-library` and `googleapis`.
|
|
33
34
|
|
|
@@ -107,7 +108,11 @@ Imagine a Google Doc structured like a resume or project list:
|
|
|
107
108
|
Create a schema object that mirrors the visual hierarchy of your document.
|
|
108
109
|
|
|
109
110
|
```typescript
|
|
110
|
-
import {
|
|
111
|
+
import {
|
|
112
|
+
getParsedDocument,
|
|
113
|
+
ParseSchema,
|
|
114
|
+
GetParsedType,
|
|
115
|
+
} from "@yuji-min/google-docs-parser";
|
|
111
116
|
|
|
112
117
|
// 1. Define the schema
|
|
113
118
|
const resumeSchema = {
|
|
@@ -124,7 +129,7 @@ const resumeSchema = {
|
|
|
124
129
|
kind: "tree", // This section is a hierarchical tree
|
|
125
130
|
node: {
|
|
126
131
|
// The tree nodes start with "Heading 2"
|
|
127
|
-
//
|
|
132
|
+
// Parse the heading text with delimiter & keys!
|
|
128
133
|
title: {
|
|
129
134
|
namedStyleType: "HEADING_2",
|
|
130
135
|
keys: ["company", "role"],
|
|
@@ -136,16 +141,25 @@ const resumeSchema = {
|
|
|
136
141
|
},
|
|
137
142
|
},
|
|
138
143
|
],
|
|
139
|
-
} as const; // 'as const' is CRITICAL for type inference
|
|
144
|
+
} as const satisfies ParseSchema; // 'as const' is CRITICAL for type inference
|
|
140
145
|
|
|
141
|
-
// 2.
|
|
146
|
+
// 2. Infer the return type from schema (optional but recommended)
|
|
147
|
+
type ResumeData = GetParsedType<typeof resumeSchema>;
|
|
148
|
+
|
|
149
|
+
// 3. Fetch and Parse
|
|
142
150
|
async function main() {
|
|
143
151
|
const docId = "YOUR_GOOGLE_DOC_ID";
|
|
144
152
|
|
|
145
153
|
try {
|
|
146
|
-
// 'data' is fully typed
|
|
154
|
+
// 'data' is fully typed as ResumeData!
|
|
147
155
|
const data = await getParsedDocument(docId, resumeSchema);
|
|
148
156
|
console.log(JSON.stringify(data, null, 2));
|
|
157
|
+
|
|
158
|
+
// ✅ Full type inference - no manual types needed
|
|
159
|
+
const firstJob = data.Experience[0];
|
|
160
|
+
console.log(firstJob.title.company); // "Tech Corp"
|
|
161
|
+
console.log(firstJob.title.role); // "Backend Lead"
|
|
162
|
+
console.log(firstJob.content); // ["Designed microservices...", "Managed..."]
|
|
149
163
|
} catch (error) {
|
|
150
164
|
console.error(error);
|
|
151
165
|
}
|
|
@@ -156,18 +170,18 @@ main();
|
|
|
156
170
|
|
|
157
171
|
### 3. The Result
|
|
158
172
|
|
|
173
|
+
Tree nodes always have a consistent `{ title, content }` structure:
|
|
174
|
+
|
|
159
175
|
```json
|
|
160
176
|
{
|
|
161
177
|
"Profile": "Senior Software Engineer based in Seoul.",
|
|
162
178
|
"Experience": [
|
|
163
179
|
{
|
|
164
|
-
"company": "Tech Corp",
|
|
165
|
-
"role": "Backend Lead",
|
|
180
|
+
"title": { "company": "Tech Corp", "role": "Backend Lead" },
|
|
166
181
|
"content": ["Designed microservices architecture", "Managed a team of 5"]
|
|
167
182
|
},
|
|
168
183
|
{
|
|
169
|
-
"company": "Startup Inc",
|
|
170
|
-
"role": "Full Stack",
|
|
184
|
+
"title": { "company": "Startup Inc", "role": "Full Stack" },
|
|
171
185
|
"content": ["Built MVP in 3 months"]
|
|
172
186
|
}
|
|
173
187
|
]
|
|
@@ -178,7 +192,7 @@ main();
|
|
|
178
192
|
|
|
179
193
|
## 📚 Parsing Schema Guide
|
|
180
194
|
|
|
181
|
-
The `ParseSchema` object controls how the parser reads your document.
|
|
195
|
+
The `ParseSchema` object controls how the parser reads your document. Use `GetParsedType<typeof schema>` to infer the exact TypeScript type from your schema.
|
|
182
196
|
|
|
183
197
|
### Section Configuration
|
|
184
198
|
|
|
@@ -194,20 +208,140 @@ The `ParseSchema` object controls how the parser reads your document.
|
|
|
194
208
|
|
|
195
209
|
If `content` is undefined, the parser collects all paragraphs following the header until the next section starts, joining them into a single string.
|
|
196
210
|
|
|
211
|
+
```typescript
|
|
212
|
+
// Schema
|
|
213
|
+
{ title: { name: "About", namedStyleType: "HEADING_1" } }
|
|
214
|
+
|
|
215
|
+
// Inferred Type → string
|
|
216
|
+
// Result → "Hello, I am a developer."
|
|
217
|
+
```
|
|
218
|
+
|
|
197
219
|
#### 2. List (`kind: "list"`)
|
|
198
220
|
|
|
199
221
|
Parses paragraphs as an array. Useful for bullet points or simple lists.
|
|
200
222
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
223
|
+
| Option | Type | Description |
|
|
224
|
+
| :------------- | :--------- | :------------------------------------------------------- |
|
|
225
|
+
| `isFlatten` | `boolean` | If true, merges multiple lines into a single flat array. |
|
|
226
|
+
| `keyDelimiter` | `string` | Parses `Key: Value` lines into `{ key, value }` objects. |
|
|
227
|
+
| `keys` | `string[]` | Maps delimited values to named fields. |
|
|
228
|
+
| `delimiter` | `string` | Splits a line by a character (default: `,`). |
|
|
229
|
+
|
|
230
|
+
```typescript
|
|
231
|
+
// Schema: Simple list
|
|
232
|
+
{ content: { kind: "list" } }
|
|
233
|
+
// Inferred Type → string[]
|
|
234
|
+
// Result → ["Item 1", "Item 2", "Item 3"]
|
|
235
|
+
|
|
236
|
+
// Schema: Keyed list (Key: Value format)
|
|
237
|
+
{ content: { kind: "list", keyDelimiter: ":", delimiter: "," } }
|
|
238
|
+
// Inferred Type → { key: string; value: string[] }[]
|
|
239
|
+
// Result → [{ key: "Skills", value: ["React", "TypeScript"] }]
|
|
240
|
+
|
|
241
|
+
// Schema: Mapped fields
|
|
242
|
+
{ content: { kind: "list", keys: ["school", "degree"], delimiter: "|" } }
|
|
243
|
+
// Inferred Type → { school: string; degree: string }[]
|
|
244
|
+
// Result → [{ school: "MIT", degree: "B.S. Computer Science" }]
|
|
245
|
+
```
|
|
204
246
|
|
|
205
247
|
#### 3. Tree (`kind: "tree"`)
|
|
206
248
|
|
|
207
|
-
Parses hierarchical structures. Ideal for nested sections like "H2
|
|
249
|
+
Parses hierarchical structures. Ideal for nested sections like "H2 → H3 → Content".
|
|
208
250
|
|
|
209
251
|
- **`node`**: Defines the schema for the child nodes.
|
|
210
|
-
- **Strict Nesting**: The parser automatically stops collecting children when it encounters a heading of the same or higher level
|
|
252
|
+
- **Strict Nesting**: The parser automatically stops collecting children when it encounters a heading of the same or higher level.
|
|
253
|
+
|
|
254
|
+
**Tree nodes always have a consistent `{ title, content }` structure:**
|
|
255
|
+
|
|
256
|
+
```typescript
|
|
257
|
+
// Schema
|
|
258
|
+
{
|
|
259
|
+
content: {
|
|
260
|
+
kind: "tree",
|
|
261
|
+
node: {
|
|
262
|
+
title: {
|
|
263
|
+
namedStyleType: "HEADING_2",
|
|
264
|
+
keys: ["role", "company"],
|
|
265
|
+
delimiter: "|"
|
|
266
|
+
},
|
|
267
|
+
content: { kind: "list" }
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
// Inferred Type
|
|
273
|
+
// {
|
|
274
|
+
// title: { role: string; company: string };
|
|
275
|
+
// content: string[];
|
|
276
|
+
// }[]
|
|
277
|
+
|
|
278
|
+
// Result
|
|
279
|
+
[
|
|
280
|
+
{
|
|
281
|
+
"title": { "role": "Engineer", "company": "Google" },
|
|
282
|
+
"content": ["Built APIs", "Led team"]
|
|
283
|
+
}
|
|
284
|
+
]
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
### Title Parsing Options
|
|
288
|
+
|
|
289
|
+
The `title` field in tree nodes can be parsed in three ways:
|
|
290
|
+
|
|
291
|
+
| Configuration | Title Type | Access Pattern |
|
|
292
|
+
| :------------------- | :--------------------------------- | :----------------------------------- |
|
|
293
|
+
| No options | `string` | `node.title` |
|
|
294
|
+
| `keys` + `delimiter` | `{ [key]: string }` | `node.title.role` |
|
|
295
|
+
| `keyDelimiter` | `{ key: string; value: string[] }` | `node.title.key`, `node.title.value` |
|
|
296
|
+
|
|
297
|
+
---
|
|
298
|
+
|
|
299
|
+
## 🔮 Type Inference with `GetParsedType`
|
|
300
|
+
|
|
301
|
+
The library provides a `GetParsedType<T>` utility type that infers the exact return type from your schema:
|
|
302
|
+
|
|
303
|
+
```typescript
|
|
304
|
+
import type { ParseSchema, GetParsedType } from "@yuji-min/google-docs-parser";
|
|
305
|
+
|
|
306
|
+
const schema = {
|
|
307
|
+
sections: [
|
|
308
|
+
{ title: { name: "Bio", namedStyleType: "HEADING_1" } },
|
|
309
|
+
{
|
|
310
|
+
title: { name: "Skills", namedStyleType: "HEADING_1" },
|
|
311
|
+
content: { kind: "list", keyDelimiter: ":", delimiter: "," },
|
|
312
|
+
},
|
|
313
|
+
{
|
|
314
|
+
title: { name: "Career", namedStyleType: "HEADING_1" },
|
|
315
|
+
content: {
|
|
316
|
+
kind: "tree",
|
|
317
|
+
node: {
|
|
318
|
+
title: {
|
|
319
|
+
namedStyleType: "HEADING_2",
|
|
320
|
+
keys: ["role", "company", "period"],
|
|
321
|
+
delimiter: "|",
|
|
322
|
+
},
|
|
323
|
+
content: { kind: "list" },
|
|
324
|
+
},
|
|
325
|
+
},
|
|
326
|
+
},
|
|
327
|
+
],
|
|
328
|
+
} as const satisfies ParseSchema;
|
|
329
|
+
|
|
330
|
+
// ✅ Fully inferred type - no manual interfaces needed!
|
|
331
|
+
type MyData = GetParsedType<typeof schema>;
|
|
332
|
+
|
|
333
|
+
// Equivalent to:
|
|
334
|
+
// {
|
|
335
|
+
// Bio: string;
|
|
336
|
+
// Skills: { key: string; value: string[] }[];
|
|
337
|
+
// Career: {
|
|
338
|
+
// title: { role: string; company: string; period: string };
|
|
339
|
+
// content: string[];
|
|
340
|
+
// }[];
|
|
341
|
+
// }
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
> **Note:** Always use `as const satisfies ParseSchema` for accurate type inference.
|
|
211
345
|
|
|
212
346
|
---
|
|
213
347
|
|
package/dist/index.d.cts
CHANGED
|
@@ -128,11 +128,14 @@ type NodeContentItems<C extends Content | undefined> = C extends {
|
|
|
128
128
|
/**
|
|
129
129
|
* Helper Type: Represents the fully resolved type of a single Node.
|
|
130
130
|
*
|
|
131
|
-
*
|
|
132
|
-
*
|
|
133
|
-
*
|
|
131
|
+
* Structure:
|
|
132
|
+
* - `title`: The parsed title (string, object with keys, or keyed list).
|
|
133
|
+
* - `content`: The parsed body content (children nodes or list items).
|
|
134
|
+
*
|
|
135
|
+
* This matches the runtime structure: `{ title: ..., content: [...] }`.
|
|
134
136
|
*/
|
|
135
|
-
type StructuredItem<N extends Node> =
|
|
137
|
+
type StructuredItem<N extends Node> = {
|
|
138
|
+
title: ItemField<N["title"]>;
|
|
136
139
|
content: NodeContentItems<N["content"]>;
|
|
137
140
|
};
|
|
138
141
|
/**
|
package/dist/index.d.ts
CHANGED
|
@@ -128,11 +128,14 @@ type NodeContentItems<C extends Content | undefined> = C extends {
|
|
|
128
128
|
/**
|
|
129
129
|
* Helper Type: Represents the fully resolved type of a single Node.
|
|
130
130
|
*
|
|
131
|
-
*
|
|
132
|
-
*
|
|
133
|
-
*
|
|
131
|
+
* Structure:
|
|
132
|
+
* - `title`: The parsed title (string, object with keys, or keyed list).
|
|
133
|
+
* - `content`: The parsed body content (children nodes or list items).
|
|
134
|
+
*
|
|
135
|
+
* This matches the runtime structure: `{ title: ..., content: [...] }`.
|
|
134
136
|
*/
|
|
135
|
-
type StructuredItem<N extends Node> =
|
|
137
|
+
type StructuredItem<N extends Node> = {
|
|
138
|
+
title: ItemField<N["title"]>;
|
|
136
139
|
content: NodeContentItems<N["content"]>;
|
|
137
140
|
};
|
|
138
141
|
/**
|
package/package.json
CHANGED