@yuji-min/google-docs-parser 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +154 -19
- package/dist/index.d.cts +20 -7
- package/dist/index.d.ts +20 -7
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
|
|
12
12
|
**Turn your Google Docs into a Headless CMS.**
|
|
13
13
|
|
|
14
|
-
`google-docs-parser` is a TypeScript library that transforms raw Google Docs content into structured JSON data based on a user-defined schema. Stop wrestling with the raw Google Docs API structure—define your schema and get clean data instantly.
|
|
14
|
+
`google-docs-parser` is a TypeScript library that transforms raw Google Docs content into structured JSON data based on a user-defined schema. Stop wrestling with the raw Google Docs API structure—define your schema and get clean, **fully typed** data instantly.
|
|
15
15
|
|
|
16
16
|
---
|
|
17
17
|
|
|
@@ -23,11 +23,12 @@ This library solves that complexity by allowing you to define a **Schema** that
|
|
|
23
23
|
|
|
24
24
|
### ✨ Key Features
|
|
25
25
|
|
|
26
|
-
- **Type-Safe:**
|
|
26
|
+
- **100% Type-Safe:** Use `GetParsedType<typeof schema>` to infer the exact return type from your schema—no manual type definitions needed.
|
|
27
27
|
- **Hierarchical Parsing:** Supports nested tree structures (e.g., _Heading 2_ containing _Heading 3_ children).
|
|
28
|
+
- **Consistent Structure:** Tree nodes always follow a `{ title, content }` pattern for predictable data access.
|
|
28
29
|
- **Smart Text Parsing:** Built-in parsers for:
|
|
29
|
-
- Key-Value pairs (e.g., `
|
|
30
|
-
- Delimited fields (e.g., `
|
|
30
|
+
- Key-Value pairs (e.g., `Skills: React, TypeScript`)
|
|
31
|
+
- Delimited fields (e.g., `Engineer | Google | 2024`)
|
|
31
32
|
- Flattened lists or grouped arrays.
|
|
32
33
|
- **Auth Ready:** Seamless integration with `google-auth-library` and `googleapis`.
|
|
33
34
|
|
|
@@ -107,7 +108,11 @@ Imagine a Google Doc structured like a resume or project list:
|
|
|
107
108
|
Create a schema object that mirrors the visual hierarchy of your document.
|
|
108
109
|
|
|
109
110
|
```typescript
|
|
110
|
-
import {
|
|
111
|
+
import {
|
|
112
|
+
getParsedDocument,
|
|
113
|
+
ParseSchema,
|
|
114
|
+
GetParsedType,
|
|
115
|
+
} from "@yuji-min/google-docs-parser";
|
|
111
116
|
|
|
112
117
|
// 1. Define the schema
|
|
113
118
|
const resumeSchema = {
|
|
@@ -124,7 +129,7 @@ const resumeSchema = {
|
|
|
124
129
|
kind: "tree", // This section is a hierarchical tree
|
|
125
130
|
node: {
|
|
126
131
|
// The tree nodes start with "Heading 2"
|
|
127
|
-
//
|
|
132
|
+
// Parse the heading text with delimiter & keys!
|
|
128
133
|
title: {
|
|
129
134
|
namedStyleType: "HEADING_2",
|
|
130
135
|
keys: ["company", "role"],
|
|
@@ -136,16 +141,26 @@ const resumeSchema = {
|
|
|
136
141
|
},
|
|
137
142
|
},
|
|
138
143
|
],
|
|
139
|
-
} as const; // 'as const' is CRITICAL for type inference
|
|
144
|
+
} as const satisfies ParseSchema; // 'as const' is CRITICAL for type inference
|
|
140
145
|
|
|
141
|
-
// 2.
|
|
146
|
+
// 2. Infer the return type from schema (optional but recommended)
|
|
147
|
+
type ResumeData = GetParsedType<typeof resumeSchema>;
|
|
148
|
+
|
|
149
|
+
// 3. Fetch and Parse
|
|
142
150
|
async function main() {
|
|
143
151
|
const docId = "YOUR_GOOGLE_DOC_ID";
|
|
144
152
|
|
|
145
153
|
try {
|
|
146
|
-
// 'data' is fully typed
|
|
154
|
+
// 'data' is fully typed as ResumeData!
|
|
147
155
|
const data = await getParsedDocument(docId, resumeSchema);
|
|
148
156
|
console.log(JSON.stringify(data, null, 2));
|
|
157
|
+
|
|
158
|
+
// ✅ Full type inference - no manual types needed
|
|
159
|
+
console.log(data.Profile); // string: "Senior Software Engineer..."
|
|
160
|
+
const firstJob = data.Experience[0];
|
|
161
|
+
console.log(firstJob.title.company); // "Tech Corp"
|
|
162
|
+
console.log(firstJob.title.role); // "Backend Lead"
|
|
163
|
+
console.log(firstJob.content); // ["Designed microservices...", "Managed..."]
|
|
149
164
|
} catch (error) {
|
|
150
165
|
console.error(error);
|
|
151
166
|
}
|
|
@@ -156,18 +171,18 @@ main();
|
|
|
156
171
|
|
|
157
172
|
### 3. The Result
|
|
158
173
|
|
|
174
|
+
Tree nodes always have a consistent `{ title, content }` structure:
|
|
175
|
+
|
|
159
176
|
```json
|
|
160
177
|
{
|
|
161
178
|
"Profile": "Senior Software Engineer based in Seoul.",
|
|
162
179
|
"Experience": [
|
|
163
180
|
{
|
|
164
|
-
"company": "Tech Corp",
|
|
165
|
-
"role": "Backend Lead",
|
|
181
|
+
"title": { "company": "Tech Corp", "role": "Backend Lead" },
|
|
166
182
|
"content": ["Designed microservices architecture", "Managed a team of 5"]
|
|
167
183
|
},
|
|
168
184
|
{
|
|
169
|
-
"company": "Startup Inc",
|
|
170
|
-
"role": "Full Stack",
|
|
185
|
+
"title": { "company": "Startup Inc", "role": "Full Stack" },
|
|
171
186
|
"content": ["Built MVP in 3 months"]
|
|
172
187
|
}
|
|
173
188
|
]
|
|
@@ -178,7 +193,7 @@ main();
|
|
|
178
193
|
|
|
179
194
|
## 📚 Parsing Schema Guide
|
|
180
195
|
|
|
181
|
-
The `ParseSchema` object controls how the parser reads your document.
|
|
196
|
+
The `ParseSchema` object controls how the parser reads your document. Use `GetParsedType<typeof schema>` to infer the exact TypeScript type from your schema.
|
|
182
197
|
|
|
183
198
|
### Section Configuration
|
|
184
199
|
|
|
@@ -194,20 +209,140 @@ The `ParseSchema` object controls how the parser reads your document.
|
|
|
194
209
|
|
|
195
210
|
If `content` is undefined, the parser collects all paragraphs following the header until the next section starts, joining them into a single string.
|
|
196
211
|
|
|
212
|
+
```typescript
|
|
213
|
+
// Schema
|
|
214
|
+
{ title: { name: "About", namedStyleType: "HEADING_1" } }
|
|
215
|
+
|
|
216
|
+
// Inferred Type → string
|
|
217
|
+
// Result → "Hello, I am a developer."
|
|
218
|
+
```
|
|
219
|
+
|
|
197
220
|
#### 2. List (`kind: "list"`)
|
|
198
221
|
|
|
199
222
|
Parses paragraphs as an array. Useful for bullet points or simple lists.
|
|
200
223
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
224
|
+
| Option | Type | Description |
|
|
225
|
+
| :------------- | :--------- | :------------------------------------------------------- |
|
|
226
|
+
| `isFlatten` | `boolean` | If true, merges multiple lines into a single flat array. |
|
|
227
|
+
| `keyDelimiter` | `string` | Parses `Key: Value` lines into `{ key, value }` objects. |
|
|
228
|
+
| `keys` | `string[]` | Maps delimited values to named fields. |
|
|
229
|
+
| `delimiter` | `string` | Splits a line by a character (default: `,`). |
|
|
230
|
+
|
|
231
|
+
```typescript
|
|
232
|
+
// Schema: Simple list
|
|
233
|
+
{ content: { kind: "list" } }
|
|
234
|
+
// Inferred Type → string[]
|
|
235
|
+
// Result → ["Item 1", "Item 2", "Item 3"]
|
|
236
|
+
|
|
237
|
+
// Schema: Keyed list (Key: Value format)
|
|
238
|
+
{ content: { kind: "list", keyDelimiter: ":", delimiter: "," } }
|
|
239
|
+
// Inferred Type → { key: string; value: string[] }[]
|
|
240
|
+
// Result → [{ key: "Skills", value: ["React", "TypeScript"] }]
|
|
241
|
+
|
|
242
|
+
// Schema: Mapped fields
|
|
243
|
+
{ content: { kind: "list", keys: ["school", "degree"], delimiter: "|" } }
|
|
244
|
+
// Inferred Type → { school: string; degree: string }[]
|
|
245
|
+
// Result → [{ school: "MIT", degree: "B.S. Computer Science" }]
|
|
246
|
+
```
|
|
204
247
|
|
|
205
248
|
#### 3. Tree (`kind: "tree"`)
|
|
206
249
|
|
|
207
|
-
Parses hierarchical structures. Ideal for nested sections like "H2
|
|
250
|
+
Parses hierarchical structures. Ideal for nested sections like "H2 → H3 → Content".
|
|
208
251
|
|
|
209
252
|
- **`node`**: Defines the schema for the child nodes.
|
|
210
|
-
- **Strict Nesting**: The parser automatically stops collecting children when it encounters a heading of the same or higher level
|
|
253
|
+
- **Strict Nesting**: The parser automatically stops collecting children when it encounters a heading of the same or higher level.
|
|
254
|
+
|
|
255
|
+
**Tree nodes always have a consistent `{ title, content }` structure:**
|
|
256
|
+
|
|
257
|
+
```typescript
|
|
258
|
+
// Schema
|
|
259
|
+
{
|
|
260
|
+
content: {
|
|
261
|
+
kind: "tree",
|
|
262
|
+
node: {
|
|
263
|
+
title: {
|
|
264
|
+
namedStyleType: "HEADING_2",
|
|
265
|
+
keys: ["role", "company"],
|
|
266
|
+
delimiter: "|"
|
|
267
|
+
},
|
|
268
|
+
content: { kind: "list" }
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
// Inferred Type
|
|
274
|
+
// {
|
|
275
|
+
// title: { role: string; company: string };
|
|
276
|
+
// content: string[];
|
|
277
|
+
// }[]
|
|
278
|
+
|
|
279
|
+
// Result
|
|
280
|
+
[
|
|
281
|
+
{
|
|
282
|
+
"title": { "role": "Engineer", "company": "Google" },
|
|
283
|
+
"content": ["Built APIs", "Led team"]
|
|
284
|
+
}
|
|
285
|
+
]
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
### Title Parsing Options
|
|
289
|
+
|
|
290
|
+
The `title` field in tree nodes can be parsed in three ways:
|
|
291
|
+
|
|
292
|
+
| Configuration | Title Type | Access Pattern |
|
|
293
|
+
| :------------------- | :--------------------------------- | :----------------------------------- |
|
|
294
|
+
| No options | `string` | `node.title` |
|
|
295
|
+
| `keys` + `delimiter` | `{ [key]: string }` | `node.title.role` |
|
|
296
|
+
| `keyDelimiter` | `{ key: string; value: string[] }` | `node.title.key`, `node.title.value` |
|
|
297
|
+
|
|
298
|
+
---
|
|
299
|
+
|
|
300
|
+
## 🔮 Type Inference with `GetParsedType`
|
|
301
|
+
|
|
302
|
+
The library provides a `GetParsedType<T>` utility type that infers the exact return type from your schema:
|
|
303
|
+
|
|
304
|
+
```typescript
|
|
305
|
+
import type { ParseSchema, GetParsedType } from "@yuji-min/google-docs-parser";
|
|
306
|
+
|
|
307
|
+
const schema = {
|
|
308
|
+
sections: [
|
|
309
|
+
{ title: { name: "Bio", namedStyleType: "HEADING_1" } },
|
|
310
|
+
{
|
|
311
|
+
title: { name: "Skills", namedStyleType: "HEADING_1" },
|
|
312
|
+
content: { kind: "list", keyDelimiter: ":", delimiter: "," },
|
|
313
|
+
},
|
|
314
|
+
{
|
|
315
|
+
title: { name: "Career", namedStyleType: "HEADING_1" },
|
|
316
|
+
content: {
|
|
317
|
+
kind: "tree",
|
|
318
|
+
node: {
|
|
319
|
+
title: {
|
|
320
|
+
namedStyleType: "HEADING_2",
|
|
321
|
+
keys: ["role", "company", "period"],
|
|
322
|
+
delimiter: "|",
|
|
323
|
+
},
|
|
324
|
+
content: { kind: "list" },
|
|
325
|
+
},
|
|
326
|
+
},
|
|
327
|
+
},
|
|
328
|
+
],
|
|
329
|
+
} as const satisfies ParseSchema;
|
|
330
|
+
|
|
331
|
+
// ✅ Fully inferred type - no manual interfaces needed!
|
|
332
|
+
type MyData = GetParsedType<typeof schema>;
|
|
333
|
+
|
|
334
|
+
// Equivalent to:
|
|
335
|
+
// {
|
|
336
|
+
// Bio: string;
|
|
337
|
+
// Skills: { key: string; value: string[] }[];
|
|
338
|
+
// Career: {
|
|
339
|
+
// title: { role: string; company: string; period: string };
|
|
340
|
+
// content: string[];
|
|
341
|
+
// }[];
|
|
342
|
+
// }
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
> **Note:** Always use `as const satisfies ParseSchema` for accurate type inference.
|
|
211
346
|
|
|
212
347
|
---
|
|
213
348
|
|
package/dist/index.d.cts
CHANGED
|
@@ -104,15 +104,25 @@ type ItemField<T extends Schema> = T extends {
|
|
|
104
104
|
} ? {
|
|
105
105
|
[P in K & string]: string;
|
|
106
106
|
} : string;
|
|
107
|
+
/**
|
|
108
|
+
* Helper Type: Represents a keyed list item (parsed from "Key: Value" format).
|
|
109
|
+
*/
|
|
110
|
+
type KeyedListItem = {
|
|
111
|
+
key: string;
|
|
112
|
+
value: string[];
|
|
113
|
+
};
|
|
107
114
|
/**
|
|
108
115
|
* Helper Type: Infers the result type for a `List` content.
|
|
109
116
|
*
|
|
110
|
-
* - If
|
|
117
|
+
* - If `keys` is defined, returns an array of mapped objects.
|
|
118
|
+
* - If `keyDelimiter` is defined, returns an array of `{ key, value }` objects.
|
|
111
119
|
* - Otherwise, returns an array of strings.
|
|
112
120
|
*/
|
|
113
121
|
type ContentListType<C extends List> = C extends {
|
|
114
122
|
keys: readonly any[];
|
|
115
|
-
} ? Array<ItemField<C>> :
|
|
123
|
+
} ? Array<ItemField<C>> : C extends {
|
|
124
|
+
keyDelimiter: string;
|
|
125
|
+
} ? Array<KeyedListItem> : string[];
|
|
116
126
|
/**
|
|
117
127
|
* Helper Type: Recursively infers the type of a Node's content.
|
|
118
128
|
*
|
|
@@ -128,11 +138,14 @@ type NodeContentItems<C extends Content | undefined> = C extends {
|
|
|
128
138
|
/**
|
|
129
139
|
* Helper Type: Represents the fully resolved type of a single Node.
|
|
130
140
|
*
|
|
131
|
-
*
|
|
132
|
-
*
|
|
133
|
-
*
|
|
141
|
+
* Structure:
|
|
142
|
+
* - `title`: The parsed title (a string, an object with named keys, or a `{ key, value }` keyed item).
|
|
143
|
+
* - `content`: The parsed body content (children nodes or list items).
|
|
144
|
+
*
|
|
145
|
+
* This matches the runtime structure: `{ title: ..., content: [...] }`.
|
|
134
146
|
*/
|
|
135
|
-
type StructuredItem<N extends Node> =
|
|
147
|
+
type StructuredItem<N extends Node> = {
|
|
148
|
+
title: ItemField<N["title"]>;
|
|
136
149
|
content: NodeContentItems<N["content"]>;
|
|
137
150
|
};
|
|
138
151
|
/**
|
|
@@ -152,7 +165,7 @@ type GetParsedType<T extends ParseSchema> = {
|
|
|
152
165
|
node: infer N extends Node;
|
|
153
166
|
} ? Array<StructuredItem<N>> : S["content"] extends {
|
|
154
167
|
kind: "list";
|
|
155
|
-
} ? ContentListType<S["content"]> :
|
|
168
|
+
} ? ContentListType<S["content"]> : string;
|
|
156
169
|
};
|
|
157
170
|
|
|
158
171
|
/**
|
package/dist/index.d.ts
CHANGED
|
@@ -104,15 +104,25 @@ type ItemField<T extends Schema> = T extends {
|
|
|
104
104
|
} ? {
|
|
105
105
|
[P in K & string]: string;
|
|
106
106
|
} : string;
|
|
107
|
+
/**
|
|
108
|
+
* Helper Type: Represents a keyed list item (parsed from "Key: Value" format).
|
|
109
|
+
*/
|
|
110
|
+
type KeyedListItem = {
|
|
111
|
+
key: string;
|
|
112
|
+
value: string[];
|
|
113
|
+
};
|
|
107
114
|
/**
|
|
108
115
|
* Helper Type: Infers the result type for a `List` content.
|
|
109
116
|
*
|
|
110
|
-
* - If
|
|
117
|
+
* - If `keys` is defined, returns an array of mapped objects.
|
|
118
|
+
* - If `keyDelimiter` is defined, returns an array of `{ key, value }` objects.
|
|
111
119
|
* - Otherwise, returns an array of strings.
|
|
112
120
|
*/
|
|
113
121
|
type ContentListType<C extends List> = C extends {
|
|
114
122
|
keys: readonly any[];
|
|
115
|
-
} ? Array<ItemField<C>> :
|
|
123
|
+
} ? Array<ItemField<C>> : C extends {
|
|
124
|
+
keyDelimiter: string;
|
|
125
|
+
} ? Array<KeyedListItem> : string[];
|
|
116
126
|
/**
|
|
117
127
|
* Helper Type: Recursively infers the type of a Node's content.
|
|
118
128
|
*
|
|
@@ -128,11 +138,14 @@ type NodeContentItems<C extends Content | undefined> = C extends {
|
|
|
128
138
|
/**
|
|
129
139
|
* Helper Type: Represents the fully resolved type of a single Node.
|
|
130
140
|
*
|
|
131
|
-
*
|
|
132
|
-
*
|
|
133
|
-
*
|
|
141
|
+
* Structure:
|
|
142
|
+
* - `title`: The parsed title (a string, an object with named keys, or a `{ key, value }` keyed item).
|
|
143
|
+
* - `content`: The parsed body content (children nodes or list items).
|
|
144
|
+
*
|
|
145
|
+
* This matches the runtime structure: `{ title: ..., content: [...] }`.
|
|
134
146
|
*/
|
|
135
|
-
type StructuredItem<N extends Node> =
|
|
147
|
+
type StructuredItem<N extends Node> = {
|
|
148
|
+
title: ItemField<N["title"]>;
|
|
136
149
|
content: NodeContentItems<N["content"]>;
|
|
137
150
|
};
|
|
138
151
|
/**
|
|
@@ -152,7 +165,7 @@ type GetParsedType<T extends ParseSchema> = {
|
|
|
152
165
|
node: infer N extends Node;
|
|
153
166
|
} ? Array<StructuredItem<N>> : S["content"] extends {
|
|
154
167
|
kind: "list";
|
|
155
|
-
} ? ContentListType<S["content"]> :
|
|
168
|
+
} ? ContentListType<S["content"]> : string;
|
|
156
169
|
};
|
|
157
170
|
|
|
158
171
|
/**
|
package/package.json
CHANGED