@yuji-min/google-docs-parser 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,7 +11,7 @@
11
11
 
12
12
  **Turn your Google Docs into a Headless CMS.**
13
13
 
14
- `google-docs-parser` is a TypeScript library that transforms raw Google Docs content into structured JSON data based on a user-defined schema. Stop wrestling with the raw Google Docs API structure—define your schema and get clean data instantly.
14
+ `google-docs-parser` is a TypeScript library that transforms raw Google Docs content into structured JSON data based on a user-defined schema. Stop wrestling with the raw Google Docs API structure—define your schema and get clean, **fully-typed** data instantly.
15
15
 
16
16
  ---
17
17
 
@@ -23,11 +23,12 @@ This library solves that complexity by allowing you to define a **Schema** that
23
23
 
24
24
  ### ✨ Key Features
25
25
 
26
- - **Type-Safe:** The return type is automatically inferred from your schema configuration using TypeScript generics.
26
+ - **100% Type-Safe:** Use `GetParsedType<typeof schema>` to infer the exact return type from your schema—no manual type definitions needed.
27
27
  - **Hierarchical Parsing:** Supports nested tree structures (e.g., _Heading 2_ containing _Heading 3_ children).
28
+ - **Consistent Structure:** Tree nodes always follow a `{ title, content }` pattern for predictable data access.
28
29
  - **Smart Text Parsing:** Built-in parsers for:
29
- - Key-Value pairs (e.g., `Role: Engineer`)
30
- - Delimited fields (e.g., `2024 | Senior Dev | Google`)
30
+ - Key-Value pairs (e.g., `Skills: React, TypeScript`)
31
+ - Delimited fields (e.g., `Engineer | Google | 2024`)
31
32
  - Flattened lists or grouped arrays.
32
33
  - **Auth Ready:** Seamless integration with `google-auth-library` and `googleapis`.
33
34
 
@@ -107,7 +108,11 @@ Imagine a Google Doc structured like a resume or project list:
107
108
  Create a schema object that mirrors the visual hierarchy of your document.
108
109
 
109
110
  ```typescript
110
- import { getParsedDocument, ParseSchema } from "@yuji-min/google-docs-parser";
111
+ import {
112
+ getParsedDocument,
113
+ ParseSchema,
114
+ GetParsedType,
115
+ } from "@yuji-min/google-docs-parser";
111
116
 
112
117
  // 1. Define the schema
113
118
  const resumeSchema = {
@@ -124,7 +129,7 @@ const resumeSchema = {
124
129
  kind: "tree", // This section is a hierarchical tree
125
130
  node: {
126
131
  // The tree nodes start with "Heading 2"
127
- // We can also parse the heading text itself!
132
+ // Parse the heading text with delimiter & keys!
128
133
  title: {
129
134
  namedStyleType: "HEADING_2",
130
135
  keys: ["company", "role"],
@@ -136,16 +141,26 @@ const resumeSchema = {
136
141
  },
137
142
  },
138
143
  ],
139
- } as const; // 'as const' is CRITICAL for type inference
144
+ } as const satisfies ParseSchema; // 'as const' is CRITICAL for type inference
140
145
 
141
- // 2. Fetch and Parse
146
+ // 2. Infer the return type from schema (optional but recommended)
147
+ type ResumeData = GetParsedType<typeof resumeSchema>;
148
+
149
+ // 3. Fetch and Parse
142
150
  async function main() {
143
151
  const docId = "YOUR_GOOGLE_DOC_ID";
144
152
 
145
153
  try {
146
- // 'data' is fully typed based on resumeSchema!
154
+ // 'data' is fully typed as ResumeData!
147
155
  const data = await getParsedDocument(docId, resumeSchema);
148
156
  console.log(JSON.stringify(data, null, 2));
157
+
158
+ // ✅ Full type inference - no manual types needed
159
+ console.log(data.Profile); // string: "Senior Software Engineer..."
160
+ const firstJob = data.Experience[0];
161
+ console.log(firstJob.title.company); // "Tech Corp"
162
+ console.log(firstJob.title.role); // "Backend Lead"
163
+ console.log(firstJob.content); // ["Designed microservices...", "Managed..."]
149
164
  } catch (error) {
150
165
  console.error(error);
151
166
  }
@@ -156,18 +171,18 @@ main();
156
171
 
157
172
  ### 3. The Result
158
173
 
174
+ Tree nodes always have a consistent `{ title, content }` structure:
175
+
159
176
  ```json
160
177
  {
161
178
  "Profile": "Senior Software Engineer based in Seoul.",
162
179
  "Experience": [
163
180
  {
164
- "company": "Tech Corp",
165
- "role": "Backend Lead",
181
+ "title": { "company": "Tech Corp", "role": "Backend Lead" },
166
182
  "content": ["Designed microservices architecture", "Managed a team of 5"]
167
183
  },
168
184
  {
169
- "company": "Startup Inc",
170
- "role": "Full Stack",
185
+ "title": { "company": "Startup Inc", "role": "Full Stack" },
171
186
  "content": ["Built MVP in 3 months"]
172
187
  }
173
188
  ]
@@ -178,7 +193,7 @@ main();
178
193
 
179
194
  ## 📚 Parsing Schema Guide
180
195
 
181
- The `ParseSchema` object controls how the parser reads your document.
196
+ The `ParseSchema` object controls how the parser reads your document. Use `GetParsedType<typeof schema>` to infer the exact TypeScript type from your schema.
182
197
 
183
198
  ### Section Configuration
184
199
 
@@ -194,20 +209,140 @@ The `ParseSchema` object controls how the parser reads your document.
194
209
 
195
210
  If `content` is undefined, the parser collects all paragraphs following the header until the next section starts, joining them into a single string.
196
211
 
212
+ ```typescript
213
+ // Schema
214
+ { title: { name: "About", namedStyleType: "HEADING_1" } }
215
+
216
+ // Inferred Type → string
217
+ // Result → "Hello, I am a developer."
218
+ ```
219
+
197
220
  #### 2. List (`kind: "list"`)
198
221
 
199
222
  Parses paragraphs as an array. Useful for bullet points or simple lists.
200
223
 
201
- - **`isFlatten`**: (boolean) If true, merges multiple lines into a single flat array.
202
- - **`keyDelimiter`**: (string) Parses "Key: Value" lines into `{ key: "...", value: [...] }` objects.
203
- - **`delimiter`**: (string) Splits a line by a character (e.g., comma) into an array.
224
+ | Option | Type | Description |
225
+ | :------------- | :--------- | :------------------------------------------------------- |
226
+ | `isFlatten` | `boolean` | If true, merges multiple lines into a single flat array. |
227
+ | `keyDelimiter` | `string` | Parses `Key: Value` lines into `{ key, value }` objects. |
228
+ | `keys` | `string[]` | Maps delimited values to named fields. |
229
+ | `delimiter` | `string` | Splits a line by a character (default: `,`). |
230
+
231
+ ```typescript
232
+ // Schema: Simple list
233
+ { content: { kind: "list" } }
234
+ // Inferred Type → string[]
235
+ // Result → ["Item 1", "Item 2", "Item 3"]
236
+
237
+ // Schema: Keyed list (Key: Value format)
238
+ { content: { kind: "list", keyDelimiter: ":", delimiter: "," } }
239
+ // Inferred Type → { key: string; value: string[] }[]
240
+ // Result → [{ key: "Skills", value: ["React", "TypeScript"] }]
241
+
242
+ // Schema: Mapped fields
243
+ { content: { kind: "list", keys: ["school", "degree"], delimiter: "|" } }
244
+ // Inferred Type → { school: string; degree: string }[]
245
+ // Result → [{ school: "MIT", degree: "B.S. Computer Science" }]
246
+ ```
204
247
 
205
248
  #### 3. Tree (`kind: "tree"`)
206
249
 
207
- Parses hierarchical structures. Ideal for nested sections like "H2 -> H3 -> Content".
250
+ Parses hierarchical structures. Ideal for nested sections like "H2 → H3 → Content".
208
251
 
209
252
  - **`node`**: Defines the schema for the child nodes.
210
- - **Strict Nesting**: The parser automatically stops collecting children when it encounters a heading of the same or higher level (e.g., an H2 stops an open H2 block).
253
+ - **Strict Nesting**: The parser automatically stops collecting children when it encounters a heading of the same or higher level.
254
+
255
+ **Tree nodes always have a consistent `{ title, content }` structure:**
256
+
257
+ ```typescript
258
+ // Schema
259
+ {
260
+ content: {
261
+ kind: "tree",
262
+ node: {
263
+ title: {
264
+ namedStyleType: "HEADING_2",
265
+ keys: ["role", "company"],
266
+ delimiter: "|"
267
+ },
268
+ content: { kind: "list" }
269
+ }
270
+ }
271
+ }
272
+
273
+ // Inferred Type
274
+ // {
275
+ // title: { role: string; company: string };
276
+ // content: string[];
277
+ // }[]
278
+
279
+ // Result
280
+ [
281
+ {
282
+ "title": { "role": "Engineer", "company": "Google" },
283
+ "content": ["Built APIs", "Led team"]
284
+ }
285
+ ]
286
+ ```
287
+
288
+ ### Title Parsing Options
289
+
290
+ The `title` field in tree nodes can be parsed in three ways:
291
+
292
+ | Configuration | Title Type | Access Pattern |
293
+ | :------------------- | :--------------------------------- | :----------------------------------- |
294
+ | No options | `string` | `node.title` |
295
+ | `keys` + `delimiter` | `{ [key]: string }` | `node.title.role` |
296
+ | `keyDelimiter` | `{ key: string; value: string[] }` | `node.title.key`, `node.title.value` |
297
+
298
+ ---
299
+
300
+ ## 🔮 Type Inference with `GetParsedType`
301
+
302
+ The library provides a `GetParsedType<T>` utility type that infers the exact return type from your schema:
303
+
304
+ ```typescript
305
+ import type { ParseSchema, GetParsedType } from "@yuji-min/google-docs-parser";
306
+
307
+ const schema = {
308
+ sections: [
309
+ { title: { name: "Bio", namedStyleType: "HEADING_1" } },
310
+ {
311
+ title: { name: "Skills", namedStyleType: "HEADING_1" },
312
+ content: { kind: "list", keyDelimiter: ":", delimiter: "," },
313
+ },
314
+ {
315
+ title: { name: "Career", namedStyleType: "HEADING_1" },
316
+ content: {
317
+ kind: "tree",
318
+ node: {
319
+ title: {
320
+ namedStyleType: "HEADING_2",
321
+ keys: ["role", "company", "period"],
322
+ delimiter: "|",
323
+ },
324
+ content: { kind: "list" },
325
+ },
326
+ },
327
+ },
328
+ ],
329
+ } as const satisfies ParseSchema;
330
+
331
+ // ✅ Fully inferred type - no manual interfaces needed!
332
+ type MyData = GetParsedType<typeof schema>;
333
+
334
+ // Equivalent to:
335
+ // {
336
+ // Bio: string;
337
+ // Skills: { key: string; value: string[] }[];
338
+ // Career: {
339
+ // title: { role: string; company: string; period: string };
340
+ // content: string[];
341
+ // }[];
342
+ // }
343
+ ```
344
+
345
+ > **Note:** Always use `as const satisfies ParseSchema` for accurate type inference.
211
346
 
212
347
  ---
213
348
 
package/dist/index.d.cts CHANGED
@@ -104,15 +104,25 @@ type ItemField<T extends Schema> = T extends {
104
104
  } ? {
105
105
  [P in K & string]: string;
106
106
  } : string;
107
+ /**
108
+ * Helper Type: Represents a keyed list item (parsed from "Key: Value" format).
109
+ */
110
+ type KeyedListItem = {
111
+ key: string;
112
+ value: string[];
113
+ };
107
114
  /**
108
115
  * Helper Type: Infers the result type for a `List` content.
109
116
  *
110
- * - If the schema has keys, returns an array of objects.
117
+ * - If `keys` is defined, returns an array of mapped objects.
118
+ * - If `keyDelimiter` is defined, returns an array of `{ key, value }` objects.
111
119
  * - Otherwise, returns an array of strings.
112
120
  */
113
121
  type ContentListType<C extends List> = C extends {
114
122
  keys: readonly any[];
115
- } ? Array<ItemField<C>> : string[];
123
+ } ? Array<ItemField<C>> : C extends {
124
+ keyDelimiter: string;
125
+ } ? Array<KeyedListItem> : string[];
116
126
  /**
117
127
  * Helper Type: Recursively infers the type of a Node's content.
118
128
  *
@@ -128,11 +138,14 @@ type NodeContentItems<C extends Content | undefined> = C extends {
128
138
  /**
129
139
  * Helper Type: Represents the fully resolved type of a single Node.
130
140
  *
131
- * Merges:
132
- * 1. The parsed fields from the Node's **Title** (if it has `keys`).
133
- * 2. The parsed `content` property (children or list).
141
+ * Structure:
142
+ * - `title`: The parsed title (string, object with keys, or keyed list).
143
+ * - `content`: The parsed body content (children nodes or list items).
144
+ *
145
+ * This matches the runtime structure: `{ title: ..., content: [...] }`.
134
146
  */
135
- type StructuredItem<N extends Node> = ItemField<N["title"]> & {
147
+ type StructuredItem<N extends Node> = {
148
+ title: ItemField<N["title"]>;
136
149
  content: NodeContentItems<N["content"]>;
137
150
  };
138
151
  /**
@@ -152,7 +165,7 @@ type GetParsedType<T extends ParseSchema> = {
152
165
  node: infer N extends Node;
153
166
  } ? Array<StructuredItem<N>> : S["content"] extends {
154
167
  kind: "list";
155
- } ? ContentListType<S["content"]> : unknown;
168
+ } ? ContentListType<S["content"]> : string;
156
169
  };
157
170
 
158
171
  /**
package/dist/index.d.ts CHANGED
@@ -104,15 +104,25 @@ type ItemField<T extends Schema> = T extends {
104
104
  } ? {
105
105
  [P in K & string]: string;
106
106
  } : string;
107
+ /**
108
+ * Helper Type: Represents a keyed list item (parsed from "Key: Value" format).
109
+ */
110
+ type KeyedListItem = {
111
+ key: string;
112
+ value: string[];
113
+ };
107
114
  /**
108
115
  * Helper Type: Infers the result type for a `List` content.
109
116
  *
110
- * - If the schema has keys, returns an array of objects.
117
+ * - If `keys` is defined, returns an array of mapped objects.
118
+ * - If `keyDelimiter` is defined, returns an array of `{ key, value }` objects.
111
119
  * - Otherwise, returns an array of strings.
112
120
  */
113
121
  type ContentListType<C extends List> = C extends {
114
122
  keys: readonly any[];
115
- } ? Array<ItemField<C>> : string[];
123
+ } ? Array<ItemField<C>> : C extends {
124
+ keyDelimiter: string;
125
+ } ? Array<KeyedListItem> : string[];
116
126
  /**
117
127
  * Helper Type: Recursively infers the type of a Node's content.
118
128
  *
@@ -128,11 +138,14 @@ type NodeContentItems<C extends Content | undefined> = C extends {
128
138
  /**
129
139
  * Helper Type: Represents the fully resolved type of a single Node.
130
140
  *
131
- * Merges:
132
- * 1. The parsed fields from the Node's **Title** (if it has `keys`).
133
- * 2. The parsed `content` property (children or list).
141
+ * Structure:
142
+ * - `title`: The parsed title (string, object with keys, or keyed list).
143
+ * - `content`: The parsed body content (children nodes or list items).
144
+ *
145
+ * This matches the runtime structure: `{ title: ..., content: [...] }`.
134
146
  */
135
- type StructuredItem<N extends Node> = ItemField<N["title"]> & {
147
+ type StructuredItem<N extends Node> = {
148
+ title: ItemField<N["title"]>;
136
149
  content: NodeContentItems<N["content"]>;
137
150
  };
138
151
  /**
@@ -152,7 +165,7 @@ type GetParsedType<T extends ParseSchema> = {
152
165
  node: infer N extends Node;
153
166
  } ? Array<StructuredItem<N>> : S["content"] extends {
154
167
  kind: "list";
155
- } ? ContentListType<S["content"]> : unknown;
168
+ } ? ContentListType<S["content"]> : string;
156
169
  };
157
170
 
158
171
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yuji-min/google-docs-parser",
3
- "version": "1.0.1",
3
+ "version": "1.0.3",
4
4
  "description": "Turn your Google Docs into a Headless CMS. A strictly typed, schema-based parser for Google Docs.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",