@yuji-min/google-docs-parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,242 @@
1
+ # 📄 Google Docs Parser
2
+
3
+ <h1 align="center">
4
+ <img width="200px" src="media/logo.png" alt="Google Docs Parser logo">
5
+ </h1>
6
+
7
+ ![TypeScript](https://img.shields.io/badge/TypeScript-5.0+-blue?logo=typescript)
8
+ ![License](https://img.shields.io/badge/License-MIT-green)
9
+ ![Status](https://img.shields.io/badge/Status-Beta-orange)
10
+ ![npm](https://img.shields.io/npm/v/@yuji-min/google-docs-parser)
11
+
12
+ **Turn your Google Docs into a Headless CMS.**
13
+
14
+ `google-docs-parser` is a TypeScript library that transforms raw Google Docs content into structured JSON data based on a user-defined schema. Stop wrestling with the raw Google Docs API structure—define your schema and get clean data instantly.
15
+
16
+ ---
17
+
18
+ ## 🚀 Why use this?
19
+
20
+ Parsing the raw `docs_v1.Schema$Document` JSON from the Google API is complex. It involves handling deep nesting of `structuralElements`, `paragraph`, `elements`, and `textRun`, along with varying styling attributes.
21
+
22
+ This library solves that complexity by allowing you to define a **Schema** that maps your document's visual structure (Headings, Lists, Key-Values) directly to data structures.
23
+
24
+ ### ✨ Key Features
25
+
26
+ - **Type-Safe:** The return type is automatically inferred from your schema configuration using TypeScript generics.
27
+ - **Hierarchical Parsing:** Supports nested tree structures (e.g., _Heading 2_ containing _Heading 3_ children).
28
+ - **Smart Text Parsing:** Built-in parsers for:
29
+ - Key-Value pairs (e.g., `Role: Engineer`)
30
+ - Delimited fields (e.g., `2024 | Senior Dev | Google`)
31
+ - Flattened lists or grouped arrays.
32
+ - **Auth Ready:** Seamless integration with `google-auth-library` and `googleapis`.
33
+
34
+ ---
35
+
36
+ ## 📦 Installation
37
+
38
+ ```bash
39
+ npm install @yuji-min/google-docs-parser googleapis google-auth-library
40
+ # or
41
+ yarn add @yuji-min/google-docs-parser googleapis google-auth-library
42
+ ```
43
+
44
+ ---
45
+
46
+ ## 🔑 Authentication & Setup
47
+
48
+ To use this library, you need a Google Cloud Service Account with access to the Google Docs API.
49
+
50
+ ### 1. Create Google Cloud Credentials
51
+
52
+ 1. Go to the [Google Cloud Console](https://console.cloud.google.com/).
53
+ 2. Create a new project (or select an existing one).
54
+ 3. Enable the **Google Docs API** in the "APIs & Services" > "Library" section.
55
+ 4. Go to "IAM & Admin" > "Service Accounts" and create a new service account.
56
+ 5. Create and download a **JSON key** for this service account.
57
+
58
+ ### 2. Configure Environment Variable
59
+
60
+ Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of your downloaded JSON key file.
61
+
62
+ **Mac/Linux:**
63
+
64
+ ```bash
65
+ export GOOGLE_APPLICATION_CREDENTIALS="/path/to/your/service-account-key.json"
66
+ ```
67
+
68
+ **Windows (PowerShell):**
69
+
70
+ ```powershell
71
+ $env:GOOGLE_APPLICATION_CREDENTIALS="C:\path\to\your\service-account-key.json"
72
+ ```
73
+
74
+ > **Note:** The library uses `google-auth-library` internally, which automatically looks for credentials at the path defined in this environment variable.
75
+
76
+ ### 3. Share the Document (Important!)
77
+
78
+ Google Docs are private by default. You must share the target document with your Service Account's email address (found in your JSON key, e.g., `my-bot@my-project.iam.gserviceaccount.com`) and grant it **Viewer** permission.
79
+
80
+ **If you don't do this, the parser will throw a permission error.**
81
+
82
+ ---
83
+
84
+ ## 🛠️ Getting Started
85
+
86
+ ### 1. Prepare your Google Doc
87
+
88
+ Imagine a Google Doc structured like a resume or project list:
89
+
90
+ > **Profile** (Heading 1)
91
+ >
92
+ > Senior Software Engineer based in Seoul.
93
+ >
94
+ > **Experience** (Heading 1)
95
+ >
96
+ > **Tech Corp | Backend Lead** (Heading 2)
97
+ >
98
+ > - Designed microservices architecture
99
+ > - Managed a team of 5
100
+ >
101
+ > **Startup Inc | Full Stack** (Heading 2)
102
+ >
103
+ > - Built MVP in 3 months
104
+
105
+ ### 2. Define Schema & Parse
106
+
107
+ Create a schema object that mirrors the visual hierarchy of your document.
108
+
109
+ ```typescript
110
+ import { getParsedDocument, ParseSchema } from "@yuji-min/google-docs-parser";
111
+
112
+ // 1. Define the schema
113
+ const resumeSchema = {
114
+ sections: [
115
+ {
116
+ // Matches a "Heading 1" named 'Profile'
117
+ title: { name: "Profile", namedStyleType: "HEADING_1" },
118
+ // content is undefined -> defaults to simple text block
119
+ },
120
+ {
121
+ // Matches a "Heading 1" named 'Experience'
122
+ title: { name: "Experience", namedStyleType: "HEADING_1" },
123
+ content: {
124
+ kind: "tree", // This section is a hierarchical tree
125
+ node: {
126
+ // The tree nodes start with "Heading 2"
127
+ // We can also parse the heading text itself!
128
+ title: {
129
+ namedStyleType: "HEADING_2",
130
+ keys: ["company", "role"],
131
+ delimiter: "|",
132
+ },
133
+ // Under each H2, treat the content as a list
134
+ content: { kind: "list" },
135
+ },
136
+ },
137
+ },
138
+ ],
139
+ } as const; // 'as const' is CRITICAL for type inference
140
+
141
+ // 2. Fetch and Parse
142
+ async function main() {
143
+ const docId = "YOUR_GOOGLE_DOC_ID";
144
+
145
+ try {
146
+ // 'data' is fully typed based on resumeSchema!
147
+ const data = await getParsedDocument(docId, resumeSchema);
148
+ console.log(JSON.stringify(data, null, 2));
149
+ } catch (error) {
150
+ console.error(error);
151
+ }
152
+ }
153
+
154
+ main();
155
+ ```
156
+
157
+ ### 3. The Result
158
+
159
+ ```json
160
+ {
161
+ "Profile": "Senior Software Engineer based in Seoul.",
162
+ "Experience": [
163
+ {
164
+ "company": "Tech Corp",
165
+ "role": "Backend Lead",
166
+ "content": ["Designed microservices architecture", "Managed a team of 5"]
167
+ },
168
+ {
169
+ "company": "Startup Inc",
170
+ "role": "Full Stack",
171
+ "content": ["Built MVP in 3 months"]
172
+ }
173
+ ]
174
+ }
175
+ ```
176
+
177
+ ---
178
+
179
+ ## 📚 Parsing Schema Guide
180
+
181
+ The `ParseSchema` object controls how the parser reads your document.
182
+
183
+ ### Section Configuration
184
+
185
+ | Property | Type | Description |
186
+ | :--------------------- | :------- | :--------------------------------------------------------------------------------------------- |
187
+ | `title.name` | `string` | The text of the heading to find (case-insensitive). This becomes the key in the result object. |
188
+ | `title.namedStyleType` | `string` | The Google Docs style to match (e.g., `HEADING_1`, `TITLE`). |
189
+ | `content` | `Object` | (Optional) Defines the content structure. If omitted, parses as a text block. |
190
+
191
+ ### Content Kinds
192
+
193
+ #### 1. Text Block (Default)
194
+
195
+ If `content` is undefined, the parser collects all paragraphs following the header until the next section starts, joining them into a single string.
196
+
197
+ #### 2. List (`kind: "list"`)
198
+
199
+ Parses paragraphs as an array. Useful for bullet points or simple lists.
200
+
201
+ - **`isFlatten`**: (boolean) If true, merges multiple lines into a single flat array.
202
+ - **`keyDelimiter`**: (string) Parses "Key: Value" lines into `{ key: "...", value: [...] }` objects.
203
+ - **`delimiter`**: (string) Splits a line by a character (e.g., comma) into an array.
204
+
205
+ #### 3. Tree (`kind: "tree"`)
206
+
207
+ Parses hierarchical structures. Ideal for nested sections like "H2 -> H3 -> Content".
208
+
209
+ - **`node`**: Defines the schema for the child nodes.
210
+ - **Strict Nesting**: The parser automatically stops collecting children when it encounters a heading of the same or higher level (e.g., an H2 stops an open H2 block).
211
+
212
+ ---
213
+
214
+ ## 🧪 Testing
215
+
216
+ We use **Vitest** for testing. The repository includes a comprehensive test suite covering parsers, cursors, and authentication logic.
217
+
218
+ ```bash
219
+ # Run tests
220
+ npm test
221
+
222
+ # Run tests with coverage
223
+ npm run test:coverage
224
+ ```
225
+
226
+ ---
227
+
228
+ ## 🤝 Contributing
229
+
230
+ Contributions are welcome! If you find a bug or have a feature request, please open an issue.
231
+
232
+ 1. Fork the repository
233
+ 2. Create your feature branch (`git checkout -b feature/amazing-feature`)
234
+ 3. Commit your changes (`git commit -m 'Add some amazing feature'`)
235
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
236
+ 5. Open a Pull Request
237
+
238
+ ---
239
+
240
+ ## 📃 License
241
+
242
+ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
package/dist/index.cjs ADDED
@@ -0,0 +1,426 @@
1
+ 'use strict';
2
+
3
+ var googleapis = require('googleapis');
4
+ var googleAuthLibrary = require('google-auth-library');
5
+
6
+ // src/constants.ts
7
/**
 * Named paragraph styles that the parser treats as headings.
 * "NORMAL_TEXT" is intentionally absent from this list.
 */
const VALID_NAMED_STYLES = [
  "HEADING_1", "HEADING_2", "HEADING_3",
  "HEADING_4", "HEADING_5", "HEADING_6",
  "TITLE",
  "SUBTITLE",
];

/** Set view of VALID_NAMED_STYLES for constant-time membership checks. */
const VALID_NAMED_STYLES_SET = new Set(VALID_NAMED_STYLES);
18
+
19
+ // src/utils.ts
20
/**
 * Concatenates the text-run content of a paragraph into one display string.
 *
 * Elements without a `textRun` contribute nothing. The combined text is
 * trimmed first, then any remaining newline characters become spaces.
 *
 * @param paragraph Paragraph object whose `elements` may be missing.
 * @returns The flattened text, or "" when the paragraph has no text.
 */
function extractParagraphText(paragraph) {
  let combined = "";
  for (const element of paragraph.elements ?? []) {
    combined += element.textRun?.content || "";
  }
  return combined.trim().replace(/\n/g, " ");
}
25
/**
 * Tells whether a paragraph carries exactly the given named style.
 *
 * @param paragraph Paragraph whose `paragraphStyle` may be missing.
 * @param namedStyleType Style to compare against; a falsy value never matches.
 * @returns `true` only when the paragraph's named style equals `namedStyleType`.
 */
function hasNamedStyle(paragraph, namedStyleType) {
  return Boolean(namedStyleType) && paragraph.paragraphStyle?.namedStyleType === namedStyleType;
}
30
/**
 * Resolves the named style to report for a paragraph.
 *
 * - No style object and no elements: `undefined`.
 * - No style object, or a style object without a named style: "NORMAL_TEXT".
 * - A named style listed in VALID_NAMED_STYLES_SET: that style.
 * - Any other named style: `undefined`. This includes an explicit
 *   "NORMAL_TEXT", which the set does not contain.
 *
 * @param paragraph Paragraph to inspect.
 * @returns The resolved style name, or `undefined`.
 */
function getParagraphNamedStyleType(paragraph) {
  const paragraphStyle = paragraph.paragraphStyle;
  if (!paragraphStyle) {
    return paragraph.elements?.length ? "NORMAL_TEXT" : undefined;
  }
  const declared = paragraphStyle.namedStyleType;
  if (!declared) {
    return "NORMAL_TEXT";
  }
  return VALID_NAMED_STYLES_SET.has(declared) ? declared : undefined;
}
46
/**
 * Checks whether a value is one of the heading styles the parser recognises.
 *
 * @param style Candidate value of any type.
 * @returns `true` for a string in VALID_NAMED_STYLES_SET other than
 *   "NORMAL_TEXT"; `false` otherwise.
 */
function isNamedStyleType(style) {
  return (
    typeof style === "string" &&
    style !== "NORMAL_TEXT" &&
    VALID_NAMED_STYLES_SET.has(style)
  );
}
51
/**
 * Splits text on a delimiter and trims every piece.
 *
 * @param text Text to split; the empty string yields an empty array.
 * @param delimiter Separator passed to `String.prototype.split`.
 * @param filterEmpty When true, pieces that are empty after trimming are dropped.
 * @returns The trimmed pieces in their original order.
 */
function splitAndTrim(text, delimiter, filterEmpty = false) {
  if (text === "") {
    return [];
  }
  const pieces = [];
  for (const raw of text.split(delimiter)) {
    const piece = raw.trim();
    if (!filterEmpty || piece.length > 0) {
      pieces.push(piece);
    }
  }
  return pieces;
}
58
/**
 * Parses a "key<keyDelimiter>v1<listDelimiter>v2..." line into a keyed list.
 *
 * @param text Line to parse.
 * @param keyDelimiter Separator between the key and its values.
 * @param listDelimiter Separator between individual values.
 * @returns `{ key, value }` with `value` holding the trimmed, non-empty
 *   values. The input text is returned unchanged when the key delimiter is
 *   missing or sits at index 0, which would leave the key empty.
 */
function parseToKeyedList(text, keyDelimiter, listDelimiter) {
  const splitAt = text.indexOf(keyDelimiter);
  if (splitAt <= 0) {
    return text;
  }
  const rawValues = text.substring(splitAt + keyDelimiter.length).trim();
  return {
    key: text.substring(0, splitAt).trim(),
    value: rawValues ? splitAndTrim(rawValues, listDelimiter, true) : [],
  };
}
68
/**
 * Maps positional, delimiter-separated values onto the given field names.
 *
 * @param text Line to split.
 * @param keys Field names, in the order the values appear in `text`.
 * @param delimiter Separator between values.
 * @returns An object with one entry per key. Keys with no matching value,
 *   or with an empty one, map to "".
 */
function parseToFields(text, keys, delimiter) {
  const parts = splitAndTrim(text, delimiter, false);
  const fields = {};
  keys.forEach((key, position) => {
    // Parts are always strings, so only a missing position needs the fallback.
    fields[key] = parts[position] ?? "";
  });
  return fields;
}
76
/**
 * Splits text on a delimiter, keeping only the non-empty trimmed items.
 *
 * @param text Text to split.
 * @param delimiter Separator between items.
 * @returns The trimmed, non-empty items in their original order.
 */
function parseDelimitedList(text, delimiter) {
  const items = [];
  for (const segment of text.split(delimiter)) {
    const trimmed = segment.trim();
    if (trimmed.length > 0) {
      items.push(trimmed);
    }
  }
  return items;
}
80
/**
 * Parses one line of text according to a list/title schema.
 *
 * Precedence: a `keyDelimiter` produces a keyed list; otherwise a non-empty
 * `keys` array produces a field object; otherwise the line becomes a plain
 * delimited list. The value delimiter defaults to "," when the schema has
 * none, or has an empty one.
 *
 * @param text Line to parse.
 * @param schema Object that may carry `delimiter`, `keyDelimiter` and `keys`.
 * @returns Whatever the selected parser returns.
 */
function parseStructuredText(text, schema) {
  const valueDelimiter = schema.delimiter || ",";
  const { keyDelimiter, keys } = schema;
  if (keyDelimiter) {
    return parseToKeyedList(text, keyDelimiter, valueDelimiter);
  }
  return keys && keys.length > 0
    ? parseToFields(text, keys, valueDelimiter)
    : parseDelimitedList(text, valueDelimiter);
}
90
+
91
+ // src/tree.ts
92
/** Property name under which a tree node stores its children or detail lines. */
const CONTENT_KEY = "content";
93
+ function collectNodeStylesRecursive(node, set) {
94
+ if (node.title.namedStyleType) set.add(node.title.namedStyleType);
95
+ if (node.content && node.content.kind === "tree") {
96
+ collectNodeStylesRecursive(node.content.node, set);
97
+ }
98
+ }
99
/**
 * Builds a new tree node from a heading's text.
 *
 * When the title schema defines a `keyDelimiter`, a non-empty `keys` array
 * or a `delimiter`, the text goes through `parseStructuredText`. An array
 * or object result becomes the node title; any other result leaves the raw
 * text as the title.
 *
 * @param text Heading text.
 * @param titleSchema Title schema of the node being created.
 * @returns `{ title, content }` where `content` is a fresh empty array.
 */
function createNodeFromTitle(text, titleSchema) {
  const { keyDelimiter, keys, delimiter } = titleSchema;
  const wantsStructuredTitle =
    Boolean(keyDelimiter) || Boolean(keys && keys.length > 0) || Boolean(delimiter);

  let title = text;
  if (wantsStructuredTitle) {
    const parsed = parseStructuredText(text, titleSchema);
    // Arrays are objects too, so this one check covers lists and field maps.
    if (parsed !== null && typeof parsed === "object") {
      title = parsed;
    }
  }
  return { title, content: [] };
}
114
/**
 * Decides what the tree parser should do with the current paragraph.
 *
 * Checks run in this order:
 *  1. the cursor is at a schema section title      -> "exitSection"
 *  2. the paragraph has this node's title style    -> "createNode"
 *  3. the paragraph has the child node's style     -> "startChildNode"
 *  4. a heading with an ancestor's title style     -> "finishCurrentNode"
 *  5. any other heading                            -> "exitSection"
 *  6. anything else                                -> "appendDetail"
 *
 * @param paragraph Paragraph under the cursor.
 * @param cursor Cursor positioned on `paragraph`.
 * @param nodeSchema Schema of the node level being parsed.
 * @param ancestorNodeList Schemas of the enclosing node levels.
 * @returns An object whose `kind` names the action to take.
 */
function determineTreeParsingAction(paragraph, cursor, nodeSchema, ancestorNodeList) {
  const nestedContent = nodeSchema.content;
  const childSchema = nestedContent?.kind === "tree" ? nestedContent.node : undefined;

  const matchesNode = hasNamedStyle(paragraph, nodeSchema.title.namedStyleType);
  const matchesChild = childSchema
    ? hasNamedStyle(paragraph, childSchema.title.namedStyleType)
    : false;

  let matchesAncestor = false;
  for (const ancestor of ancestorNodeList) {
    if (hasNamedStyle(paragraph, ancestor.title.namedStyleType)) {
      matchesAncestor = true;
      break;
    }
  }

  const atHeading = cursor.isAtParagraphHeading();
  const atNewSection = cursor.isAtNewSection();

  if (atNewSection) return { kind: "exitSection" };
  if (matchesNode) return { kind: "createNode" };
  if (matchesChild) return { kind: "startChildNode" };
  if (!atHeading) return { kind: "appendDetail" };
  // A heading that is neither this level nor the child level: it either
  // closes the current node (ancestor level) or lies outside this tree.
  return matchesAncestor ? { kind: "finishCurrentNode" } : { kind: "exitSection" };
}
136
/**
 * Parses the body of a section whose content kind is "tree".
 *
 * Skips forward to the first paragraph carrying the root node's title
 * style and hands over to `parseTreeNode`. Scanning stops early, yielding
 * an empty array, at a schema section title or at a heading whose style is
 * not among the tree's title styles.
 *
 * @param cursor Cursor positioned just after the section title.
 * @param section Section schema; must have tree content with a `node`.
 * @param allNodeTitleStyles Title styles used anywhere in this tree.
 * @returns The parsed root-level nodes, or an empty array.
 */
function parseTreeSection(cursor, section, allNodeTitleStyles) {
  const content = section.content;
  if (!content || content.kind !== "tree" || !content.node) {
    return [];
  }
  const rootSchema = content.node;

  for (; !cursor.isEndOfDocument(); cursor.getNextParagraph()) {
    const current = cursor.getCurrentParagraph();
    if (!current) continue;
    if (cursor.isAtNewSection()) return [];
    if (cursor.isAtParagraphHeading() && !allNodeTitleStyles.has(current.style)) return [];
    if (hasNamedStyle(current.paragraph, rootSchema.title.namedStyleType)) {
      return parseTreeNode(cursor, rootSchema, [], allNodeTitleStyles);
    }
  }
  return [];
}
156
/**
 * Parses consecutive nodes of one tree level, recursing into child levels.
 *
 * Every paragraph is classified by `determineTreeParsingAction`. Paragraphs
 * without extractable text are skipped. Parsing of this level ends at a
 * schema section title, at a heading whose style is not in
 * `allNodeTitleStyles`, or when the action says to exit or finish.
 *
 * @param cursor Cursor shared by all parsers; advanced as paragraphs are consumed.
 * @param nodeSchema Schema of the node level being parsed.
 * @param ancestorNodeList Schemas of the enclosing levels, nearest first.
 * @param allNodeTitleStyles Title styles used anywhere in this tree.
 * @returns The nodes collected at this level.
 */
function parseTreeNode(cursor, nodeSchema, ancestorNodeList, allNodeTitleStyles) {
  const nodes = [];
  const nestedSchema = nodeSchema.content?.kind === "tree" ? nodeSchema.content.node : undefined;
  let openNode = null;

  while (!cursor.isEndOfDocument()) {
    const current = cursor.getCurrentParagraph();
    if (!current) {
      cursor.getNextParagraph();
      continue;
    }
    if (cursor.isAtNewSection()) {
      return nodes;
    }
    if (cursor.isAtParagraphHeading() && !allNodeTitleStyles.has(current.style)) {
      return nodes;
    }

    const { kind } = determineTreeParsingAction(
      current.paragraph,
      cursor,
      nodeSchema,
      ancestorNodeList
    );

    if (kind === "exitSection") {
      return nodes;
    }

    if (kind === "createNode") {
      openNode = createNodeFromTitle(current.text, nodeSchema.title);
      nodes.push(openNode);
      cursor.getNextParagraph();
      continue;
    }

    if (kind === "startChildNode") {
      const canNest = openNode && Array.isArray(openNode[CONTENT_KEY]) && nestedSchema;
      if (!canNest) {
        // A child heading with no open parent to attach to is skipped.
        cursor.getNextParagraph();
        continue;
      }
      // The recursive call consumes the child heading itself, so the
      // cursor is not advanced here.
      const children = parseTreeNode(
        cursor,
        nestedSchema,
        [nodeSchema, ...ancestorNodeList],
        allNodeTitleStyles
      );
      openNode[CONTENT_KEY].push(...children);
      continue;
    }

    if (kind === "finishCurrentNode") {
      // Leave the heading under the cursor for the enclosing level.
      if (openNode) return nodes;
      cursor.getNextParagraph();
      continue;
    }

    if (kind === "appendDetail") {
      // Levels whose content is a nested tree hold child nodes only, so
      // plain text lines at such a level are dropped.
      const collectsText = nodeSchema.content?.kind !== "tree";
      if (collectsText && openNode && Array.isArray(openNode[CONTENT_KEY])) {
        openNode[CONTENT_KEY].push(current.text.trim());
      }
      cursor.getNextParagraph();
      continue;
    }
  }
  return nodes;
}
223
+
224
+ // src/list.ts
225
/**
 * Parses the body of a section whose content kind is "list".
 *
 * Each paragraph up to the next schema section title or heading is parsed
 * with `parseStructuredText`. With `isFlatten` set, array results are
 * spread into the output; every other result is appended as one entry.
 *
 * @param cursor Cursor positioned just after the section title.
 * @param section Section schema; anything but list content yields [].
 * @returns The parsed list entries.
 */
function parseListSection(cursor, section) {
  const listSchema = section.content;
  if (!listSchema || listSchema.kind !== "list") {
    return [];
  }

  const entries = [];
  for (; !cursor.isEndOfDocument(); cursor.getNextParagraph()) {
    const current = cursor.getCurrentParagraph();
    if (!current) continue;
    if (cursor.isAtNewSection() || cursor.isAtParagraphHeading()) break;

    const entry = parseStructuredText(current.text, listSchema);
    if (listSchema.isFlatten && Array.isArray(entry)) {
      entries.push(...entry);
    } else {
      entries.push(entry);
    }
  }
  return entries;
}
249
+
250
+ // src/textBlock.ts
251
+ function parseTextBlockSection(cursor) {
252
+ const textPartList = [];
253
+ while (!cursor.isEndOfDocument()) {
254
+ const paragraph = cursor.getCurrentParagraph();
255
+ if (!paragraph) {
256
+ cursor.getNextParagraph();
257
+ continue;
258
+ }
259
+ if (cursor.isAtNewSection()) break;
260
+ if (cursor.isAtParagraphHeading()) break;
261
+ textPartList.push(paragraph.text);
262
+ cursor.getNextParagraph();
263
+ }
264
+ return textPartList.join(" ");
265
+ }
266
+
267
+ // src/section.ts
268
/**
 * Finds the schema section whose title matches the given paragraph.
 *
 * A section matches when the paragraph has the section's named style and
 * its text equals the section name, ignoring case and surrounding
 * whitespace. Sections lacking a name or a style are never matched.
 *
 * @param paragraph Paragraph to test.
 * @param text Extracted text of that paragraph.
 * @param parseSchema Schema whose `sections` are searched in order.
 * @returns The name of the first matching section as written in the
 *   schema, or `null`.
 */
function getSectionTitle(paragraph, text, parseSchema) {
  const wanted = text.trim().toLowerCase();
  const match = (parseSchema.sections ?? []).find((candidate) => {
    const { name, namedStyleType } = candidate.title;
    if (!name || !namedStyleType) {
      return false;
    }
    const sameStyle = hasNamedStyle(paragraph, namedStyleType);
    const sameText = name.trim().toLowerCase() === wanted;
    return sameStyle && sameText;
  });
  return match ? match.title.name : null;
}
283
/**
 * Dispatches to the parser that matches a section's content kind.
 *
 * "list" and "tree" go to their own parsers. A missing `content`, or any
 * other kind, falls back to the text-block parser. A tree without a `node`
 * yields an empty array.
 *
 * @param cursor Cursor positioned just after the section title.
 * @param section Section schema being parsed.
 * @returns The parsed content of the section.
 */
function parseSectionContent(cursor, section) {
  const content = section.content;
  const kind = content ? content.kind : undefined;

  if (kind === "list") {
    return parseListSection(cursor, section);
  }
  if (kind !== "tree") {
    return parseTextBlockSection(cursor);
  }
  if (!content.node) {
    return [];
  }
  // Collect every title style used in the tree so the tree parser can tell
  // its own headings from foreign ones.
  const titleStyles = new Set();
  collectNodeStylesRecursive(content.node, titleStyles);
  return parseTreeSection(cursor, section, titleStyles);
}
303
+
304
+ // src/cursor.ts
305
/**
 * Wraps a raw paragraph with its extracted text and resolved style.
 *
 * @param paragraph Raw paragraph object.
 * @returns `{ text, style, paragraph }`, or `null` when the paragraph has
 *   no text.
 */
function getParagraph(paragraph) {
  const text = extractParagraphText(paragraph);
  return text
    ? { text, style: getParagraphNamedStyleType(paragraph), paragraph }
    : null;
}
311
/**
 * Forward-only cursor over a list of paragraphs, with helpers that
 * classify the paragraph at the current position against a parse schema.
 */
class ParagraphCursor {
  /** Zero-based position of the cursor within `paragraphList`. */
  index = 0;
  paragraphList;
  parseSchema;

  /**
   * @param paragraphList Paragraphs to iterate, in document order.
   * @param parseSchema Schema used to recognise section titles.
   */
  constructor(paragraphList, parseSchema) {
    this.paragraphList = paragraphList;
    this.parseSchema = parseSchema;
  }

  /**
   * Returns the paragraph info at the cursor.
   *
   * @returns `{ text, style, paragraph }`, or `null` at the end of the
   *   document, on a missing entry, or when the paragraph has no text.
   */
  getCurrentParagraph() {
    const raw = this.isEndOfDocument() ? undefined : this.paragraphList[this.index];
    return raw ? getParagraph(raw) : null;
  }

  /**
   * Moves the cursor one position forward and returns the paragraph there.
   *
   * @returns The info of the new current paragraph, or `null` if the cursor
   *   was already at the end (in which case it does not move).
   */
  getNextParagraph() {
    if (this.isEndOfDocument()) {
      return null;
    }
    this.index += 1;
    return this.getCurrentParagraph();
  }

  /** Whether the cursor has moved past the last paragraph. */
  isEndOfDocument() {
    return this.index >= this.paragraphList.length;
  }

  /**
   * Looks up the schema section whose title is at the cursor.
   *
   * @returns The matched section name, or `null` when the current
   *   paragraph is not a section title.
   */
  getCurrentSectionTitle() {
    const current = this.getCurrentParagraph();
    return current
      ? getSectionTitle(current.paragraph, current.text, this.parseSchema)
      : null;
  }

  /** Whether the current paragraph starts a section defined in the schema. */
  isAtNewSection() {
    return this.getCurrentSectionTitle() !== null;
  }

  /** Whether the current paragraph has a recognised heading style. */
  isAtParagraphHeading() {
    const current = this.getCurrentParagraph();
    return current ? isNamedStyleType(current.style) : false;
  }
}
368
/**
 * Creates a Google Docs v1 API client authorised with the read-only
 * documents scope.
 *
 * @returns The client produced by `googleapis.google.docs`.
 * @throws {Error} With a generic setup message when client construction
 *   throws; the underlying error is logged via `console.error`.
 */
function createDocsClient() {
  let client;
  try {
    const auth = new googleAuthLibrary.GoogleAuth({
      scopes: ["https://www.googleapis.com/auth/documents.readonly"],
    });
    client = googleapis.google.docs({ version: "v1", auth });
  } catch (error) {
    console.error("Error initializing Google Docs client:", error);
    throw new Error(
      "Failed to initialize Google Docs client. Check setup and credentials."
    );
  }
  return client;
}
384
+
385
+ // src/parser.ts
386
/**
 * Converts a Google Docs document into an object keyed by section name.
 *
 * Only body elements that carry a `paragraph` are considered. Whenever the
 * cursor reaches a paragraph matching a schema section title, the following
 * content is parsed according to that section's schema and stored under
 * the section name. A later section with the same name overwrites an
 * earlier one.
 *
 * @param doc Document object as returned by the Docs API.
 * @param parseSchema Schema describing the sections to extract.
 * @returns An object mapping matched section names to their parsed content.
 */
function parseDocument(doc, parseSchema) {
  const paragraphs = [];
  for (const element of doc.body?.content || []) {
    if (element.paragraph) {
      paragraphs.push(element.paragraph);
    }
  }

  const cursor = new ParagraphCursor(paragraphs, parseSchema);
  const parsed = {};
  while (!cursor.isEndOfDocument()) {
    const sectionName = cursor.getCurrentSectionTitle();
    const section = sectionName
      ? parseSchema.sections.find((candidate) => candidate.title.name === sectionName)
      : undefined;

    // Step past the current paragraph in both cases: it is either the
    // section title itself or a paragraph outside any section.
    cursor.getNextParagraph();
    if (section) {
      parsed[sectionName] = parseSectionContent(cursor, section);
    }
  }
  return parsed;
}
408
/**
 * Fetches a Google Doc and parses it according to the given schema.
 *
 * @param documentId ID of the Google Doc to fetch.
 * @param parseSchema Schema describing the sections to extract.
 * @returns A promise resolving to the parsed document object.
 * @throws {Error} When creating the client, fetching the document or
 *   parsing it fails. The message embeds the original error's message and
 *   the original error is available as `cause`.
 */
async function getParsedDocument(documentId, parseSchema) {
  try {
    const docs = createDocsClient();
    const response = await docs.documents.get({ documentId });
    if (!response.data) {
      throw new Error("Empty document response from Google Docs API.");
    }
    return parseDocument(response.data, parseSchema);
  } catch (e) {
    const wrapped = new Error(
      `Google Docs API call failed. Check Doc ID and Service Account permissions. Original error: ${e instanceof Error ? e.message : String(e)}`
    );
    // Keep the original error (stack, HTTP status, error code) reachable for
    // callers instead of reducing it to its message. Defined explicitly,
    // non-enumerable like the native ES2022 `cause`, so it also works on
    // runtimes whose Error constructor ignores the options argument.
    Object.defineProperty(wrapped, "cause", {
      value: e,
      writable: true,
      enumerable: false,
      configurable: true,
    });
    throw wrapped;
  }
}
423
+
424
+ exports.getParsedDocument = getParsedDocument;
425
+ //# sourceMappingURL=index.cjs.map
426
+ //# sourceMappingURL=index.cjs.map