@yuji-min/google-docs-parser 1.0.3 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -36,16 +36,33 @@ This library solves that complexity by allowing you to define a **Schema** that
36
36
 
37
37
  ## 📦 Installation
38
38
 
39
+ ### Node.js / Traditional Environments
40
+
39
41
  ```bash
40
42
  npm install @yuji-min/google-docs-parser googleapis google-auth-library
41
43
  # or
42
44
  yarn add @yuji-min/google-docs-parser googleapis google-auth-library
43
45
  ```
44
46
 
47
+ ### Edge Runtime (Cloudflare Workers, Vercel Edge, etc.)
48
+
49
+ ```bash
50
+ npm install @yuji-min/google-docs-parser
51
+ # or
52
+ yarn add @yuji-min/google-docs-parser
53
+ ```
54
+
55
+ > **Note:** The Edge Runtime version (`/edge`) does **not** require `googleapis` or `google-auth-library` dependencies. It uses native Web APIs (Fetch, Web Crypto) instead.
56
+
45
57
  ---
46
58
 
47
59
  ## 🔑 Authentication & Setup
48
60
 
61
+ This library supports two runtime environments with different authentication approaches:
62
+
63
+ - **Node.js**: Uses `googleapis` and `google-auth-library` (traditional approach)
64
+ - **Edge Runtime**: Uses native Web APIs with JSON credentials (Cloudflare Workers, Vercel Edge, etc.)
65
+
49
66
  To use this library, you need a Google Cloud Service Account with access to the Google Docs API.
50
67
 
51
68
  ### 1. Create Google Cloud Credentials
@@ -58,6 +75,8 @@ To use this library, you need a Google Cloud Service Account with access to the
58
75
 
59
76
  ### 2. Configure Environment Variable
60
77
 
78
+ #### Node.js (File Path)
79
+
61
80
  Set the `GOOGLE_APPLICATION_CREDENTIALS` environment variable to the path of your downloaded JSON key file.
62
81
 
63
82
  **Mac/Linux:**
@@ -74,6 +93,34 @@ $env:GOOGLE_APPLICATION_CREDENTIALS="C:\path\to\your\service-account-key.json"
74
93
 
75
94
  > **Note:** The library uses `google-auth-library` internally, which automatically looks for credentials at the path defined in this environment variable.
76
95
 
96
+ #### Edge Runtime (JSON String)
97
+
98
+ For Edge Runtime environments, set `GOOGLE_APPLICATION_CREDENTIALS` to the **full contents of your service account key file as a JSON string** (not a file path):
99
+
100
+ **Cloudflare Workers (wrangler.toml):**
101
+
102
+ ```toml
103
+ [vars]
104
+ GOOGLE_APPLICATION_CREDENTIALS = '''
105
+ {
106
+ "type": "service_account",
107
+ "project_id": "your-project-id",
108
+ "private_key_id": "key-id",
109
+ "private_key": "-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n",
110
+ "client_email": "your-service-account@project.iam.gserviceaccount.com",
111
+ "client_id": "123456789",
112
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
113
+ "token_uri": "https://oauth2.googleapis.com/token",
114
+ "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
115
+ "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/..."
116
+ }
117
+ '''
118
+ ```
119
+
120
+ **Vercel Edge Functions:**
121
+
122
+ Add the service account JSON as the `GOOGLE_APPLICATION_CREDENTIALS` environment variable in your Vercel project settings, or define it in `.env.local` during development.
123
+
77
124
  ### 3. Share the Document (Important!)
78
125
 
79
126
  Google Docs are private by default. You must share the target document with your Service Account's email address (found in your JSON key, e.g., my-bot@my-project.iam.gserviceaccount.com) with Viewer permission.
@@ -191,6 +238,99 @@ Tree nodes always have a consistent `{ title, content }` structure:
191
238
 
192
239
  ---
193
240
 
241
+ ## ☁️ Edge Runtime Usage
242
+
243
+ The `/edge` export is specifically designed for Edge Runtime environments like Cloudflare Workers and Vercel Edge Functions.
244
+
245
+ ### Cloudflare Workers Example
246
+
247
+ ```typescript
248
+ import { getParsedDocument } from "@yuji-min/google-docs-parser/edge";
249
+ import type { ParseSchema } from "@yuji-min/google-docs-parser/edge";
250
+
251
+ const schema = {
252
+ sections: [
253
+ { title: { name: "Profile", namedStyleType: "HEADING_1" } },
254
+ {
255
+ title: { name: "Experience", namedStyleType: "HEADING_1" },
256
+ content: {
257
+ kind: "tree",
258
+ node: {
259
+ title: {
260
+ namedStyleType: "HEADING_2",
261
+ keys: ["company", "role"],
262
+ delimiter: "|",
263
+ },
264
+ content: { kind: "list" },
265
+ },
266
+ },
267
+ },
268
+ ],
269
+ } as const satisfies ParseSchema;
270
+
271
+ export default {
272
+ async fetch(request: Request, env: Env): Promise<Response> {
273
+ // Set credentials from environment variable
274
+ process.env.GOOGLE_APPLICATION_CREDENTIALS = env.GOOGLE_APPLICATION_CREDENTIALS;
275
+
276
+ const docId = "YOUR_GOOGLE_DOC_ID";
277
+
278
+ try {
279
+ const data = await getParsedDocument(docId, schema);
280
+ return Response.json(data);
281
+ } catch (error) {
282
+ return Response.json(
283
+ { error: error instanceof Error ? error.message : "Unknown error" },
284
+ { status: 500 }
285
+ );
286
+ }
287
+ },
288
+ };
289
+ ```
290
+
291
+ ### Vercel Edge Functions Example
292
+
293
+ ```typescript
294
+ // app/api/docs/route.ts
295
+ import { getParsedDocument } from "@yuji-min/google-docs-parser/edge";
296
+ import type { ParseSchema } from "@yuji-min/google-docs-parser/edge";
297
+
298
+ export const runtime = "edge";
299
+
300
+ const schema = {
301
+ /* your schema */
302
+ } as const satisfies ParseSchema;
303
+
304
+ export async function GET(request: Request) {
305
+ // Credentials are automatically loaded from process.env.GOOGLE_APPLICATION_CREDENTIALS
306
+ const docId = "YOUR_GOOGLE_DOC_ID";
307
+
308
+ try {
309
+ const data = await getParsedDocument(docId, schema);
310
+ return Response.json(data);
311
+ } catch (error) {
312
+ return Response.json(
313
+ { error: error instanceof Error ? error.message : "Unknown error" },
314
+ { status: 500 }
315
+ );
316
+ }
317
+ }
318
+ ```
319
+
320
+ ### Key Differences: Node.js vs Edge Runtime
321
+
322
+ | Feature | Node.js (`/`) | Edge Runtime (`/edge`) |
323
+ | :------------------------- | :---------------------------------- | :---------------------------------- |
324
+ | **Import Path** | `@yuji-min/google-docs-parser` | `@yuji-min/google-docs-parser/edge` |
325
+ | **Dependencies** | Requires `googleapis` + auth lib | No external dependencies |
326
+ | **Credentials Format** | File path or JSON string | JSON string only |
327
+ | **Authentication** | `google-auth-library` | Native Web Crypto API |
328
+ | **HTTP Client** | `googleapis` client | Native `fetch` |
329
+ | **Bundle Size** | Larger (~400KB+) | Smaller (~16KB) |
330
+ | **Supported Environments** | Node.js 18+ | Cloudflare, Vercel Edge, Deno, etc. |
331
+
332
+ ---
333
+
194
334
  ## 📚 Parsing Schema Guide
195
335
 
196
336
  The `ParseSchema` object controls how the parser reads your document. Use `GetParsedType<typeof schema>` to infer the exact TypeScript type from your schema.
package/dist/edge.cjs ADDED
@@ -0,0 +1,530 @@
1
+ 'use strict';
2
+
3
+ // src/edge/auth.ts
4
/**
 * Encodes bytes as unpadded base64url text (the encoding used for JWT segments).
 *
 * @param data - Bytes to encode (a `Uint8Array`, an array of byte values, or any iterable of them).
 * @returns The base64 text with `+` replaced by `-`, `/` replaced by `_`, and `=` padding removed.
 */
function base64UrlEncode(data) {
  const bytes = (ArrayBuffer.isView(data) || Array.isArray(data)) ? data : Array.from(data);
  // Convert in bounded chunks: spreading one very large array into
  // String.fromCharCode can exceed the engine's argument/stack limit.
  const CHUNK_SIZE = 0x8000;
  let binary = "";
  for (let offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
    binary += String.fromCharCode(...bytes.slice(offset, offset + CHUNK_SIZE));
  }
  return btoa(binary).replace(/\+/g, "-").replace(/\//g, "_").replace(/=/g, "");
}
8
/**
 * Converts a string to its UTF-8 byte representation.
 *
 * @param str - Text to encode.
 * @returns A `Uint8Array` holding the UTF-8 bytes of `str`.
 */
function stringToUint8Array(str) {
  const utf8Encoder = new TextEncoder();
  const encodedBytes = utf8Encoder.encode(str);
  return encodedBytes;
}
11
+ async function importPrivateKey(pemKey) {
12
+ const pemContents = pemKey.replace(/-----BEGIN PRIVATE KEY-----/, "").replace(/-----END PRIVATE KEY-----/, "").replace(/\s/g, "");
13
+ const binaryDer = Uint8Array.from(atob(pemContents), (c) => c.charCodeAt(0));
14
+ return crypto.subtle.importKey(
15
+ "pkcs8",
16
+ binaryDer,
17
+ {
18
+ name: "RSASSA-PKCS1-v1_5",
19
+ hash: "SHA-256"
20
+ },
21
+ false,
22
+ ["sign"]
23
+ );
24
+ }
25
/**
 * Builds an RS256-signed JWT assertion for the given service-account credentials.
 *
 * The token is issued "now", expires 3600 seconds later, and carries the
 * credentials' `client_email` as issuer and `token_uri` as audience.
 *
 * @param credentials - Object providing `client_email`, `token_uri` and `private_key`.
 * @param scope - Scope string placed in the JWT payload.
 * @returns A promise for the compact `header.payload.signature` token.
 */
async function createSignedJWT(credentials, scope) {
  const issuedAt = Math.floor(Date.now() / 1e3);
  // Each JWT segment is the base64url form of the UTF-8 JSON text.
  const encodeSegment = (value) => base64UrlEncode(stringToUint8Array(JSON.stringify(value)));

  const headerSegment = encodeSegment({ alg: "RS256", typ: "JWT" });
  const payloadSegment = encodeSegment({
    iss: credentials.client_email,
    scope,
    aud: credentials.token_uri,
    exp: issuedAt + 3600,
    iat: issuedAt
  });
  const signingInput = `${headerSegment}.${payloadSegment}`;

  const signingKey = await importPrivateKey(credentials.private_key);
  const rawSignature = await crypto.subtle.sign(
    "RSASSA-PKCS1-v1_5",
    signingKey,
    stringToUint8Array(signingInput)
  );
  return `${signingInput}.${base64UrlEncode(new Uint8Array(rawSignature))}`;
}
56
/**
 * Exchanges a signed JWT assertion for an OAuth2 access token with the
 * read-only Google Docs scope.
 *
 * @param credentials - Service-account credentials; `token_uri` is the endpoint
 *   POSTed to, and the object is passed on to `createSignedJWT`.
 * @returns A promise for the access token string.
 * @throws Error when the token endpoint answers with a non-2xx status, or when
 *   a successful response does not contain an `access_token`.
 */
async function getAccessToken(credentials) {
  const scope = "https://www.googleapis.com/auth/documents.readonly";
  const jwt = await createSignedJWT(credentials, scope);
  const response = await fetch(credentials.token_uri, {
    method: "POST",
    headers: {
      "Content-Type": "application/x-www-form-urlencoded"
    },
    body: new URLSearchParams({
      grant_type: "urn:ietf:params:oauth:grant-type:jwt-bearer",
      assertion: jwt
    })
  });
  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(
      `Failed to get access token: ${response.status} ${errorText}`
    );
  }
  const data = await response.json();
  // Fail here rather than returning undefined, which would only surface later
  // as an opaque "Bearer undefined" authorization failure.
  if (!data || typeof data.access_token !== "string" || data.access_token === "") {
    throw new Error(
      "Failed to get access token: token response did not contain an access_token"
    );
  }
  return data.access_token;
}
78
+ function loadCredentials() {
79
+ const credentialsEnv = process.env.GOOGLE_APPLICATION_CREDENTIALS;
80
+ if (!credentialsEnv) {
81
+ throw new Error(
82
+ "GOOGLE_APPLICATION_CREDENTIALS environment variable is not set"
83
+ );
84
+ }
85
+ try {
86
+ if (credentialsEnv.trim().startsWith("{")) {
87
+ return JSON.parse(credentialsEnv);
88
+ }
89
+ throw new Error(
90
+ "File path credentials are not supported in Edge Runtime. Use JSON string instead."
91
+ );
92
+ } catch (error) {
93
+ throw new Error(
94
+ `Failed to parse credentials: ${error instanceof Error ? error.message : String(error)}`
95
+ );
96
+ }
97
+ }
98
/**
 * Loads the credentials from the environment and exchanges them for an access token.
 *
 * Despite the name, the resolved value is whatever `getAccessToken` yields
 * (the access token), not a client object.
 *
 * @returns A promise for the access token.
 */
async function createAuthenticatedClient() {
  return getAccessToken(loadCredentials());
}
102
+
103
+ // src/edge/client.ts
104
+ async function fetchDocument(documentId, accessToken) {
105
+ const url = `https://docs.googleapis.com/v1/documents/${documentId}`;
106
+ const response = await fetch(url, {
107
+ method: "GET",
108
+ headers: {
109
+ Authorization: `Bearer ${accessToken}`,
110
+ "Content-Type": "application/json"
111
+ }
112
+ });
113
+ if (!response.ok) {
114
+ const errorText = await response.text();
115
+ throw new Error(
116
+ `Google Docs API request failed: ${response.status} ${errorText}`
117
+ );
118
+ }
119
+ const data = await response.json();
120
+ if (!data.body) {
121
+ throw new Error("Empty document response from Google Docs API.");
122
+ }
123
+ return data;
124
+ }
125
/**
 * Obtains an access token and then fetches the document with it.
 *
 * @param documentId - ID of the document to fetch.
 * @returns A promise for the document JSON returned by `fetchDocument`.
 */
async function getDocument(documentId) {
  return fetchDocument(documentId, await createAuthenticatedClient());
}
129
+
130
+ // src/constants.ts
131
// Named paragraph styles recognised by the parser: HEADING_1 through
// HEADING_6, followed by TITLE and SUBTITLE.
var VALID_NAMED_STYLES = [
  ...Array.from({ length: 6 }, (_, level) => `HEADING_${level + 1}`),
  "TITLE",
  "SUBTITLE"
];
// Same values as a Set, for constant-time membership checks.
var VALID_NAMED_STYLES_SET = new Set(VALID_NAMED_STYLES);
142
+
143
+ // src/utils.ts
144
/**
 * Produces the plain text of a paragraph.
 *
 * Concatenates the `textRun.content` of every element (elements without text
 * contribute nothing), trims the result, and turns remaining newlines into spaces.
 *
 * @param paragraph - Paragraph object; `elements` may be absent.
 * @returns The flattened text, or `""` when there is none.
 */
function extractParagraphText(paragraph) {
  const runs = paragraph.elements ?? [];
  const combined = runs.reduce(
    (soFar, element) => soFar + (element.textRun?.content || ""),
    ""
  );
  return combined.trim().replace(/\n/g, " ");
}
149
/**
 * Tells whether a paragraph carries exactly the given named style.
 *
 * @param paragraph - Paragraph whose `paragraphStyle.namedStyleType` is inspected.
 * @param namedStyleType - Style to compare against; a falsy value never matches.
 * @returns `true` only when `namedStyleType` is truthy and strictly equals the paragraph's style.
 */
function hasNamedStyle(paragraph, namedStyleType) {
  return (
    Boolean(namedStyleType) &&
    paragraph.paragraphStyle?.namedStyleType === namedStyleType
  );
}
154
/**
 * Resolves the style label the parser uses for a paragraph.
 *
 * - No `paragraphStyle` and no elements: `undefined`.
 * - No `paragraphStyle` (but some elements), or no `namedStyleType`: `"NORMAL_TEXT"`.
 * - A `namedStyleType` contained in `VALID_NAMED_STYLES_SET`: that value.
 * - Any other `namedStyleType`: `undefined`.
 *
 * @param paragraph - Paragraph to inspect.
 * @returns The resolved style name, or `undefined`.
 */
function getParagraphNamedStyleType(paragraph) {
  const { paragraphStyle, elements } = paragraph;
  if (!paragraphStyle) {
    return elements?.length ? "NORMAL_TEXT" : void 0;
  }
  const declaredStyle = paragraphStyle.namedStyleType;
  if (!declaredStyle) {
    return "NORMAL_TEXT";
  }
  return VALID_NAMED_STYLES_SET.has(declaredStyle) ? declaredStyle : void 0;
}
170
/**
 * Checks whether a value is one of the recognised named styles.
 *
 * @param style - Value to test.
 * @returns `true` when `style` is a string other than `"NORMAL_TEXT"` that is
 *   contained in `VALID_NAMED_STYLES_SET`; otherwise `false`.
 */
function isNamedStyleType(style) {
  return (
    typeof style === "string" &&
    style !== "NORMAL_TEXT" &&
    VALID_NAMED_STYLES_SET.has(style)
  );
}
175
/**
 * Splits text on a delimiter and trims every piece.
 *
 * @param text - Text to split; the empty string yields an empty array.
 * @param delimiter - Separator handed to `String.prototype.split`.
 * @param filterEmpty - When `true`, pieces that are empty after trimming are dropped.
 * @returns The trimmed pieces, in order.
 */
function splitAndTrim(text, delimiter, filterEmpty = false) {
  if (text === "") {
    return [];
  }
  const pieces = [];
  for (const rawPiece of text.split(delimiter)) {
    const piece = rawPiece.trim();
    if (!filterEmpty || piece.length > 0) {
      pieces.push(piece);
    }
  }
  return pieces;
}
182
/**
 * Parses `"key<keyDelimiter>v1<listDelimiter>v2..."` into a key and a list of values.
 *
 * @param text - Text to parse.
 * @param keyDelimiter - Separator between the key and the value list.
 * @param listDelimiter - Separator between individual values.
 * @returns `{ key, value }`, where `value` holds the non-empty trimmed values;
 *   or `text` unchanged when the key delimiter is missing or sits at position 0.
 */
function parseToKeyedList(text, keyDelimiter, listDelimiter) {
  const splitAt = text.indexOf(keyDelimiter);
  if (splitAt <= 0) {
    // Not found (-1) or nothing before the delimiter (0): leave the text as-is.
    return text;
  }
  const key = text.slice(0, splitAt).trim();
  const remainder = text.slice(splitAt + keyDelimiter.length).trim();
  return {
    key,
    value: remainder ? splitAndTrim(remainder, listDelimiter, true) : []
  };
}
192
/**
 * Maps positional, delimiter-separated values onto named keys.
 *
 * @param text - Text holding the delimited values.
 * @param keys - Field names; the i-th key receives the i-th value.
 * @param delimiter - Separator between values.
 * @returns An object with one entry per key; keys without a corresponding value get `""`.
 */
function parseToFields(text, keys, delimiter) {
  const values = splitAndTrim(text, delimiter, false);
  const fields = {};
  keys.forEach((key, position) => {
    const value = values[position];
    fields[key] = value === void 0 ? "" : value;
  });
  return fields;
}
200
/**
 * Splits text into trimmed, non-empty items.
 *
 * @param text - Text to split.
 * @param delimiter - Separator between items.
 * @returns The items in order, with empty ones removed.
 */
function parseDelimitedList(text, delimiter) {
  const items = [];
  for (const part of text.split(delimiter)) {
    const item = part.trim();
    if (item.length > 0) {
      items.push(item);
    }
  }
  return items;
}
204
/**
 * Parses a line of text according to the schema's delimiter settings.
 *
 * Precedence: `keyDelimiter` (keyed list), then a non-empty `keys` array
 * (named fields), otherwise a plain delimited list. The item delimiter
 * defaults to `","`.
 *
 * @param text - Text to parse.
 * @param schema - Object that may define `delimiter`, `keyDelimiter` and `keys`.
 * @returns The result of the selected parser.
 */
function parseStructuredText(text, schema) {
  const itemDelimiter = schema.delimiter || ",";
  const { keyDelimiter, keys } = schema;
  if (keyDelimiter) {
    return parseToKeyedList(text, keyDelimiter, itemDelimiter);
  }
  return keys && keys.length > 0
    ? parseToFields(text, keys, itemDelimiter)
    : parseDelimitedList(text, itemDelimiter);
}
214
+
215
+ // src/tree.ts
216
// Property name under which a tree node stores its children / detail lines.
var CONTENT_KEY = "content";

/**
 * Adds the title style of a node schema, and of every nested tree node schema
 * below it, to `set`.
 *
 * @param node - Node schema to start from.
 * @param set - Set that receives each truthy `title.namedStyleType`.
 */
function collectNodeStylesRecursive(node, set) {
  // Node schemas nest linearly (at most one child schema each), so the
  // descent is a simple walk down `content.node`.
  for (let current = node; ; current = current.content.node) {
    const titleStyle = current.title.namedStyleType;
    if (titleStyle) set.add(titleStyle);
    const nestsTree = current.content && current.content.kind === "tree";
    if (!nestsTree) return;
  }
}
223
/**
 * Creates a tree node (`{ title, content: [] }`) from a heading's text.
 *
 * When the title schema defines `keyDelimiter`, a non-empty `keys` array, or
 * `delimiter`, the text is run through `parseStructuredText`; an object or
 * array result becomes the title. In every other case the raw text is the title.
 *
 * @param text - Heading text.
 * @param titleSchema - Title part of the node schema.
 * @returns A new node with an empty `content` array.
 */
function createNodeFromTitle(text, titleSchema) {
  const definesKeys = !!titleSchema.keys && titleSchema.keys.length > 0;
  const isStructured = !!titleSchema.keyDelimiter || definesKeys || !!titleSchema.delimiter;
  if (!isStructured) {
    return { title: text, content: [] };
  }
  const parsed = parseStructuredText(text, titleSchema);
  // `typeof [] === "object"`, so this covers both arrays and plain objects.
  const title = typeof parsed === "object" && parsed !== null ? parsed : text;
  return { title, content: [] };
}
238
/**
 * Decides what the tree parser should do with the current paragraph.
 *
 * Checks, in order:
 *  1. the cursor is at a new section            -> `exitSection`
 *  2. the paragraph has this node's title style -> `createNode`
 *  3. it has the child node's title style       -> `startChildNode`
 *  4. it is a heading with an ancestor's style  -> `finishCurrentNode`
 *  5. it is any other heading                   -> `exitSection`
 *  6. otherwise                                 -> `appendDetail`
 *
 * @param paragraph - Current paragraph.
 * @param cursor - Cursor providing `isAtParagraphHeading()` and `isAtNewSection()`.
 * @param nodeSchema - Schema of the node level being parsed.
 * @param ancestorNodeList - Schemas of the enclosing node levels.
 * @returns An object whose `kind` names the action.
 */
function determineTreeParsingAction(paragraph, cursor, nodeSchema, ancestorNodeList) {
  const hasTitleStyleOf = (schema) => hasNamedStyle(paragraph, schema.title.namedStyleType);
  const childSchema = nodeSchema.content?.kind === "tree" ? nodeSchema.content.node : void 0;

  const onOwnTitle = hasTitleStyleOf(nodeSchema);
  const onChildTitle = !!childSchema && hasTitleStyleOf(childSchema);
  const onAncestorTitle = ancestorNodeList.some((ancestor) => hasTitleStyleOf(ancestor));
  const onHeading = cursor.isAtParagraphHeading();
  const onNewSection = cursor.isAtNewSection();

  if (onNewSection) return { kind: "exitSection" };
  if (onOwnTitle) return { kind: "createNode" };
  if (onChildTitle) return { kind: "startChildNode" };
  if (onHeading) {
    // Own and child titles were handled above, so a heading here either
    // belongs to an ancestor level or lies outside this tree altogether.
    return { kind: onAncestorTitle ? "finishCurrentNode" : "exitSection" };
  }
  return { kind: "appendDetail" };
}
260
/**
 * Parses a section whose content is a tree.
 *
 * Skips paragraphs until one carries the root node's title style, then hands
 * over to `parseTreeNode`. Stops early, returning `[]`, at a new section or
 * at a heading whose style is not one of the tree's title styles.
 *
 * @param cursor - Paragraph cursor positioned after the section title.
 * @param section - Section schema; `content` is expected to be `{ kind: "tree", node }`.
 * @param allNodeTitleStyles - Set of every title style used in this tree.
 * @returns The parsed root-level nodes, or `[]`.
 */
function parseTreeSection(cursor, section, allNodeTitleStyles) {
  const treeContent = section.content;
  const rootSchema = treeContent?.kind === "tree" ? treeContent.node : void 0;
  if (!treeContent || !rootSchema) return [];

  // Every iteration that neither breaks nor returns advances the cursor.
  for (; !cursor.isEndOfDocument(); cursor.getNextParagraph()) {
    const current = cursor.getCurrentParagraph();
    if (!current) continue;
    if (cursor.isAtNewSection()) break;
    const isForeignHeading =
      cursor.isAtParagraphHeading() && current.style && !allNodeTitleStyles.has(current.style);
    if (isForeignHeading) break;
    if (hasNamedStyle(current.paragraph, rootSchema.title.namedStyleType)) {
      return parseTreeNode(cursor, rootSchema, [], allNodeTitleStyles);
    }
  }
  return [];
}
280
/**
 * Parses consecutive nodes of one tree level, recursing for child levels.
 *
 * Returns once the cursor reaches the end of the document, a new section, a
 * heading outside this tree, or - when a node is already open - a heading
 * that belongs to an ancestor level.
 *
 * @param cursor - Paragraph cursor; advanced as paragraphs are consumed.
 * @param nodeSchema - Schema of the level being parsed.
 * @param ancestorNodeList - Schemas of the enclosing levels, nearest first.
 * @param allNodeTitleStyles - Set of every title style used in this tree.
 * @returns The nodes parsed at this level.
 */
function parseTreeNode(cursor, nodeSchema, ancestorNodeList, allNodeTitleStyles) {
  const nodes = [];
  const holdsSubtree = nodeSchema.content?.kind === "tree";
  const childSchema = holdsSubtree ? nodeSchema.content.node : void 0;
  let openNode = null;

  while (!cursor.isEndOfDocument()) {
    const current = cursor.getCurrentParagraph();
    if (!current) {
      cursor.getNextParagraph();
      continue;
    }
    if (cursor.isAtNewSection()) return nodes;
    if (cursor.isAtParagraphHeading() && current.style && !allNodeTitleStyles.has(current.style)) {
      return nodes;
    }

    const { kind } = determineTreeParsingAction(
      current.paragraph,
      cursor,
      nodeSchema,
      ancestorNodeList
    );

    if (kind === "exitSection") return nodes;
    // Leave the paragraph unconsumed so the caller can handle it.
    if (kind === "finishCurrentNode" && openNode) return nodes;

    if (kind === "createNode") {
      openNode = createNodeFromTitle(current.text, nodeSchema.title);
      nodes.push(openNode);
      cursor.getNextParagraph();
      continue;
    }

    const canHoldEntries = !!openNode && Array.isArray(openNode[CONTENT_KEY]);

    if (kind === "startChildNode" && canHoldEntries && childSchema) {
      // The recursive call consumes the child paragraphs itself, so the
      // cursor is not advanced here.
      const children = parseTreeNode(
        cursor,
        childSchema,
        [nodeSchema, ...ancestorNodeList],
        allNodeTitleStyles
      );
      openNode[CONTENT_KEY].push(...children);
      continue;
    }

    // Plain text under a node is kept only when this level does not nest a tree.
    if (kind === "appendDetail" && !holdsSubtree && canHoldEntries) {
      openNode[CONTENT_KEY].push(current.text.trim());
    }
    cursor.getNextParagraph();
  }
  return nodes;
}
347
+
348
+ // src/list.ts
349
/**
 * Parses a section whose content is a list: one entry per non-empty paragraph
 * until a new section, a heading, or the end of the document.
 *
 * @param cursor - Paragraph cursor positioned after the section title.
 * @param section - Section schema; anything other than `content.kind === "list"` yields `[]`.
 * @returns The parsed entries. With `isFlatten`, array results are spread into the output.
 */
function parseListSection(cursor, section) {
  const listSchema = section.content;
  if (!listSchema || listSchema.kind !== "list") {
    return [];
  }
  const entries = [];
  // Every iteration that does not break advances the cursor.
  for (; !cursor.isEndOfDocument(); cursor.getNextParagraph()) {
    const current = cursor.getCurrentParagraph();
    if (!current) continue;
    if (cursor.isAtNewSection() || cursor.isAtParagraphHeading()) break;

    const parsed = parseStructuredText(current.text, listSchema);
    if (listSchema.isFlatten && Array.isArray(parsed)) {
      entries.push(...parsed);
    } else {
      entries.push(parsed);
    }
  }
  return entries;
}
373
+
374
+ // src/textBlock.ts
375
+ function parseTextBlockSection(cursor) {
376
+ const textPartList = [];
377
+ while (!cursor.isEndOfDocument()) {
378
+ const paragraph = cursor.getCurrentParagraph();
379
+ if (!paragraph) {
380
+ cursor.getNextParagraph();
381
+ continue;
382
+ }
383
+ if (cursor.isAtNewSection()) break;
384
+ if (cursor.isAtParagraphHeading()) break;
385
+ textPartList.push(paragraph.text);
386
+ cursor.getNextParagraph();
387
+ }
388
+ return textPartList.join(" ");
389
+ }
390
+
391
+ // src/section.ts
392
/**
 * Finds the schema section whose title matches a paragraph.
 *
 * A section matches when it defines both `name` and `namedStyleType`, the
 * paragraph has that style, and the trimmed, lower-cased text equals the
 * trimmed, lower-cased name.
 *
 * @param paragraph - Paragraph being tested.
 * @param text - The paragraph's text.
 * @param parseSchema - Schema whose `sections` are searched in order.
 * @returns The `name` of the first matching section, or `null`.
 */
function getSectionTitle(paragraph, text, parseSchema) {
  const wanted = text.trim().toLowerCase();
  const matched = (parseSchema.sections ?? []).find(({ title }) => {
    const { name, namedStyleType } = title;
    if (!(name && namedStyleType)) return false;
    const styleMatches = hasNamedStyle(paragraph, namedStyleType);
    const textMatches = wanted === name.trim().toLowerCase();
    return styleMatches && textMatches;
  });
  return matched ? matched.title.name : null;
}
407
/**
 * Dispatches to the parser that matches the section's content kind.
 *
 * @param cursor - Paragraph cursor positioned after the section title.
 * @param section - Section schema.
 * @returns A list of nodes for `"tree"` (or `[]` when the tree has no `node`),
 *   a list of entries for `"list"`, and a text block string otherwise -
 *   including when the section defines no `content`.
 */
function parseSectionContent(cursor, section) {
  const content = section.content;
  if (content && content.kind === "tree") {
    if (!content.node) {
      return [];
    }
    const treeTitleStyles = /* @__PURE__ */ new Set();
    collectNodeStylesRecursive(content.node, treeTitleStyles);
    return parseTreeSection(cursor, section, treeTitleStyles);
  }
  if (content && content.kind === "list") {
    return parseListSection(cursor, section);
  }
  return parseTextBlockSection(cursor);
}
427
+
428
+ // src/cursor.ts
429
/**
 * Wraps a raw paragraph together with its extracted text and resolved style.
 *
 * @param paragraph - Raw paragraph object.
 * @returns `{ text, style, paragraph }`, or `null` when the paragraph has no text.
 */
function getParagraph(paragraph) {
  const text = extractParagraphText(paragraph);
  return text
    ? { text, style: getParagraphNamedStyleType(paragraph), paragraph }
    : null;
}
435
/**
 * Forward-only cursor over a list of paragraphs, with helpers that classify
 * the paragraph at the current position against a parse schema.
 */
var ParagraphCursor = class {
  /** Zero-based position of the cursor within `paragraphList`. */
  index = 0;
  paragraphList;
  parseSchema;

  /**
   * @param paragraphList - Paragraphs to iterate over.
   * @param parseSchema - Schema used to recognise section titles.
   */
  constructor(paragraphList, parseSchema) {
    this.paragraphList = paragraphList;
    this.parseSchema = parseSchema;
  }

  /**
   * Reads the paragraph at the current position.
   *
   * @returns The `{ text, style, paragraph }` info, or `null` when the cursor
   *   is past the end, the slot is empty, or the paragraph has no text.
   */
  getCurrentParagraph() {
    const raw = this.isEndOfDocument() ? void 0 : this.paragraphList[this.index];
    return raw ? getParagraph(raw) : null;
  }

  /**
   * Moves one position forward and reads the paragraph there.
   *
   * @returns The paragraph info at the new position, or `null` when the cursor
   *   was already at the end or nothing readable is at the new position.
   */
  getNextParagraph() {
    if (this.isEndOfDocument()) return null;
    this.index += 1;
    return this.getCurrentParagraph();
  }

  /**
   * @returns `true` once the cursor has moved past the last paragraph.
   */
  isEndOfDocument() {
    return this.index >= this.paragraphList.length;
  }

  /**
   * Matches the current paragraph against the schema's section titles.
   *
   * @returns The matching section name, or `null` when there is no match or
   *   no readable paragraph.
   */
  getCurrentSectionTitle() {
    const current = this.getCurrentParagraph();
    return current
      ? getSectionTitle(current.paragraph, current.text, this.parseSchema)
      : null;
  }

  /**
   * @returns `true` when the current paragraph is a section title from the schema.
   */
  isAtNewSection() {
    return this.getCurrentSectionTitle() !== null;
  }

  /**
   * @returns `true` when the current paragraph's style is a named style such as HEADING_1.
   */
  isAtParagraphHeading() {
    const current = this.getCurrentParagraph();
    return current ? isNamedStyleType(current.style) : false;
  }
};
492
+
493
+ // src/edge/index.ts
494
/**
 * Turns a Google Docs document into an object keyed by section name.
 *
 * Walks the document's paragraphs; whenever one matches a section title from
 * the schema, the paragraphs after it are parsed according to that section's
 * content definition.
 *
 * @param doc - Document JSON; paragraphs are read from `doc.body.content`.
 * @param parseSchema - Schema listing the sections to extract.
 * @returns An object mapping each section name found to its parsed content.
 */
function parseDocument(doc, parseSchema) {
  const paragraphs = [];
  for (const element of doc.body?.content || []) {
    // Structural elements without a paragraph are ignored.
    if (element.paragraph) paragraphs.push(element.paragraph);
  }

  const cursor = new ParagraphCursor(paragraphs, parseSchema);
  const parsedSections = {};
  while (!cursor.isEndOfDocument()) {
    const sectionName = cursor.getCurrentSectionTitle();
    const section = sectionName
      ? parseSchema.sections.find((candidate) => candidate.title.name === sectionName)
      : void 0;
    // Step past the current paragraph: either the section title just
    // recognised, or a paragraph that belongs to no section.
    cursor.getNextParagraph();
    if (section) {
      parsedSections[sectionName] = parseSectionContent(cursor, section);
    }
  }
  return parsedSections;
}
516
/**
 * Fetches a Google Doc and parses it according to the schema.
 *
 * @param documentId - ID of the document to fetch.
 * @param parseSchema - Schema describing the sections to extract.
 * @returns A promise for the parsed document object.
 * @throws Error wrapping any failure raised while fetching or parsing; the
 *   original message is appended to the new one.
 */
async function getParsedDocument(documentId, parseSchema) {
  try {
    return parseDocument(await getDocument(documentId), parseSchema);
  } catch (e) {
    const originalMessage = e instanceof Error ? e.message : String(e);
    throw new Error(
      `Google Docs API call failed. Check Doc ID and Service Account permissions. Original error: ${originalMessage}`
    );
  }
}
527
+
528
+ exports.getParsedDocument = getParsedDocument;
529
+ //# sourceMappingURL=edge.cjs.map
530
+ //# sourceMappingURL=edge.cjs.map