docsmith-mcp 0.0.1-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","names":["filePath: string","scriptPath: string","options: RunPythonFileOptions","runPyOptions: RunPyOptions","filePath: string","fileType: string","packages: Record<string, Record<string, string>>","scriptName: string","scriptArgs: string[]"],"sources":["../src/code-runner.ts","../src/utils.ts","../src/index.ts"],"sourcesContent":["/**\n * Code runner client - uses @mcpc-tech/code-runner-mcp npm package\n */\nimport { runPy, type RunPyOptions } from \"@mcpc-tech/code-runner-mcp\";\nimport { readFileSync } from \"fs\";\nimport { fileURLToPath } from \"url\";\nimport { dirname, join, resolve } from \"path\";\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = dirname(__filename);\n\n/**\n * Options for running Python script files\n */\nexport interface RunPythonFileOptions {\n /** Command line arguments to pass to the script */\n args?: string[];\n /** Package name mappings (import_name -> pypi_name) */\n packages?: Record<string, string>;\n /** Base directory for the script (default: \"python\") */\n baseDir?: string;\n /** User file paths that need to be accessible (for file system mounting) */\n filePaths?: string[];\n}\n\n/**\n * Convert absolute file path to Pyodide virtual path\n * Determines the mount root and converts the path accordingly\n *\n * @param filePath - Absolute path to the file\n * @returns Object with mountRoot (host path) and virtualPath (Pyodide path)\n */\nfunction getFileSystemMapping(\n filePath: string,\n): { mountRoot: string; virtualPath: string } {\n const absolutePath = resolve(filePath);\n\n // Mount the parent directory of the file\n // This allows Python to access the file and its siblings\n const mountRoot = dirname(absolutePath);\n const virtualPath = absolutePath;\n\n return { mountRoot, virtualPath };\n}\n\n/**\n * Run a Python script file using code-runner-mcp\n *\n * @param scriptPath - Path to the Python script (relative to baseDir)\n * @param options - Execution options\n 
* @returns The execution result\n */\nexport async function runPythonFile(\n scriptPath: string,\n options: RunPythonFileOptions = {},\n): Promise<any> {\n const {\n args = [],\n packages = {},\n baseDir = \"python\",\n filePaths = [],\n } = options;\n\n // Read the Python script\n const fullPath = join(__dirname, \"..\", baseDir, scriptPath);\n const scriptContent = readFileSync(fullPath, \"utf-8\");\n\n // Build wrapper code that sets sys.argv and executes the script\n const wrapperCode = `\nimport sys\nimport json\n\n# Set command line arguments\nsys.argv = ['${scriptPath}'] + ${JSON.stringify(args)}\n\n# Execute the script\n${scriptContent}\n`;\n\n // Determine mount root from the first file path\n let mountRoot = join(__dirname, \"..\"); // Default: project root\n if (filePaths.length > 0) {\n const mapping = getFileSystemMapping(filePaths[0]);\n mountRoot = mapping.mountRoot;\n }\n\n // Execute via runPy with options\n // Mount point is the same as the mount root (Pyodide will see host paths directly)\n const runPyOptions: RunPyOptions = {\n packages,\n nodeFSMountPoint: mountRoot,\n nodeFSRoot: mountRoot,\n };\n const stream = await runPy(wrapperCode, runPyOptions);\n\n // Read the stream output\n const reader = stream.getReader();\n const decoder = new TextDecoder();\n let stdout = \"\";\n let stderr = \"\";\n let error = \"\";\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n\n const chunk = decoder.decode(value, { stream: true });\n if (chunk.startsWith(\"[stderr] \")) {\n stderr += chunk.slice(9);\n } else if (chunk.startsWith(\"[err]\")) {\n error += chunk;\n } else {\n stdout += chunk;\n }\n }\n } catch (streamError) {\n // Stream error means Python execution failed\n return { error: String(streamError) };\n }\n\n // Check for errors\n if (error) {\n return { error: error.replace(/\\[err\\]\\[py\\]\\s*/g, \"\").trim() };\n }\n\n // Parse the JSON output from the script (last line)\n const lines = 
stdout.trim().split(\"\\n\");\n const lastLine = lines[lines.length - 1];\n\n try {\n return JSON.parse(lastLine);\n } catch {\n return { stdout, stderr };\n }\n}\n","/**\n * Utility functions for document processing\n */\n\n/**\n * Detect file type from file extension\n */\nexport function detectFileType(\n filePath: string,\n): \"excel\" | \"word\" | \"pdf\" | \"text\" | null {\n const ext = filePath.toLowerCase().split(\".\").pop();\n if (ext === \"xlsx\" || ext === \"xls\") return \"excel\";\n if (ext === \"docx\") return \"word\";\n if (ext === \"pdf\") return \"pdf\";\n if (\n ext === \"txt\" || ext === \"csv\" || ext === \"md\" || ext === \"json\" ||\n ext === \"yaml\" || ext === \"yml\"\n ) return \"text\";\n return null;\n}\n\n/**\n * Get required packages for each file type\n */\nexport function getPackages(fileType: string): Record<string, string> {\n const packages: Record<string, Record<string, string>> = {\n excel: { openpyxl: \"openpyxl\" },\n word: { docx: \"python-docx\" }, // Map docx import to python-docx package\n pdf: { PyPDF2: \"PyPDF2\" },\n text: {}, // No external packages needed for text files\n };\n return packages[fileType] || {};\n}\n\n/**\n * Get environment configuration\n */\nexport function getConfig() {\n return {\n rawFullRead: process.env.DOC_RAW_FULL_READ === \"true\",\n pageSize: parseInt(process.env.DOC_PAGE_SIZE || \"100\", 10),\n maxFileSize: parseInt(process.env.DOC_MAX_FILE_SIZE || \"50\", 10) * 1024 *\n 1024,\n };\n}\n","#!/usr/bin/env node\n\nimport { Server } from \"@modelcontextprotocol/sdk/server/index.js\";\nimport { StdioServerTransport } from \"@modelcontextprotocol/sdk/server/stdio.js\";\nimport {\n CallToolRequestSchema,\n ListToolsRequestSchema,\n} from \"@modelcontextprotocol/sdk/types.js\";\nimport { z } from \"zod\";\nimport { runPythonFile } from \"./code-runner.js\";\nimport { detectFileType, getConfig, getPackages } from \"./utils.js\";\n\n// Tool schemas\nconst ReadDocumentSchema = z.object({\n file_path: 
z.string().describe(\"Absolute path to the document file\"),\n mode: z.enum([\"raw\", \"paginated\"]).optional().describe(\n \"Read mode: 'raw' for full content, 'paginated' for chunked reading\",\n ),\n page: z.number().optional().describe(\n \"Page number for paginated mode (1-based)\",\n ),\n page_size: z.number().optional().describe(\n \"Items per page for paginated mode\",\n ),\n sheet_name: z.string().optional().describe(\"Sheet name for Excel files\"),\n});\n\nconst WriteDocumentSchema = z.object({\n file_path: z.string().describe(\"Absolute path to save the document\"),\n format: z.enum([\"excel\", \"word\", \"text\"]).describe(\"Document format\"),\n data: z.any().describe(\"Document data structure\"),\n});\n\nconst GetDocumentInfoSchema = z.object({\n file_path: z.string().describe(\"Absolute path to the document file\"),\n});\n\n// Server setup\nconst server = new Server(\n {\n name: \"docsmith-mcp\",\n version: \"0.1.0\",\n },\n {\n capabilities: {\n tools: {},\n },\n },\n);\n\n// Output schemas (reusable)\nconst BaseOutputSchema = {\n type: \"object\",\n properties: {\n success: { type: \"boolean\", description: \"Operation success status\" },\n error: { type: \"string\", description: \"Error message if failed\" },\n },\n} as const;\n\nconst PaginationSchema = {\n current_page: { type: \"number\", description: \"Current page number\" },\n page_size: { type: \"number\", description: \"Items per page\" },\n total_pages: { type: \"number\", description: \"Total number of pages\" },\n page: { type: \"number\", description: \"Current page number (alternative)\" },\n has_more: { type: \"boolean\", description: \"Whether more pages exist\" },\n} as const;\n\nconst ExcelReadOutputSchema = {\n type: \"object\",\n properties: {\n sheet_name: { type: \"string\", description: \"Active sheet name\" },\n sheets: {\n type: \"array\",\n items: { type: \"string\" },\n description: \"All sheet names\",\n },\n total_rows: { type: \"number\", description: \"Total rows in 
sheet\" },\n total_cols: { type: \"number\", description: \"Total columns in sheet\" },\n data: {\n type: \"array\",\n items: { type: \"array\", items: {} },\n description: \"Sheet data as array of rows\",\n },\n ...PaginationSchema,\n },\n} as const;\n\nconst WordReadOutputSchema = {\n type: \"object\",\n properties: {\n paragraphs: {\n type: \"array\",\n items: { type: \"string\" },\n description: \"Document paragraphs\",\n },\n tables: {\n type: \"array\",\n items: {\n type: \"array\",\n items: { type: \"array\", items: { type: \"string\" } },\n },\n description: \"Tables data\",\n },\n total_paragraphs: { type: \"number\", description: \"Total paragraph count\" },\n total_tables: { type: \"number\", description: \"Total table count\" },\n ...PaginationSchema,\n },\n} as const;\n\nconst PDFReadOutputSchema = {\n type: \"object\",\n properties: {\n total_pages: { type: \"number\", description: \"Total pages in PDF\" },\n content: {\n type: \"array\",\n items: {\n type: \"object\",\n properties: {\n page_number: { type: \"number\" },\n text: { type: \"string\" },\n },\n },\n description: \"Page content array\",\n },\n current_page_group: { type: \"number\" },\n page_size: { type: \"number\" },\n },\n} as const;\n\nconst TextReadOutputSchema = {\n type: \"object\",\n properties: {\n ...BaseOutputSchema.properties,\n content: { type: \"string\", description: \"Text content\" },\n total_lines: { type: \"number\", description: \"Total line count\" },\n encoding: { type: \"string\", description: \"File encoding\" },\n ...PaginationSchema,\n },\n} as const;\n\nconst CSVReadOutputSchema = {\n type: \"object\",\n properties: {\n ...BaseOutputSchema.properties,\n headers: {\n type: \"array\",\n items: { type: \"string\" },\n description: \"CSV headers\",\n },\n data: {\n type: \"array\",\n items: { type: \"object\" },\n description: \"Structured data as array of objects\",\n },\n total_rows: { type: \"number\", description: \"Total data rows\" },\n encoding: { type: 
\"string\", description: \"File encoding\" },\n ...PaginationSchema,\n },\n} as const;\n\nconst JSONReadOutputSchema = {\n type: \"object\",\n properties: {\n ...BaseOutputSchema.properties,\n data: { type: \"object\", description: \"Parsed JSON data\" },\n encoding: { type: \"string\", description: \"File encoding\" },\n },\n} as const;\n\nconst WriteOutputSchema = {\n type: \"object\",\n properties: {\n success: { type: \"boolean\", description: \"Write operation success\" },\n file_path: { type: \"string\", description: \"Written file path\" },\n message: { type: \"string\", description: \"Success message\" },\n error: { type: \"string\", description: \"Error message if failed\" },\n },\n} as const;\n\nconst ExcelInfoOutputSchema = {\n type: \"object\",\n properties: {\n sheets: {\n type: \"array\",\n items: {\n type: \"object\",\n properties: {\n name: { type: \"string\" },\n rows: { type: \"number\" },\n cols: { type: \"number\" },\n },\n },\n description: \"Sheet information\",\n },\n file_size: { type: \"number\", description: \"File size in bytes\" },\n },\n} as const;\n\nconst WordInfoOutputSchema = {\n type: \"object\",\n properties: {\n paragraphs: { type: \"number\", description: \"Paragraph count\" },\n tables: { type: \"number\", description: \"Table count\" },\n file_size: { type: \"number\", description: \"File size in bytes\" },\n },\n} as const;\n\nconst PDFInfoOutputSchema = {\n type: \"object\",\n properties: {\n pages: { type: \"number\", description: \"Page count\" },\n file_size: { type: \"number\", description: \"File size in bytes\" },\n total_words: { type: \"number\", description: \"Total word count\" },\n },\n} as const;\n\nconst TextInfoOutputSchema = {\n type: \"object\",\n properties: {\n ...BaseOutputSchema.properties,\n file_size: { type: \"number\", description: \"File size in bytes\" },\n line_count: { type: \"number\", description: \"Line count\" },\n encoding: { type: \"string\", description: \"File encoding\" },\n file_type: { 
type: \"string\", description: \"File extension\" },\n // CSV specific\n headers: { type: \"array\", items: { type: \"string\" } },\n total_rows: { type: \"number\" },\n total_cols: { type: \"number\" },\n // JSON specific\n item_count: { type: \"number\" },\n key_count: { type: \"number\" },\n },\n} as const;\n\n// List available tools\nserver.setRequestHandler(ListToolsRequestSchema, async () => {\n return {\n tools: [\n {\n name: \"read_document\",\n description:\n \"Read document content (Excel, Word, PDF, TXT, CSV, Markdown, JSON, YAML). Supports raw full read or paginated mode.\",\n inputSchema: {\n type: \"object\",\n properties: {\n file_path: {\n type: \"string\",\n description: \"Absolute path to the document file\",\n },\n mode: {\n type: \"string\",\n enum: [\"raw\", \"paginated\"],\n description: \"Read mode\",\n },\n page: {\n type: \"number\",\n description: \"Page number for paginated mode\",\n },\n page_size: { type: \"number\", description: \"Items per page\" },\n sheet_name: {\n type: \"string\",\n description: \"Sheet name for Excel files\",\n },\n },\n required: [\"file_path\"],\n },\n outputSchema: {\n type: \"object\",\n description:\n \"Returns different structures based on file type: Excel (sheet data), Word (paragraphs/tables), PDF (page content), Text (plain text), CSV (structured rows), JSON (parsed object)\",\n oneOf: [\n ExcelReadOutputSchema,\n WordReadOutputSchema,\n PDFReadOutputSchema,\n TextReadOutputSchema,\n CSVReadOutputSchema,\n JSONReadOutputSchema,\n ],\n },\n },\n {\n name: \"write_document\",\n description: \"Write document content (Excel, Word, Text)\",\n inputSchema: {\n type: \"object\",\n properties: {\n file_path: {\n type: \"string\",\n description: \"Absolute path to save the document\",\n },\n format: {\n type: \"string\",\n enum: [\"excel\", \"word\", \"text\"],\n description: \"Document format\",\n },\n data: {\n description:\n \"Document data structure. Excel: array of rows [[cell1, cell2], ...]. 
Word: {paragraphs: string[], tables?: [[[cell]]]}. Text/CSV/JSON: string or object\",\n },\n },\n required: [\"file_path\", \"format\", \"data\"],\n },\n outputSchema: WriteOutputSchema,\n },\n {\n name: \"get_document_info\",\n description:\n \"Get document metadata (page count, sheet count, file size, etc.)\",\n inputSchema: {\n type: \"object\",\n properties: {\n file_path: {\n type: \"string\",\n description: \"Absolute path to the document file\",\n },\n },\n required: [\"file_path\"],\n },\n outputSchema: {\n type: \"object\",\n description: \"Returns metadata based on file type\",\n oneOf: [\n ExcelInfoOutputSchema,\n WordInfoOutputSchema,\n PDFInfoOutputSchema,\n TextInfoOutputSchema,\n ],\n },\n },\n ],\n };\n});\n\n// Handle tool calls\nserver.setRequestHandler(CallToolRequestSchema, async (request) => {\n const { name, arguments: args } = request.params;\n\n try {\n if (name === \"read_document\") {\n const params = ReadDocumentSchema.parse(args);\n const fileType = detectFileType(params.file_path);\n\n if (!fileType) {\n throw new Error(`Unsupported file type: ${params.file_path}`);\n }\n\n // Determine read mode\n const config = getConfig();\n const mode = params.mode || (config.rawFullRead ? \"raw\" : \"paginated\");\n const page = mode === \"paginated\" ? 
(params.page || 1) : undefined;\n const pageSize = params.page_size || config.pageSize;\n\n let scriptName: string;\n let scriptArgs: string[];\n\n if (fileType === \"excel\") {\n scriptName = \"excel_handler.py\";\n scriptArgs = [\"read\", params.file_path];\n if (params.sheet_name) scriptArgs.push(params.sheet_name);\n if (page) {\n scriptArgs.push(String(page));\n scriptArgs.push(String(pageSize));\n }\n } else if (fileType === \"word\") {\n scriptName = \"word_handler.py\";\n scriptArgs = [\"read\", params.file_path];\n if (page) {\n scriptArgs.push(String(page));\n scriptArgs.push(String(pageSize));\n }\n } else if (fileType === \"pdf\") {\n scriptName = \"pdf_handler.py\";\n scriptArgs = [\"read\", params.file_path];\n if (page) {\n scriptArgs.push(String(page));\n scriptArgs.push(String(Math.min(pageSize, 10))); // PDF pages are larger\n }\n } else {\n // text files\n scriptName = \"text_handler.py\";\n scriptArgs = [\"read\", params.file_path];\n if (page) {\n scriptArgs.push(String(page));\n scriptArgs.push(String(pageSize));\n }\n }\n\n const result = await runPythonFile(scriptName, {\n args: scriptArgs,\n packages: getPackages(fileType),\n filePaths: [params.file_path],\n });\n return {\n content: [{\n type: \"text\",\n text: JSON.stringify(result, null, 2),\n }],\n structuredContent: result,\n };\n }\n\n if (name === \"write_document\") {\n const params = WriteDocumentSchema.parse(args);\n\n let scriptName: string;\n let scriptArgs: string[];\n\n if (params.format === \"excel\") {\n scriptName = \"excel_handler.py\";\n scriptArgs = [\"write\", params.file_path, JSON.stringify(params.data)];\n } else if (params.format === \"word\") {\n scriptName = \"word_handler.py\";\n const paragraphs = params.data.paragraphs || [];\n const tables = params.data.tables || null;\n scriptArgs = [\"write\", params.file_path, JSON.stringify(paragraphs)];\n if (tables) scriptArgs.push(JSON.stringify(tables));\n } else if (params.format === \"text\") {\n scriptName = 
\"text_handler.py\";\n const content = typeof params.data === \"string\"\n ? params.data\n : JSON.stringify(params.data);\n scriptArgs = [\"write\", params.file_path, content];\n } else {\n throw new Error(`Unsupported write format: ${params.format}`);\n }\n\n const result = await runPythonFile(scriptName, {\n args: scriptArgs,\n packages: getPackages(params.format),\n filePaths: [params.file_path],\n });\n return {\n content: [{\n type: \"text\",\n text: JSON.stringify(result, null, 2),\n }],\n structuredContent: result,\n };\n }\n\n if (name === \"get_document_info\") {\n const params = GetDocumentInfoSchema.parse(args);\n const fileType = detectFileType(params.file_path);\n\n if (!fileType) {\n throw new Error(`Unsupported file type: ${params.file_path}`);\n }\n\n let scriptName: string;\n let scriptArgs = [\"info\", params.file_path];\n\n if (fileType === \"excel\") {\n scriptName = \"excel_handler.py\";\n } else if (fileType === \"word\") {\n scriptName = \"word_handler.py\";\n } else if (fileType === \"pdf\") {\n scriptName = \"pdf_handler.py\";\n } else {\n scriptName = \"text_handler.py\";\n }\n\n const result = await runPythonFile(scriptName, {\n args: scriptArgs,\n packages: getPackages(fileType),\n filePaths: [params.file_path],\n });\n return {\n content: [{\n type: \"text\",\n text: JSON.stringify(result, null, 2),\n }],\n structuredContent: result,\n };\n }\n\n throw new Error(`Unknown tool: ${name}`);\n } catch (error) {\n const errorMessage = error instanceof Error ? 
error.message : String(error);\n return {\n content: [{ type: \"text\", text: `Error: ${errorMessage}` }],\n isError: true,\n };\n }\n});\n\n// Start server\nasync function main() {\n const transport = new StdioServerTransport();\n await server.connect(transport);\n console.error(\"Docsmith MCP server running on stdio\");\n}\n\nmain().catch((error) => {\n console.error(\"Server error:\", error);\n process.exit(1);\n});\n"],"mappings":";;;;;;;;;;;AAQA,MAAM,aAAa,cAAc,OAAO,KAAK,IAAI;AACjD,MAAM,YAAY,QAAQ,WAAW;;;;;;;;AAuBrC,SAAS,qBACPA,UAC4C;CAC5C,MAAM,eAAe,QAAQ,SAAS;CAItC,MAAM,YAAY,QAAQ,aAAa;CACvC,MAAM,cAAc;AAEpB,QAAO;EAAE;EAAW;CAAa;AAClC;;;;;;;;AASD,eAAsB,cACpBC,YACAC,UAAgC,CAAE,GACpB;CACd,MAAM,EACJ,OAAO,CAAE,GACT,WAAW,CAAE,GACb,UAAU,UACV,YAAY,CAAE,GACf,GAAG;CAGJ,MAAM,WAAW,KAAK,WAAW,MAAM,SAAS,WAAW;CAC3D,MAAM,gBAAgB,aAAa,UAAU,QAAQ;CAGrD,MAAM,eAAe;;;;;eAKR,WAAW,OAAO,KAAK,UAAU,KAAK,CAAC;;;EAGpD,cAAc;;CAId,IAAI,YAAY,KAAK,WAAW,KAAK;AACrC,KAAI,UAAU,SAAS,GAAG;EACxB,MAAM,UAAU,qBAAqB,UAAU,GAAG;AAClD,cAAY,QAAQ;CACrB;CAID,MAAMC,eAA6B;EACjC;EACA,kBAAkB;EAClB,YAAY;CACb;CACD,MAAM,SAAS,MAAM,MAAM,aAAa,aAAa;CAGrD,MAAM,SAAS,OAAO,WAAW;CACjC,MAAM,UAAU,IAAI;CACpB,IAAI,SAAS;CACb,IAAI,SAAS;CACb,IAAI,QAAQ;AAEZ,KAAI;AACF,SAAO,MAAM;GACX,MAAM,EAAE,MAAM,OAAO,GAAG,MAAM,OAAO,MAAM;AAC3C,OAAI,KAAM;GAEV,MAAM,QAAQ,QAAQ,OAAO,OAAO,EAAE,QAAQ,KAAM,EAAC;AACrD,OAAI,MAAM,WAAW,YAAY,CAC/B,WAAU,MAAM,MAAM,EAAE;YACf,MAAM,WAAW,QAAQ,CAClC,UAAS;OAET,WAAU;EAEb;CACF,SAAQ,aAAa;AAEpB,SAAO,EAAE,OAAO,OAAO,YAAY,CAAE;CACtC;AAGD,KAAI,MACF,QAAO,EAAE,OAAO,MAAM,QAAQ,qBAAqB,GAAG,CAAC,MAAM,CAAE;CAIjE,MAAM,QAAQ,OAAO,MAAM,CAAC,MAAM,KAAK;CACvC,MAAM,WAAW,MAAM,MAAM,SAAS;AAEtC,KAAI;AACF,SAAO,KAAK,MAAM,SAAS;CAC5B,QAAO;AACN,SAAO;GAAE;GAAQ;EAAQ;CAC1B;AACF;;;;;;;;;;AChID,SAAgB,eACdC,UAC0C;CAC1C,MAAM,MAAM,SAAS,aAAa,CAAC,MAAM,IAAI,CAAC,KAAK;AACnD,KAAI,QAAQ,UAAU,QAAQ,MAAO,QAAO;AAC5C,KAAI,QAAQ,OAAQ,QAAO;AAC3B,KAAI,QAAQ,MAAO,QAAO;AAC1B,KACE,QAAQ,SAAS,QAAQ,SAAS,QAAQ,QAAQ,QAAQ,UAC1D,QAAQ,UAAU,QAAQ,MAC1B,QAAO;AACT,QAAO;AACR;;;;AAKD,SAAgB,YAAYC,UAA0
C;CACpE,MAAMC,WAAmD;EACvD,OAAO,EAAE,UAAU,WAAY;EAC/B,MAAM,EAAE,MAAM,cAAe;EAC7B,KAAK,EAAE,QAAQ,SAAU;EACzB,MAAM,CAAE;CACT;AACD,QAAO,SAAS,aAAa,CAAE;AAChC;;;;AAKD,SAAgB,YAAY;AAC1B,QAAO;EACL,aAAa,QAAQ,IAAI,sBAAsB;EAC/C,UAAU,SAAS,QAAQ,IAAI,iBAAiB,OAAO,GAAG;EAC1D,aAAa,SAAS,QAAQ,IAAI,qBAAqB,MAAM,GAAG,GAAG,OACjE;CACH;AACF;;;;AC/BD,MAAM,qBAAqB,EAAE,OAAO;CAClC,WAAW,EAAE,QAAQ,CAAC,SAAS,qCAAqC;CACpE,MAAM,EAAE,KAAK,CAAC,OAAO,WAAY,EAAC,CAAC,UAAU,CAAC,SAC5C,qEACD;CACD,MAAM,EAAE,QAAQ,CAAC,UAAU,CAAC,SAC1B,2CACD;CACD,WAAW,EAAE,QAAQ,CAAC,UAAU,CAAC,SAC/B,oCACD;CACD,YAAY,EAAE,QAAQ,CAAC,UAAU,CAAC,SAAS,6BAA6B;AACzE,EAAC;AAEF,MAAM,sBAAsB,EAAE,OAAO;CACnC,WAAW,EAAE,QAAQ,CAAC,SAAS,qCAAqC;CACpE,QAAQ,EAAE,KAAK;EAAC;EAAS;EAAQ;CAAO,EAAC,CAAC,SAAS,kBAAkB;CACrE,MAAM,EAAE,KAAK,CAAC,SAAS,0BAA0B;AAClD,EAAC;AAEF,MAAM,wBAAwB,EAAE,OAAO,EACrC,WAAW,EAAE,QAAQ,CAAC,SAAS,qCAAqC,CACrE,EAAC;AAGF,MAAM,SAAS,IAAI,OACjB;CACE,MAAM;CACN,SAAS;AACV,GACD,EACE,cAAc,EACZ,OAAO,CAAE,EACV,EACF;AAIH,MAAM,mBAAmB;CACvB,MAAM;CACN,YAAY;EACV,SAAS;GAAE,MAAM;GAAW,aAAa;EAA4B;EACrE,OAAO;GAAE,MAAM;GAAU,aAAa;EAA2B;CAClE;AACF;AAED,MAAM,mBAAmB;CACvB,cAAc;EAAE,MAAM;EAAU,aAAa;CAAuB;CACpE,WAAW;EAAE,MAAM;EAAU,aAAa;CAAkB;CAC5D,aAAa;EAAE,MAAM;EAAU,aAAa;CAAyB;CACrE,MAAM;EAAE,MAAM;EAAU,aAAa;CAAqC;CAC1E,UAAU;EAAE,MAAM;EAAW,aAAa;CAA4B;AACvE;AAED,MAAM,wBAAwB;CAC5B,MAAM;CACN,YAAY;EACV,YAAY;GAAE,MAAM;GAAU,aAAa;EAAqB;EAChE,QAAQ;GACN,MAAM;GACN,OAAO,EAAE,MAAM,SAAU;GACzB,aAAa;EACd;EACD,YAAY;GAAE,MAAM;GAAU,aAAa;EAAuB;EAClE,YAAY;GAAE,MAAM;GAAU,aAAa;EAA0B;EACrE,MAAM;GACJ,MAAM;GACN,OAAO;IAAE,MAAM;IAAS,OAAO,CAAE;GAAE;GACnC,aAAa;EACd;EACD,GAAG;CACJ;AACF;AAED,MAAM,uBAAuB;CAC3B,MAAM;CACN,YAAY;EACV,YAAY;GACV,MAAM;GACN,OAAO,EAAE,MAAM,SAAU;GACzB,aAAa;EACd;EACD,QAAQ;GACN,MAAM;GACN,OAAO;IACL,MAAM;IACN,OAAO;KAAE,MAAM;KAAS,OAAO,EAAE,MAAM,SAAU;IAAE;GACpD;GACD,aAAa;EACd;EACD,kBAAkB;GAAE,MAAM;GAAU,aAAa;EAAyB;EAC1E,cAAc;GAAE,MAAM;GAAU,aAAa;EAAqB;EAClE,GAAG;CACJ;AACF;AAED,MAAM,sBAAsB;CAC1B,MAAM;CACN,YAAY;EACV,aAAa;GAAE,MAAM;GAAU,aAAa;EAAsB;EAClE,SAAS;GACP,MAAM;GACN,OAAO;IACL,M
AAM;IACN,YAAY;KACV,aAAa,EAAE,MAAM,SAAU;KAC/B,MAAM,EAAE,MAAM,SAAU;IACzB;GACF;GACD,aAAa;EACd;EACD,oBAAoB,EAAE,MAAM,SAAU;EACtC,WAAW,EAAE,MAAM,SAAU;CAC9B;AACF;AAED,MAAM,uBAAuB;CAC3B,MAAM;CACN,YAAY;EACV,GAAG,iBAAiB;EACpB,SAAS;GAAE,MAAM;GAAU,aAAa;EAAgB;EACxD,aAAa;GAAE,MAAM;GAAU,aAAa;EAAoB;EAChE,UAAU;GAAE,MAAM;GAAU,aAAa;EAAiB;EAC1D,GAAG;CACJ;AACF;AAED,MAAM,sBAAsB;CAC1B,MAAM;CACN,YAAY;EACV,GAAG,iBAAiB;EACpB,SAAS;GACP,MAAM;GACN,OAAO,EAAE,MAAM,SAAU;GACzB,aAAa;EACd;EACD,MAAM;GACJ,MAAM;GACN,OAAO,EAAE,MAAM,SAAU;GACzB,aAAa;EACd;EACD,YAAY;GAAE,MAAM;GAAU,aAAa;EAAmB;EAC9D,UAAU;GAAE,MAAM;GAAU,aAAa;EAAiB;EAC1D,GAAG;CACJ;AACF;AAED,MAAM,uBAAuB;CAC3B,MAAM;CACN,YAAY;EACV,GAAG,iBAAiB;EACpB,MAAM;GAAE,MAAM;GAAU,aAAa;EAAoB;EACzD,UAAU;GAAE,MAAM;GAAU,aAAa;EAAiB;CAC3D;AACF;AAED,MAAM,oBAAoB;CACxB,MAAM;CACN,YAAY;EACV,SAAS;GAAE,MAAM;GAAW,aAAa;EAA2B;EACpE,WAAW;GAAE,MAAM;GAAU,aAAa;EAAqB;EAC/D,SAAS;GAAE,MAAM;GAAU,aAAa;EAAmB;EAC3D,OAAO;GAAE,MAAM;GAAU,aAAa;EAA2B;CAClE;AACF;AAED,MAAM,wBAAwB;CAC5B,MAAM;CACN,YAAY;EACV,QAAQ;GACN,MAAM;GACN,OAAO;IACL,MAAM;IACN,YAAY;KACV,MAAM,EAAE,MAAM,SAAU;KACxB,MAAM,EAAE,MAAM,SAAU;KACxB,MAAM,EAAE,MAAM,SAAU;IACzB;GACF;GACD,aAAa;EACd;EACD,WAAW;GAAE,MAAM;GAAU,aAAa;EAAsB;CACjE;AACF;AAED,MAAM,uBAAuB;CAC3B,MAAM;CACN,YAAY;EACV,YAAY;GAAE,MAAM;GAAU,aAAa;EAAmB;EAC9D,QAAQ;GAAE,MAAM;GAAU,aAAa;EAAe;EACtD,WAAW;GAAE,MAAM;GAAU,aAAa;EAAsB;CACjE;AACF;AAED,MAAM,sBAAsB;CAC1B,MAAM;CACN,YAAY;EACV,OAAO;GAAE,MAAM;GAAU,aAAa;EAAc;EACpD,WAAW;GAAE,MAAM;GAAU,aAAa;EAAsB;EAChE,aAAa;GAAE,MAAM;GAAU,aAAa;EAAoB;CACjE;AACF;AAED,MAAM,uBAAuB;CAC3B,MAAM;CACN,YAAY;EACV,GAAG,iBAAiB;EACpB,WAAW;GAAE,MAAM;GAAU,aAAa;EAAsB;EAChE,YAAY;GAAE,MAAM;GAAU,aAAa;EAAc;EACzD,UAAU;GAAE,MAAM;GAAU,aAAa;EAAiB;EAC1D,WAAW;GAAE,MAAM;GAAU,aAAa;EAAkB;EAE5D,SAAS;GAAE,MAAM;GAAS,OAAO,EAAE,MAAM,SAAU;EAAE;EACrD,YAAY,EAAE,MAAM,SAAU;EAC9B,YAAY,EAAE,MAAM,SAAU;EAE9B,YAAY,EAAE,MAAM,SAAU;EAC9B,WAAW,EAAE,MAAM,SAAU;CAC9B;AACF;AAGD,OAAO,kBAAkB,wBAAwB,YAAY;AAC3D,QAAO,EACL,OAAO;EACL;GACE,MAAM;GACN,aACE;GACF,aAAa;IACX,MAAM;IACN,YAAY;KACV,WAAW;MACT,MAAM;MAC
N,aAAa;KACd;KACD,MAAM;MACJ,MAAM;MACN,MAAM,CAAC,OAAO,WAAY;MAC1B,aAAa;KACd;KACD,MAAM;MACJ,MAAM;MACN,aAAa;KACd;KACD,WAAW;MAAE,MAAM;MAAU,aAAa;KAAkB;KAC5D,YAAY;MACV,MAAM;MACN,aAAa;KACd;IACF;IACD,UAAU,CAAC,WAAY;GACxB;GACD,cAAc;IACZ,MAAM;IACN,aACE;IACF,OAAO;KACL;KACA;KACA;KACA;KACA;KACA;IACD;GACF;EACF;EACD;GACE,MAAM;GACN,aAAa;GACb,aAAa;IACX,MAAM;IACN,YAAY;KACV,WAAW;MACT,MAAM;MACN,aAAa;KACd;KACD,QAAQ;MACN,MAAM;MACN,MAAM;OAAC;OAAS;OAAQ;MAAO;MAC/B,aAAa;KACd;KACD,MAAM,EACJ,aACE,0JACH;IACF;IACD,UAAU;KAAC;KAAa;KAAU;IAAO;GAC1C;GACD,cAAc;EACf;EACD;GACE,MAAM;GACN,aACE;GACF,aAAa;IACX,MAAM;IACN,YAAY,EACV,WAAW;KACT,MAAM;KACN,aAAa;IACd,EACF;IACD,UAAU,CAAC,WAAY;GACxB;GACD,cAAc;IACZ,MAAM;IACN,aAAa;IACb,OAAO;KACL;KACA;KACA;KACA;IACD;GACF;EACF;CACF,EACF;AACF,EAAC;AAGF,OAAO,kBAAkB,uBAAuB,OAAO,YAAY;CACjE,MAAM,EAAE,MAAM,WAAW,MAAM,GAAG,QAAQ;AAE1C,KAAI;AACF,MAAI,SAAS,iBAAiB;GAC5B,MAAM,SAAS,mBAAmB,MAAM,KAAK;GAC7C,MAAM,WAAW,eAAe,OAAO,UAAU;AAEjD,QAAK,SACH,OAAM,IAAI,OAAO,yBAAyB,OAAO,UAAU;GAI7D,MAAM,SAAS,WAAW;GAC1B,MAAM,OAAO,OAAO,SAAS,OAAO,cAAc,QAAQ;GAC1D,MAAM,OAAO,SAAS,cAAe,OAAO,QAAQ;GACpD,MAAM,WAAW,OAAO,aAAa,OAAO;GAE5C,IAAIC;GACJ,IAAIC;AAEJ,OAAI,aAAa,SAAS;AACxB,iBAAa;AACb,iBAAa,CAAC,QAAQ,OAAO,SAAU;AACvC,QAAI,OAAO,WAAY,YAAW,KAAK,OAAO,WAAW;AACzD,QAAI,MAAM;AACR,gBAAW,KAAK,OAAO,KAAK,CAAC;AAC7B,gBAAW,KAAK,OAAO,SAAS,CAAC;IAClC;GACF,WAAU,aAAa,QAAQ;AAC9B,iBAAa;AACb,iBAAa,CAAC,QAAQ,OAAO,SAAU;AACvC,QAAI,MAAM;AACR,gBAAW,KAAK,OAAO,KAAK,CAAC;AAC7B,gBAAW,KAAK,OAAO,SAAS,CAAC;IAClC;GACF,WAAU,aAAa,OAAO;AAC7B,iBAAa;AACb,iBAAa,CAAC,QAAQ,OAAO,SAAU;AACvC,QAAI,MAAM;AACR,gBAAW,KAAK,OAAO,KAAK,CAAC;AAC7B,gBAAW,KAAK,OAAO,KAAK,IAAI,UAAU,GAAG,CAAC,CAAC;IAChD;GACF,OAAM;AAEL,iBAAa;AACb,iBAAa,CAAC,QAAQ,OAAO,SAAU;AACvC,QAAI,MAAM;AACR,gBAAW,KAAK,OAAO,KAAK,CAAC;AAC7B,gBAAW,KAAK,OAAO,SAAS,CAAC;IAClC;GACF;GAED,MAAM,SAAS,MAAM,cAAc,YAAY;IAC7C,MAAM;IACN,UAAU,YAAY,SAAS;IAC/B,WAAW,CAAC,OAAO,SAAU;GAC9B,EAAC;AACF,UAAO;IACL,SAAS,CAAC;KACR,MAAM;KACN,MAAM,KAAK,UAAU,QAAQ,MAAM,EAAE;IACtC,CAAC;IACF,mBAAmB;GACpB;EACF;AAED,MAAI,SAAS,kBAAkB;GAC7B,M
AAM,SAAS,oBAAoB,MAAM,KAAK;GAE9C,IAAID;GACJ,IAAIC;AAEJ,OAAI,OAAO,WAAW,SAAS;AAC7B,iBAAa;AACb,iBAAa;KAAC;KAAS,OAAO;KAAW,KAAK,UAAU,OAAO,KAAK;IAAC;GACtE,WAAU,OAAO,WAAW,QAAQ;AACnC,iBAAa;IACb,MAAM,aAAa,OAAO,KAAK,cAAc,CAAE;IAC/C,MAAM,SAAS,OAAO,KAAK,UAAU;AACrC,iBAAa;KAAC;KAAS,OAAO;KAAW,KAAK,UAAU,WAAW;IAAC;AACpE,QAAI,OAAQ,YAAW,KAAK,KAAK,UAAU,OAAO,CAAC;GACpD,WAAU,OAAO,WAAW,QAAQ;AACnC,iBAAa;IACb,MAAM,iBAAiB,OAAO,SAAS,WACnC,OAAO,OACP,KAAK,UAAU,OAAO,KAAK;AAC/B,iBAAa;KAAC;KAAS,OAAO;KAAW;IAAQ;GAClD,MACC,OAAM,IAAI,OAAO,4BAA4B,OAAO,OAAO;GAG7D,MAAM,SAAS,MAAM,cAAc,YAAY;IAC7C,MAAM;IACN,UAAU,YAAY,OAAO,OAAO;IACpC,WAAW,CAAC,OAAO,SAAU;GAC9B,EAAC;AACF,UAAO;IACL,SAAS,CAAC;KACR,MAAM;KACN,MAAM,KAAK,UAAU,QAAQ,MAAM,EAAE;IACtC,CAAC;IACF,mBAAmB;GACpB;EACF;AAED,MAAI,SAAS,qBAAqB;GAChC,MAAM,SAAS,sBAAsB,MAAM,KAAK;GAChD,MAAM,WAAW,eAAe,OAAO,UAAU;AAEjD,QAAK,SACH,OAAM,IAAI,OAAO,yBAAyB,OAAO,UAAU;GAG7D,IAAID;GACJ,IAAI,aAAa,CAAC,QAAQ,OAAO,SAAU;AAE3C,OAAI,aAAa,QACf,cAAa;YACJ,aAAa,OACtB,cAAa;YACJ,aAAa,MACtB,cAAa;OAEb,cAAa;GAGf,MAAM,SAAS,MAAM,cAAc,YAAY;IAC7C,MAAM;IACN,UAAU,YAAY,SAAS;IAC/B,WAAW,CAAC,OAAO,SAAU;GAC9B,EAAC;AACF,UAAO;IACL,SAAS,CAAC;KACR,MAAM;KACN,MAAM,KAAK,UAAU,QAAQ,MAAM,EAAE;IACtC,CAAC;IACF,mBAAmB;GACpB;EACF;AAED,QAAM,IAAI,OAAO,gBAAgB,KAAK;CACvC,SAAQ,OAAO;EACd,MAAM,eAAe,iBAAiB,QAAQ,MAAM,UAAU,OAAO,MAAM;AAC3E,SAAO;GACL,SAAS,CAAC;IAAE,MAAM;IAAQ,OAAO,SAAS,aAAa;GAAG,CAAC;GAC3D,SAAS;EACV;CACF;AACF,EAAC;AAGF,eAAe,OAAO;CACpB,MAAM,YAAY,IAAI;AACtB,OAAM,OAAO,QAAQ,UAAU;AAC/B,SAAQ,MAAM,uCAAuC;AACtD;AAED,MAAM,CAAC,MAAM,CAAC,UAAU;AACtB,SAAQ,MAAM,iBAAiB,MAAM;AACrC,SAAQ,KAAK,EAAE;AAChB,EAAC"}
@@ -0,0 +1,97 @@
1
+ """
2
+ Excel document handler - read/write Excel files
3
+ """
4
+ import json
5
+ import sys
6
+ from pathlib import Path
7
+
8
def read_excel(file_path: str, sheet_name: str = None, page: int = None, page_size: int = 100):
    """Read one worksheet of an Excel workbook, optionally a single page of rows.

    Args:
        file_path: Path of the workbook to open.
        sheet_name: Worksheet to read; the first sheet in the workbook is
            used when this is None.
        page: 1-based page number. When None, every row is returned.
        page_size: Number of rows per page (only used when ``page`` is given).

    Returns:
        A dict with the sheet name, all sheet names, the worksheet's
        ``max_row`` / ``max_column``, the pagination fields and the selected
        rows under ``data`` (each row is the tuple produced by ``iter_rows``).

    Raises:
        KeyError: propagated from the workbook lookup when ``sheet_name``
            does not exist.
    """
    import openpyxl

    wb = openpyxl.load_workbook(file_path, data_only=True)

    if sheet_name is None:
        sheet_name = wb.sheetnames[0]

    ws = wb[sheet_name]

    # Materialise every row once; pagination below slices this list.
    rows = list(ws.iter_rows(values_only=True))

    if page is not None:
        # The page count must come from the FULL row list. The previous code
        # measured the already-sliced page, so it never reported the real
        # number of pages.
        if rows and page_size > 0:
            total_pages = (len(rows) + page_size - 1) // page_size
        else:
            total_pages = 1
        start = (page - 1) * page_size
        data = rows[start:start + page_size]
    else:
        total_pages = 1
        data = rows

    return {
        "sheet_name": sheet_name,
        "sheets": wb.sheetnames,
        "total_rows": ws.max_row,
        "total_cols": ws.max_column,
        "current_page": page,
        # Use the same "is not None" test as the pagination branch so the
        # reported page size always matches whether pagination was applied.
        "page_size": page_size if page is not None else None,
        "total_pages": total_pages,
        "data": data
    }
43
+
44
def get_excel_info(file_path: str):
    """Summarise an Excel workbook: per-sheet dimensions plus file size.

    Args:
        file_path: Path of the workbook to inspect.

    Returns:
        A dict with ``sheets`` (one ``{"name", "rows", "cols"}`` entry per
        worksheet, in workbook order) and ``file_size`` in bytes.
    """
    import openpyxl

    workbook = openpyxl.load_workbook(file_path, data_only=True)
    size_in_bytes = Path(file_path).stat().st_size

    def describe(title):
        # Dimensions are taken straight from the worksheet's max_row/max_column.
        sheet = workbook[title]
        return {
            "name": title,
            "rows": sheet.max_row,
            "cols": sheet.max_column
        }

    return {
        "sheets": [describe(title) for title in workbook.sheetnames],
        "file_size": size_in_bytes
    }
63
+
64
def write_excel(file_path: str, data: list, sheet_name: str = "Sheet1"):
    """Create a new single-sheet workbook from ``data`` and save it.

    Args:
        file_path: Destination path handed to ``Workbook.save``.
        data: Iterable of rows; each row is appended to the sheet in order.
        sheet_name: Title given to the workbook's active sheet.

    Returns:
        ``{"success": True, "file_path": file_path}`` once the save returns.
    """
    import openpyxl

    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = sheet_name

    # Rows are appended one at a time, preserving the caller's ordering.
    for record in data:
        sheet.append(record)

    workbook.save(file_path)

    outcome = {"success": True}
    outcome["file_path"] = file_path
    return outcome
77
+
78
def _split_read_args(extra):
    """Turn the arguments after ``read <file>`` into (sheet, page, page_size).

    Accepted forms:
        []                          -> (None, None, 100)
        [sheet]                     -> (sheet, None, 100)
        [page, page_size]           -> (None, page, page_size)  both integers
        [sheet, page]               -> (sheet, page, 100)
        [sheet, page, page_size]    -> (sheet, page, page_size)

    The two-integer form exists because the Node caller omits the sheet name
    entirely when none was requested and still appends page and page size;
    read positionally, that made the page number the sheet name.
    Caveat: a sheet whose name is purely numeric must be passed using the
    three-argument form, otherwise it is taken for a page number.
    """
    if len(extra) == 2:
        try:
            return None, int(extra[0]), int(extra[1])
        except ValueError:
            pass  # First value is not a number, so it is a sheet name.

    sheet = extra[0] if len(extra) > 0 else None
    page = int(extra[1]) if len(extra) > 1 else None
    page_size = int(extra[2]) if len(extra) > 2 else 100
    return sheet, page, page_size


if __name__ == "__main__":
    # Usage: <script> <read|info|write> <file_path> [command-specific args...]
    command = sys.argv[1]
    file_path = sys.argv[2]

    if command == "read":
        sheet, page, page_size = _split_read_args(sys.argv[3:])
        result = read_excel(file_path, sheet, page, page_size)
    elif command == "info":
        result = get_excel_info(file_path)
    elif command == "write":
        # Data passed as JSON string
        data = json.loads(sys.argv[3])
        sheet = sys.argv[4] if len(sys.argv) > 4 else "Sheet1"
        result = write_excel(file_path, data, sheet)
    else:
        result = {"error": f"Unknown command: {command}"}

    # The result is emitted as a single JSON document on stdout;
    # default=str stringifies values json cannot encode natively.
    print(json.dumps(result, default=str))
@@ -0,0 +1,81 @@
1
+ """
2
+ PDF document handler - read PDF files using PyPDF2
3
+ """
4
+ import json
5
+ import sys
6
+ from pathlib import Path
7
+
8
def read_pdf(file_path: str, page: int = None, page_size: int = 100):
    """Extract text from a PDF, optionally one group of pages at a time.

    Args:
        file_path: Path of the PDF to read.
        page: 1-based index of the page group to return. ``None`` returns
            every page of the document.
        page_size: Number of PDF pages per group (only used when ``page``
            is given).

    Returns:
        A dict with ``total_pages``, ``current_page_group``, ``page_size``
        (``None`` when not paginating), ``total_page_groups`` and ``content``,
        a list of ``{"page_number", "text", "words"}`` entries where
        ``page_number`` is 1-based.

    Raises:
        ValueError: If pagination is requested with ``page < 1`` or
            ``page_size < 1``.
    """
    # Validate before parsing the file: a page below 1 would produce negative
    # indices (silently returning the *last* pages with bogus page numbers),
    # and a page_size of 0 would divide by zero further down.
    if page is not None:
        if page < 1:
            raise ValueError(f"page must be >= 1, got {page}")
        if page_size < 1:
            raise ValueError(f"page_size must be >= 1, got {page_size}")

    from PyPDF2 import PdfReader

    reader = PdfReader(file_path)
    total_pages = len(reader.pages)

    # Handle pagination
    if page is not None:
        start_page = (page - 1) * page_size
        end_page = min(start_page + page_size, total_pages)
        pages_to_read = range(start_page, end_page)
        total_page_groups = (total_pages + page_size - 1) // page_size
    else:
        pages_to_read = range(total_pages)
        total_page_groups = 1

    content = []
    for i in pages_to_read:
        # extract_text() may yield nothing for image-only pages.
        text = reader.pages[i].extract_text() or ""
        content.append({
            "page_number": i + 1,
            "text": text,
            "words": len(text.split())
        })

    return {
        "total_pages": total_pages,
        "current_page_group": page,
        "page_size": page_size if page is not None else None,
        "total_page_groups": total_page_groups,
        "content": content
    }
44
+
45
def get_pdf_info(file_path: str):
    """Collect summary information about a PDF file.

    Args:
        file_path: Path of the PDF to inspect.

    Returns:
        A dict with ``pages`` (page count), ``file_size`` (bytes on disk),
        ``metadata`` (document info with values stringified; only present
        when the reader exposes non-empty metadata) and ``total_words``
        (whitespace-separated tokens summed over every page).
    """
    from PyPDF2 import PdfReader

    pdf = PdfReader(file_path)
    summary = {
        "pages": len(pdf.pages),
        "file_size": Path(file_path).stat().st_size,
    }

    # Metadata is optional; include it only when there is something to show.
    doc_meta = pdf.metadata
    if doc_meta:
        summary["metadata"] = dict((key, str(value)) for key, value in doc_meta.items())

    # Pages with no extractable text contribute zero words.
    summary["total_words"] = sum(
        len((sheet.extract_text() or "").split()) for sheet in pdf.pages
    )

    return summary
67
+
68
if __name__ == "__main__":
    # CLI: <command> <file_path> [args...]
    #   read <file> [page] [page_size]   (page_size defaults to 10 here)
    #   info <file>
    # The result is always printed as a single JSON document on stdout.
    if len(sys.argv) < 3:
        # Report missing arguments as JSON instead of dying with IndexError.
        result = {"error": "Usage: <command> <file_path> [args...]"}
    else:
        command = sys.argv[1]
        file_path = sys.argv[2]

        if command == "read":
            page = int(sys.argv[3]) if len(sys.argv) > 3 else None
            page_size = int(sys.argv[4]) if len(sys.argv) > 4 else 10
            result = read_pdf(file_path, page, page_size)
        elif command == "info":
            result = get_pdf_info(file_path)
        else:
            result = {"error": f"Unknown command: {command}"}

    # default=str stringifies any value json cannot encode natively.
    print(json.dumps(result, default=str))
@@ -0,0 +1,331 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Text file handler - supports .txt, .csv, .md, .json, .yaml, .yml
4
+ Provides structured parsing for CSV and JSON files
5
+ """
6
+
7
+ import sys
8
+ import json
9
+ import os
10
+ import csv
11
+ from io import StringIO
12
+
13
+
14
def detect_file_type(file_path):
    """Return the lower-cased extension of ``file_path`` without the dot.

    Only the final path component is examined, so a dot inside a directory
    name (``/srv/project.v2/README``) is not mistaken for an extension.
    Returns ``''`` when the file name contains no dot or ends with one.
    """
    file_name = os.path.basename(file_path)
    if '.' not in file_name:
        return ''
    return file_name.rsplit('.', 1)[-1].lower()
18
+
19
+
20
def read_text(file_path, page=None, page_size=None):
    """Read a text file, optionally returning a single page of lines.

    The file is decoded with the first encoding in a fixed candidate list
    that does not raise ``UnicodeDecodeError``.

    Args:
        file_path: Path of the file to read.
        page: 1-based page number. Pagination is applied only when both
            ``page`` and ``page_size`` are given.
        page_size: Number of lines per page.

    Returns:
        On success a dict with ``success``, ``content``, ``total_lines`` and
        ``encoding`` (plus ``page``, ``page_size`` and ``has_more`` when
        paginating). On any failure ``{"success": False, "error": <message>}``.
    """
    try:
        # Detect encoding
        encodings = ['utf-8', 'utf-8-sig', 'gbk', 'gb2312', 'latin-1']
        content = None
        used_encoding = None

        for encoding in encodings:
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    content = f.read()
                used_encoding = encoding
                break
            except UnicodeDecodeError:
                continue

        if content is None:
            raise Exception("Could not decode file with any supported encoding")

        # Plain 'utf-8' decodes a BOM-prefixed file without error, so the
        # 'utf-8-sig' candidate is never reached and U+FEFF would leak into
        # the returned content. Strip it and report the accurate encoding.
        if used_encoding == 'utf-8' and content.startswith('\ufeff'):
            content = content[1:]
            used_encoding = 'utf-8-sig'

        lines = content.split('\n')
        total_lines = len(lines)

        # Pagination
        if page is not None and page_size is not None:
            start = (page - 1) * page_size
            end = start + page_size
            paginated_lines = lines[start:end]
            has_more = end < total_lines

            return {
                "success": True,
                "content": '\n'.join(paginated_lines),
                "total_lines": total_lines,
                "page": page,
                "page_size": page_size,
                "has_more": has_more,
                "encoding": used_encoding
            }

        return {
            "success": True,
            "content": content,
            "total_lines": total_lines,
            "encoding": used_encoding
        }
    except Exception as e:
        return {"success": False, "error": str(e)}
68
+
69
+
70
def read_csv(file_path, page=None, page_size=None):
    """Read a CSV file and return its rows as dicts keyed by the header row.

    The first row is treated as the header. Rows shorter than the header are
    padded with ``""``; cells beyond the header width are dropped.

    Args:
        file_path: Path of the CSV file.
        page: 1-based page number. Pagination is applied only when both
            ``page`` and ``page_size`` are given.
        page_size: Number of data rows per page.

    Returns:
        On success a dict with ``success``, ``headers``, ``data``,
        ``total_rows`` (data rows, header excluded) and ``encoding`` (plus
        ``page``, ``page_size`` and ``has_more`` when paginating). On any
        failure ``{"success": False, "error": <message>}``.
    """
    try:
        encodings = ['utf-8', 'utf-8-sig', 'gbk', 'gb2312', 'latin-1']
        content = None
        used_encoding = None

        for encoding in encodings:
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    content = f.read()
                used_encoding = encoding
                break
            except UnicodeDecodeError:
                continue

        if content is None:
            raise Exception("Could not decode file with any supported encoding")

        # Plain 'utf-8' decodes a BOM-prefixed file without error, so the
        # 'utf-8-sig' candidate is never reached and U+FEFF would end up
        # glued to the first header name. Strip it and report the accurate
        # encoding.
        if used_encoding == 'utf-8' and content.startswith('\ufeff'):
            content = content[1:]
            used_encoding = 'utf-8-sig'

        # Parse CSV
        rows = list(csv.reader(StringIO(content)))

        if not rows:
            return {
                "success": True,
                "headers": [],
                "data": [],
                "total_rows": 0,
                "encoding": used_encoding
            }

        headers = rows[0]
        data_rows = rows[1:]
        total_rows = len(data_rows)

        # Convert to list of dicts (a duplicated header keeps its last column).
        structured_data = [
            {header: (row[i] if i < len(row) else "") for i, header in enumerate(headers)}
            for row in data_rows
        ]

        # Pagination
        if page is not None and page_size is not None:
            start = (page - 1) * page_size
            end = start + page_size

            return {
                "success": True,
                "headers": headers,
                "data": structured_data[start:end],
                "total_rows": total_rows,
                "page": page,
                "page_size": page_size,
                "has_more": end < total_rows,
                "encoding": used_encoding
            }

        return {
            "success": True,
            "headers": headers,
            "data": structured_data,
            "total_rows": total_rows,
            "encoding": used_encoding
        }
    except Exception as e:
        return {"success": False, "error": str(e)}
141
+
142
+
143
def read_json(file_path):
    """Read a JSON file and return the parsed value.

    Args:
        file_path: Path of the JSON file.

    Returns:
        ``{"success": True, "data": <parsed>, "encoding": <name>}`` on
        success. Malformed JSON yields
        ``{"success": False, "error": "Invalid JSON: ..."}``; any other
        failure yields ``{"success": False, "error": <message>}``.
    """
    try:
        encodings = ['utf-8', 'utf-8-sig', 'gbk', 'gb2312', 'latin-1']
        content = None
        used_encoding = None

        for encoding in encodings:
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    content = f.read()
                used_encoding = encoding
                break
            except UnicodeDecodeError:
                continue

        if content is None:
            raise Exception("Could not decode file with any supported encoding")

        # Plain 'utf-8' decodes a BOM-prefixed file without error, so the
        # 'utf-8-sig' candidate is never reached -- and json.loads() rejects
        # text that starts with U+FEFF. Strip the BOM so such files parse,
        # and report the accurate encoding.
        if used_encoding == 'utf-8' and content.startswith('\ufeff'):
            content = content[1:]
            used_encoding = 'utf-8-sig'

        parsed = json.loads(content)

        return {
            "success": True,
            "data": parsed,
            "encoding": used_encoding
        }
    except json.JSONDecodeError as e:
        return {"success": False, "error": f"Invalid JSON: {str(e)}"}
    except Exception as e:
        return {"success": False, "error": str(e)}
173
+
174
+
175
def write_text(file_path, content):
    """Overwrite ``file_path`` with ``content`` encoded as UTF-8.

    Returns ``{"success": True, "message": ...}`` when the write completes,
    otherwise ``{"success": False, "error": <message>}``.
    """
    try:
        with open(file_path, mode='w', encoding='utf-8') as sink:
            sink.write(content)
    except Exception as exc:
        return {"success": False, "error": str(exc)}

    return {"success": True, "message": "File written successfully"}
183
+
184
+
185
def write_csv(file_path, data):
    """Write a list of row dicts to ``file_path`` as UTF-8 CSV.

    Args:
        file_path: Destination path; overwritten if it exists.
        data: List of dicts (one per row), or a JSON string encoding such a
            list. Empty data produces an empty file.

    Returns:
        ``{"success": True, "message": ...}`` on success, otherwise
        ``{"success": False, "error": <message>}``.

    The header is the union of every row's keys in first-seen order, so a
    row may introduce keys the first row lacks; cells a row does not provide
    are left empty.
    """
    try:
        if isinstance(data, str):
            data = json.loads(data)

        # Derive the header *before* opening the file: malformed rows then
        # fail without truncating an existing file, and a later row with an
        # extra key can no longer abort the write half-way through.
        fieldnames = []
        if data:
            fieldnames = list(dict.fromkeys(key for row in data for key in row.keys()))

        with open(file_path, 'w', encoding='utf-8', newline='') as f:
            if data:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(data)
        return {"success": True, "message": "CSV file written successfully"}
    except Exception as e:
        return {"success": False, "error": str(e)}
199
+
200
+
201
def write_json(file_path, data):
    """Serialise ``data`` to ``file_path`` as indented UTF-8 JSON.

    ``data`` may already be a Python value or a JSON string; a string is
    parsed first so the file contains the decoded document rather than a
    quoted string. Non-ASCII characters are written as-is.

    Returns ``{"success": True, "message": ...}`` on success, otherwise
    ``{"success": False, "error": <message>}``.
    """
    try:
        payload = json.loads(data) if isinstance(data, str) else data

        with open(file_path, mode='w', encoding='utf-8') as sink:
            json.dump(payload, sink, ensure_ascii=False, indent=2)
    except Exception as exc:
        return {"success": False, "error": str(exc)}

    return {"success": True, "message": "JSON file written successfully"}
212
+
213
+
214
def get_info(file_path):
    """Get text file metadata.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        On success a dict with ``success``, ``file_size`` (bytes),
        ``line_count``, ``encoding`` (``"unknown"`` if no candidate decoded
        the file) and ``file_type`` (lower-cased extension). CSV files
        additionally get ``headers``, ``total_rows`` and ``total_cols``;
        JSON files get ``item_count`` (top-level list) or ``key_count``
        (top-level object). These extras are best-effort and are simply
        omitted when the content cannot be parsed. On failure
        ``{"success": False, "error": <message>}``.
    """
    try:
        stat = os.stat(file_path)
        file_type = detect_file_type(file_path)

        # Try to detect encoding and count lines
        encodings = ['utf-8', 'utf-8-sig', 'gbk', 'gb2312', 'latin-1']
        content = None
        line_count = 0
        encoding = None

        for enc in encodings:
            try:
                with open(file_path, 'r', encoding=enc) as f:
                    content = f.read()
                line_count = len(content.split('\n'))
                encoding = enc
                break
            except UnicodeDecodeError:
                continue

        # Plain 'utf-8' decodes a BOM-prefixed file without error, so the
        # 'utf-8-sig' candidate is never reached; the leftover U+FEFF would
        # pollute the first CSV header and make JSON parsing fail.
        if encoding == 'utf-8' and content.startswith('\ufeff'):
            content = content[1:]
            encoding = 'utf-8-sig'

        result = {
            "success": True,
            "file_size": stat.st_size,
            "line_count": line_count,
            "encoding": encoding or "unknown",
            "file_type": file_type
        }

        # Add type-specific info, reusing the text decoded above instead of
        # reading the file a second time.
        if content is not None:
            if file_type == 'csv':
                try:
                    rows = list(csv.reader(StringIO(content)))
                    if rows:
                        result['headers'] = rows[0]
                        result['total_rows'] = len(rows) - 1
                        result['total_cols'] = len(rows[0])
                except Exception:
                    # Best-effort: unparseable CSV just omits the extras.
                    pass
            elif file_type == 'json':
                try:
                    data = json.loads(content)
                    if isinstance(data, list):
                        result['item_count'] = len(data)
                    elif isinstance(data, dict):
                        result['key_count'] = len(data.keys())
                except Exception:
                    # Best-effort: invalid JSON just omits the extras.
                    pass

        return result
    except Exception as e:
        return {"success": False, "error": str(e)}
269
+
270
+
271
def main():
    """Command-line entry point for the text handler.

    Usage:
        read  <file_path> [page] [page_size]
        write <file_path> <content>
        info  <file_path>

    The handler is chosen from the file extension (CSV, JSON, or plain text
    for everything else). Exactly one JSON document is printed to stdout;
    usage errors are reported as ``{"success": false, "error": ...}``.
    """
    if len(sys.argv) < 2:
        print(json.dumps({"success": False, "error": "No command specified"}))
        return

    command = sys.argv[1]

    if command == "read":
        if len(sys.argv) < 3:
            print(json.dumps({"success": False, "error": "No file path specified"}))
            return

        file_path = sys.argv[2]
        try:
            page = int(sys.argv[3]) if len(sys.argv) > 3 else None
            page_size = int(sys.argv[4]) if len(sys.argv) > 4 else None
        except ValueError:
            # Keep the JSON error contract instead of crashing with a traceback.
            print(json.dumps({"success": False, "error": "page and page_size must be integers"}))
            return

        file_type = detect_file_type(file_path)

        if file_type == 'csv':
            result = read_csv(file_path, page, page_size)
        elif file_type == 'json':
            result = read_json(file_path)
        else:
            result = read_text(file_path, page, page_size)

        print(json.dumps(result))

    elif command == "write":
        if len(sys.argv) < 4:
            print(json.dumps({"success": False, "error": "Insufficient arguments"}))
            return

        file_path = sys.argv[2]
        content = sys.argv[3]

        file_type = detect_file_type(file_path)

        if file_type == 'csv':
            result = write_csv(file_path, content)
        elif file_type == 'json':
            result = write_json(file_path, content)
        else:
            result = write_text(file_path, content)

        print(json.dumps(result))

    elif command == "info":
        if len(sys.argv) < 3:
            print(json.dumps({"success": False, "error": "No file path specified"}))
            return

        file_path = sys.argv[2]
        result = get_info(file_path)
        print(json.dumps(result))

    else:
        print(json.dumps({"success": False, "error": f"Unknown command: {command}"}))


if __name__ == "__main__":
    main()
@@ -0,0 +1,98 @@
1
+ """
2
+ Word document handler - read/write DOCX files
3
+ """
4
+ import json
5
+ import sys
6
+ from pathlib import Path
7
+
8
def read_word(file_path: str, page: int = None, page_size: int = 100):
    """Read a Word document, optionally paginating its paragraphs.

    Args:
        file_path: Path of the .docx file.
        page: 1-based page number. ``None`` returns every non-empty
            paragraph.
        page_size: Number of non-empty paragraphs per page.

    Returns:
        A dict with ``paragraphs`` (text of non-empty paragraphs, sliced to
        the requested page), ``tables`` (every table as a list of rows of
        cell text; tables are never paginated), ``total_paragraphs`` (all
        paragraphs in the document, empty ones included), ``total_tables``,
        ``current_page``, ``page_size`` and ``total_pages``.
    """
    from docx import Document

    doc = Document(file_path)

    # Extract paragraphs (whitespace-only paragraphs are skipped)
    paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]

    # Extract tables
    tables = [
        [[cell.text for cell in row.cells] for row in table.rows]
        for table in doc.tables
    ]

    # Handle pagination for paragraphs
    if page is not None:
        # Pages are cut from the *non-empty* paragraphs, so the page count
        # must be derived from that same list; counting empty paragraphs too
        # would advertise trailing pages that contain nothing.
        pageable_count = len(paragraphs)
        start = (page - 1) * page_size
        end = start + page_size
        paragraphs = paragraphs[start:end]
        total_pages = (pageable_count + page_size - 1) // page_size
    else:
        total_pages = 1

    return {
        "paragraphs": paragraphs,
        "tables": tables,
        "total_paragraphs": len(doc.paragraphs),
        "total_tables": len(doc.tables),
        "current_page": page,
        "page_size": page_size if page else None,
        "total_pages": total_pages
    }
44
+
45
def get_word_info(file_path: str):
    """Summarise a Word document.

    Args:
        file_path: Path of the .docx file.

    Returns:
        A dict with ``paragraphs`` (paragraphs whose text is not blank),
        ``tables`` (table count) and ``file_size`` (bytes on disk).
    """
    from docx import Document

    document = Document(file_path)

    # Whitespace-only paragraphs are not counted.
    non_blank = [para for para in document.paragraphs if para.text.strip()]

    summary = {"paragraphs": len(non_blank)}
    summary["tables"] = len(document.tables)
    summary["file_size"] = Path(file_path).stat().st_size
    return summary
59
+
60
def write_word(file_path: str, paragraphs: list, tables: list = None):
    """Create a new Word document from paragraphs and optional tables.

    Args:
        file_path: Destination path handed to ``Document.save``.
        paragraphs: Paragraph texts, added in order.
        tables: Optional list of tables, each a list of rows of cell values.
            Cell values are stringified. Rows may differ in length: the
            table is as wide as its longest row and missing cells stay
            empty.

    Returns:
        ``{"success": True, "file_path": file_path}`` once the save completes.
    """
    from docx import Document

    doc = Document()

    # Add paragraphs
    for text in paragraphs:
        doc.add_paragraph(text)

    # Add tables if provided
    for table_data in tables or []:
        # Size the table by its widest row; sizing by the first row alone
        # raised IndexError whenever a later row had more cells.
        column_count = max((len(row) for row in table_data), default=1)
        table = doc.add_table(rows=len(table_data), cols=column_count)
        for i, row_data in enumerate(table_data):
            cells = table.rows[i].cells
            for j, cell_text in enumerate(row_data):
                cells[j].text = str(cell_text)

    doc.save(file_path)
    return {"success": True, "file_path": file_path}
80
+
81
if __name__ == "__main__":
    # CLI: <command> <file_path> [args...]
    #   read  <file> [page] [page_size]
    #   info  <file>
    #   write <file> <json_paragraphs> [json_tables]
    # The result is always printed as a single JSON document on stdout.
    if len(sys.argv) < 3:
        # Report missing arguments as JSON instead of dying with IndexError.
        result = {"error": "Usage: <command> <file_path> [args...]"}
    else:
        command = sys.argv[1]
        file_path = sys.argv[2]

        if command == "read":
            page = int(sys.argv[3]) if len(sys.argv) > 3 else None
            page_size = int(sys.argv[4]) if len(sys.argv) > 4 else 100
            result = read_word(file_path, page, page_size)
        elif command == "info":
            result = get_word_info(file_path)
        elif command == "write":
            if len(sys.argv) < 4:
                result = {"error": "write requires a JSON paragraphs argument"}
            else:
                # Paragraphs and tables are passed as JSON strings.
                paragraphs = json.loads(sys.argv[3])
                tables = json.loads(sys.argv[4]) if len(sys.argv) > 4 else None
                result = write_word(file_path, paragraphs, tables)
        else:
            result = {"error": f"Unknown command: {command}"}

    # default=str stringifies any value json cannot encode natively.
    print(json.dumps(result, default=str))