docsmith-mcp 0.0.1-beta.1
This diff shows the contents of publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.
- package/.github/workflows/test.yml +35 -0
- package/LICENSE +21 -0
- package/README.md +109 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +679 -0
- package/dist/index.js.map +1 -0
- package/dist/python/excel_handler.py +97 -0
- package/dist/python/pdf_handler.py +81 -0
- package/dist/python/text_handler.py +331 -0
- package/dist/python/word_handler.py +98 -0
- package/examples/sample_data.csv +6 -0
- package/examples/sample_data.json +9 -0
- package/examples/sample_document.pdf +80 -0
- package/examples/sample_report.docx +0 -0
- package/examples/sample_sales_data.xlsx +0 -0
- package/examples/sample_text.txt +10 -0
- package/package.json +36 -0
- package/python/excel_handler.py +97 -0
- package/python/pdf_handler.py +81 -0
- package/python/text_handler.py +331 -0
- package/python/word_handler.py +98 -0
- package/scripts/preload-packages.mjs +64 -0
- package/src/code-runner.ts +136 -0
- package/src/index.ts +496 -0
- package/src/utils.ts +45 -0
- package/tests/document-processing.test.ts +230 -0
- package/tsconfig.json +20 -0
- package/tsdown.config.ts +21 -0
- package/vitest.config.ts +15 -0
package/examples/sample_document.pdf
ADDED
@@ -0,0 +1,80 @@
+%PDF-1.4
+%���� ReportLab Generated PDF document (opensource)
+1 0 obj
+<<
+/F1 2 0 R /F2 3 0 R /F3 4 0 R
+>>
+endobj
+2 0 obj
+<<
+/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
+>>
+endobj
+3 0 obj
+<<
+/BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font
+>>
+endobj
+4 0 obj
+<<
+/BaseFont /Helvetica-BoldOblique /Encoding /WinAnsiEncoding /Name /F3 /Subtype /Type1 /Type /Font
+>>
+endobj
+5 0 obj
+<<
+/Contents 9 0 R /MediaBox [ 0 0 612 792 ] /Parent 8 0 R /Resources <<
+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
+>> /Rotate 0 /Trans <<
+
+>>
+/Type /Page
+>>
+endobj
+6 0 obj
+<<
+/PageMode /UseNone /Pages 8 0 R /Type /Catalog
+>>
+endobj
+7 0 obj
+<<
+/Author (\(anonymous\)) /CreationDate (D:20260202143317+08'00') /Creator (\(unspecified\)) /Keywords () /ModDate (D:20260202143317+08'00') /Producer (ReportLab PDF Library - \(opensource\))
+/Subject (\(unspecified\)) /Title (\(anonymous\)) /Trapped /False
+>>
+endobj
+8 0 obj
+<<
+/Count 1 /Kids [ 5 0 R ] /Type /Pages
+>>
+endobj
+9 0 obj
+<<
+/Filter [ /ASCII85Decode /FlateDecode ] /Length 1245
+>>
+stream
+Gat=*a`?,q&A@B[qCj@6@N3-MJE/FV3LNI/b/@=\/p"j*O9f&9'J<FVYFc"Xb;(MJ/=&]&q1*qXDPA3TMAL&WP76C*!oA<4^c3lG^]G.aiY!jZZQD!S+i=RbbRodl_Dm`/#/\8+&.F@/rWC,cL't7pm;lMb_hC,?^b<pB?^<E<"HDpH@L;Y8H!]eG>a_]E.c?Q75@JO6Mq;qI#`*$*iUrR7&/+(R*SOY/s"U:.ooN;&o3r.^(3s6XpFb4cr#Yhl`QAp`c4^:emn@_6c)rEO8qZtj4f';Hg=iH<Y`8qmSi/^"prjPR/O?jc.GD:rFdD:/r\>e0\q$SfH;_V'O)(!rY$S$/DW=:G]7Er=):1ric(/#2S!]OcS_KE(FC%7F2l<Z]DS4:A/tH'%YKa@mZnC.:8tda,NCbU$gll6!OD6<c18)[>d+*phMQ_lQB6+Sj9NB`@4R/%Z&fpgZN+)Pb*k%,ien^:%iYY&t+o%R_(iWTjSrm,P`56JS'Ft<!cNne\"%O"\\pM'FVUkN0Nc?_!6')+52GmB^o6/`4S)+7Q:ln5J*f=)1`dqAl_B/g?3)BlQ_Dph3mo,d\boUskB8;1eYR*cQO>:_)\\`C),+@4S5ZB:]llV18RFm3m,j'G=:?2OWLmUTh!'TH7eMn,$%*Wb-)V@Keh7.fAasj'PA(?4"Uf5*qKF]b3-X]*V%X0D@1rVuc(MuWVHoWu]9N(Mfm(r7O:3l#[&NTp01\i$8%XBPg'MV#``/$@VCL,V6V3B'5I#;W/=PVr3WVg]]L`BeWT7DcK'/b#!%JO<1,l!&MS$%(@52r2?s/K_fX,Fk)>W5\E)!N7aI/rr$7qUcncs(MB>@fKjES+KL)QYpbXk8,L.h5c6$\HBh^CSW"n/]K;gX-*dVL!R5W-e%$l%p9S7JOqQ/[/CJh`n%=ZKlUbhs0D[/Q=^l'(bk1_h7+H0@g_?l8h^=lqmh)Z\*,RQ??*A`ocDCO$LA)j2Ff1\4cHr3NP70'`7F&H"Pg`CL6/kD:c&On*@0m6V;>A;cHK;5'Y&<H=cUS]mb/cS@h3HJ>;+I]?uHq!PQt$nu+d;5@]*`S*b212I97IcJ@3%%3JjFS7d?@NX7?[YSRU!e4U^l\bVZG$=g:>o2;S\%CiNA@<HH<lX`t/oFc!.<L,t:APjUrYuFbX6fKUtkK>2FNM+._nO`>K@rulPBWte.ChGYqo;sOFT7A%`F*eh,XHUSeXYO\"fV&:tg&I\b^(p~>endstream
+endobj
+xref
+0 10
+0000000000 65535 f
+0000000061 00000 n
+0000000112 00000 n
+0000000219 00000 n
+0000000331 00000 n
+0000000450 00000 n
+0000000643 00000 n
+0000000711 00000 n
+0000000991 00000 n
+0000001050 00000 n
+trailer
+<<
+/ID
+[<72e692cf8a12e2d98cf221bdbcb57649><72e692cf8a12e2d98cf221bdbcb57649>]
+% ReportLab generated PDF document -- digest (opensource)
+
+/Info 7 0 R
+/Root 6 0 R
+/Size 10
+>>
+startxref
+2386
+%%EOF
package/examples/sample_report.docx
Binary file
package/examples/sample_sales_data.xlsx
Binary file
package/package.json
ADDED
@@ -0,0 +1,36 @@
+{
+  "name": "docsmith-mcp",
+  "version": "0.0.1-beta.1",
+  "description": "Python-powered document processing MCP for Excel, Word, PDF",
+  "type": "module",
+  "main": "dist/index.js",
+  "scripts": {
+    "build": "tsdown",
+    "dev": "tsdown --watch",
+    "prepare": "pnpm build",
+    "postinstall": "node --experimental-wasm-stack-switching scripts/preload-packages.mjs || true",
+    "test": "vitest",
+    "test:run": "vitest run",
+    "test:ui": "vitest --ui"
+  },
+  "keywords": [
+    "mcp",
+    "document",
+    "excel",
+    "word",
+    "pdf"
+  ],
+  "author": "",
+  "license": "MIT",
+  "dependencies": {
+    "@mcpc-tech/code-runner-mcp": "0.1.8",
+    "@modelcontextprotocol/sdk": "^1.25.3",
+    "zod": "3"
+  },
+  "devDependencies": {
+    "@vitest/ui": "^4.0.18",
+    "tsdown": "^0.9.0",
+    "typescript": "^5.7.0",
+    "vitest": "^4.0.18"
+  }
+}
@@ -0,0 +1,97 @@
+"""
+Excel document handler - read/write Excel files
+"""
+import json
+import sys
+from pathlib import Path
+
+def read_excel(file_path: str, sheet_name: str = None, page: int = None, page_size: int = 100):
+    """Read Excel file with optional pagination"""
+    import openpyxl
+
+    wb = openpyxl.load_workbook(file_path, data_only=True)
+
+    if sheet_name is None:
+        sheet_name = wb.sheetnames[0]
+
+    ws = wb[sheet_name]
+
+    # Get all data
+    data = []
+    for row in ws.iter_rows(values_only=True):
+        data.append(row)
+
+    # Handle pagination
+    if page is not None:
+        start = (page - 1) * page_size
+        end = start + page_size
+        data = data[start:end]
+        total_pages = (len(data) + page_size - 1) // page_size if data else 1
+    else:
+        total_pages = 1
+
+    return {
+        "sheet_name": sheet_name,
+        "sheets": wb.sheetnames,
+        "total_rows": ws.max_row,
+        "total_cols": ws.max_column,
+        "current_page": page,
+        "page_size": page_size if page else None,
+        "total_pages": total_pages,
+        "data": data
+    }
+
+def get_excel_info(file_path: str):
+    """Get Excel file metadata"""
+    import openpyxl
+
+    wb = openpyxl.load_workbook(file_path, data_only=True)
+    info = {
+        "sheets": [],
+        "file_size": Path(file_path).stat().st_size
+    }
+
+    for sheet_name in wb.sheetnames:
+        ws = wb[sheet_name]
+        info["sheets"].append({
+            "name": sheet_name,
+            "rows": ws.max_row,
+            "cols": ws.max_column
+        })
+
+    return info
+
+def write_excel(file_path: str, data: list, sheet_name: str = "Sheet1"):
+    """Write data to Excel file"""
+    import openpyxl
+
+    wb = openpyxl.Workbook()
+    ws = wb.active
+    ws.title = sheet_name
+
+    for row in data:
+        ws.append(row)
+
+    wb.save(file_path)
+    return {"success": True, "file_path": file_path}
+
+if __name__ == "__main__":
+    command = sys.argv[1]
+    file_path = sys.argv[2]
+
+    if command == "read":
+        sheet = sys.argv[3] if len(sys.argv) > 3 else None
+        page = int(sys.argv[4]) if len(sys.argv) > 4 else None
+        page_size = int(sys.argv[5]) if len(sys.argv) > 5 else 100
+        result = read_excel(file_path, sheet, page, page_size)
+    elif command == "info":
+        result = get_excel_info(file_path)
+    elif command == "write":
+        # Data passed as JSON string
+        data = json.loads(sys.argv[3])
+        sheet = sys.argv[4] if len(sys.argv) > 4 else "Sheet1"
+        result = write_excel(file_path, data, sheet)
+    else:
+        result = {"error": f"Unknown command: {command}"}
+
+    print(json.dumps(result, default=str))
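Each handler is a self-contained CLI that prints a single JSON document to stdout, so any host process can drive it. A minimal sketch of calling the Excel handler above via subprocess; the file name sales.xlsx, the python/ path, and the interpreter name are illustrative assumptions, and openpyxl must be installed:

import json
import subprocess

# Hypothetical invocation: read page 1 (50 rows) of sheet "Sheet1" from sales.xlsx.
# Argument order mirrors the __main__ block above: command, file, sheet, page, page_size.
proc = subprocess.run(
    ["python", "python/excel_handler.py", "read", "sales.xlsx", "Sheet1", "1", "50"],
    capture_output=True, text=True, check=True,
)
result = json.loads(proc.stdout)
print(result["total_rows"], result["total_pages"])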
@@ -0,0 +1,81 @@
+"""
+PDF document handler - read PDF files using PyPDF2
+"""
+import json
+import sys
+from pathlib import Path
+
+def read_pdf(file_path: str, page: int = None, page_size: int = 100):
+    """Read PDF with optional pagination by pages"""
+    from PyPDF2 import PdfReader
+
+    reader = PdfReader(file_path)
+    total_pages = len(reader.pages)
+
+    # Handle pagination
+    if page is not None:
+        start_page = (page - 1) * page_size
+        end_page = min(start_page + page_size, total_pages)
+        pages_to_read = range(start_page, end_page)
+        current_page = page
+        total_page_groups = (total_pages + page_size - 1) // page_size
+    else:
+        pages_to_read = range(total_pages)
+        current_page = None
+        total_page_groups = 1
+
+    content = []
+    for i in pages_to_read:
+        page_obj = reader.pages[i]
+        text = page_obj.extract_text()
+        content.append({
+            "page_number": i + 1,
+            "text": text or "",
+            "words": len(text.split()) if text else 0
+        })
+
+    return {
+        "total_pages": total_pages,
+        "current_page_group": current_page,
+        "page_size": page_size if page else None,
+        "total_page_groups": total_page_groups,
+        "content": content
+    }
+
+def get_pdf_info(file_path: str):
+    """Get PDF metadata"""
+    from PyPDF2 import PdfReader
+
+    reader = PdfReader(file_path)
+    info = {
+        "pages": len(reader.pages),
+        "file_size": Path(file_path).stat().st_size
+    }
+
+    # Try to get PDF metadata
+    if reader.metadata:
+        info["metadata"] = {k: str(v) for k, v in reader.metadata.items()}
+
+    # Count total words
+    total_words = 0
+    for page in reader.pages:
+        text = page.extract_text() or ""
+        total_words += len(text.split())
+    info["total_words"] = total_words
+
+    return info
+
+if __name__ == "__main__":
+    command = sys.argv[1]
+    file_path = sys.argv[2]
+
+    if command == "read":
+        page = int(sys.argv[3]) if len(sys.argv) > 3 else None
+        page_size = int(sys.argv[4]) if len(sys.argv) > 4 else 10
+        result = read_pdf(file_path, page, page_size)
+    elif command == "info":
+        result = get_pdf_info(file_path)
+    else:
+        result = {"error": f"Unknown command: {command}"}
+
+    print(json.dumps(result, default=str))
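The PDF handler follows the same stdout-JSON contract: read walks the file in page groups (the CLI default group size is 10 when paginating), while info reports page count, file size, metadata, and word totals. A minimal sketch under the same assumptions (illustrative paths, PyPDF2 installed):

import json
import subprocess

# Hypothetical invocation: fetch metadata, then the first group of 5 pages.
info = json.loads(subprocess.run(
    ["python", "python/pdf_handler.py", "info", "examples/sample_document.pdf"],
    capture_output=True, text=True, check=True,
).stdout)
pages = json.loads(subprocess.run(
    ["python", "python/pdf_handler.py", "read", "examples/sample_document.pdf", "1", "5"],
    capture_output=True, text=True, check=True,
).stdout)
print(info["pages"], pages["content"][0]["text"][:80])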
@@ -0,0 +1,331 @@
+#!/usr/bin/env python3
+"""
+Text file handler - supports .txt, .csv, .md, .json, .yaml, .yml
+Provides structured parsing for CSV and JSON files
+"""
+
+import sys
+import json
+import os
+import csv
+from io import StringIO
+
+
+def detect_file_type(file_path):
+    """Detect file type from extension."""
+    ext = file_path.lower().split('.')[-1] if '.' in file_path else ''
+    return ext
+
+
+def read_text(file_path, page=None, page_size=None):
+    """Read text file with optional pagination."""
+    try:
+        # Detect encoding
+        encodings = ['utf-8', 'utf-8-sig', 'gbk', 'gb2312', 'latin-1']
+        content = None
+        used_encoding = None
+
+        for encoding in encodings:
+            try:
+                with open(file_path, 'r', encoding=encoding) as f:
+                    content = f.read()
+                used_encoding = encoding
+                break
+            except UnicodeDecodeError:
+                continue
+
+        if content is None:
+            raise Exception("Could not decode file with any supported encoding")
+
+        lines = content.split('\n')
+        total_lines = len(lines)
+
+        # Pagination
+        if page is not None and page_size is not None:
+            start = (page - 1) * page_size
+            end = start + page_size
+            paginated_lines = lines[start:end]
+            has_more = end < total_lines
+
+            return {
+                "success": True,
+                "content": '\n'.join(paginated_lines),
+                "total_lines": total_lines,
+                "page": page,
+                "page_size": page_size,
+                "has_more": has_more,
+                "encoding": used_encoding
+            }
+        else:
+            return {
+                "success": True,
+                "content": content,
+                "total_lines": total_lines,
+                "encoding": used_encoding
+            }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+def read_csv(file_path, page=None, page_size=None):
+    """Read CSV file and return structured data."""
+    try:
+        encodings = ['utf-8', 'utf-8-sig', 'gbk', 'gb2312', 'latin-1']
+        content = None
+        used_encoding = None
+
+        for encoding in encodings:
+            try:
+                with open(file_path, 'r', encoding=encoding) as f:
+                    content = f.read()
+                used_encoding = encoding
+                break
+            except UnicodeDecodeError:
+                continue
+
+        if content is None:
+            raise Exception("Could not decode file with any supported encoding")
+
+        # Parse CSV
+        reader = csv.reader(StringIO(content))
+        rows = list(reader)
+
+        if not rows:
+            return {
+                "success": True,
+                "headers": [],
+                "data": [],
+                "total_rows": 0,
+                "encoding": used_encoding
+            }
+
+        headers = rows[0]
+        data_rows = rows[1:]
+        total_rows = len(data_rows)
+
+        # Convert to list of dicts
+        structured_data = []
+        for row in data_rows:
+            row_dict = {}
+            for i, header in enumerate(headers):
+                row_dict[header] = row[i] if i < len(row) else ""
+            structured_data.append(row_dict)
+
+        # Pagination
+        if page is not None and page_size is not None:
+            start = (page - 1) * page_size
+            end = start + page_size
+            paginated_data = structured_data[start:end]
+            has_more = end < total_rows
+
+            return {
+                "success": True,
+                "headers": headers,
+                "data": paginated_data,
+                "total_rows": total_rows,
+                "page": page,
+                "page_size": page_size,
+                "has_more": has_more,
+                "encoding": used_encoding
+            }
+        else:
+            return {
+                "success": True,
+                "headers": headers,
+                "data": structured_data,
+                "total_rows": total_rows,
+                "encoding": used_encoding
+            }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+def read_json(file_path):
+    """Read JSON file and return parsed object."""
+    try:
+        encodings = ['utf-8', 'utf-8-sig', 'gbk', 'gb2312', 'latin-1']
+        content = None
+        used_encoding = None
+
+        for encoding in encodings:
+            try:
+                with open(file_path, 'r', encoding=encoding) as f:
+                    content = f.read()
+                used_encoding = encoding
+                break
+            except UnicodeDecodeError:
+                continue
+
+        if content is None:
+            raise Exception("Could not decode file with any supported encoding")
+
+        parsed = json.loads(content)
+
+        return {
+            "success": True,
+            "data": parsed,
+            "encoding": used_encoding
+        }
+    except json.JSONDecodeError as e:
+        return {"success": False, "error": f"Invalid JSON: {str(e)}"}
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+def write_text(file_path, content):
+    """Write content to text file."""
+    try:
+        with open(file_path, 'w', encoding='utf-8') as f:
+            f.write(content)
+        return {"success": True, "message": "File written successfully"}
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+def write_csv(file_path, data):
+    """Write data to CSV file."""
+    try:
+        if isinstance(data, str):
+            data = json.loads(data)
+
+        with open(file_path, 'w', encoding='utf-8', newline='') as f:
+            if data and len(data) > 0:
+                writer = csv.DictWriter(f, fieldnames=data[0].keys())
+                writer.writeheader()
+                writer.writerows(data)
+        return {"success": True, "message": "CSV file written successfully"}
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+def write_json(file_path, data):
+    """Write data to JSON file."""
+    try:
+        if isinstance(data, str):
+            data = json.loads(data)
+
+        with open(file_path, 'w', encoding='utf-8') as f:
+            json.dump(data, f, ensure_ascii=False, indent=2)
+        return {"success": True, "message": "JSON file written successfully"}
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+def get_info(file_path):
+    """Get text file metadata."""
+    try:
+        stat = os.stat(file_path)
+        file_type = detect_file_type(file_path)
+
+        # Try to detect encoding and count lines
+        encodings = ['utf-8', 'utf-8-sig', 'gbk', 'gb2312', 'latin-1']
+        line_count = 0
+        encoding = None
+
+        for enc in encodings:
+            try:
+                with open(file_path, 'r', encoding=enc) as f:
+                    content = f.read()
+                line_count = len(content.split('\n'))
+                encoding = enc
+                break
+            except UnicodeDecodeError:
+                continue
+
+        result = {
+            "success": True,
+            "file_size": stat.st_size,
+            "line_count": line_count,
+            "encoding": encoding or "unknown",
+            "file_type": file_type
+        }
+
+        # Add type-specific info
+        if file_type == 'csv':
+            try:
+                with open(file_path, 'r', encoding=encoding or 'utf-8') as f:
+                    reader = csv.reader(f)
+                    rows = list(reader)
+                if rows:
+                    result['headers'] = rows[0]
+                    result['total_rows'] = len(rows) - 1
+                    result['total_cols'] = len(rows[0])
+            except:
+                pass
+        elif file_type == 'json':
+            try:
+                with open(file_path, 'r', encoding=encoding or 'utf-8') as f:
+                    data = json.load(f)
+                if isinstance(data, list):
+                    result['item_count'] = len(data)
+                elif isinstance(data, dict):
+                    result['key_count'] = len(data.keys())
+            except:
+                pass
+
+        return result
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+def main():
+    if len(sys.argv) < 2:
+        print(json.dumps({"success": False, "error": "No command specified"}))
+        return
+
+    command = sys.argv[1]
+
+    if command == "read":
+        if len(sys.argv) < 3:
+            print(json.dumps({"success": False, "error": "No file path specified"}))
+            return
+
+        file_path = sys.argv[2]
+        page = int(sys.argv[3]) if len(sys.argv) > 3 else None
+        page_size = int(sys.argv[4]) if len(sys.argv) > 4 else None
+
+        file_type = detect_file_type(file_path)
+
+        if file_type == 'csv':
+            result = read_csv(file_path, page, page_size)
+        elif file_type == 'json':
+            result = read_json(file_path)
+        else:
+            result = read_text(file_path, page, page_size)
+
+        print(json.dumps(result))
+
+    elif command == "write":
+        if len(sys.argv) < 4:
+            print(json.dumps({"success": False, "error": "Insufficient arguments"}))
+            return
+
+        file_path = sys.argv[2]
+        content = sys.argv[3]
+
+        file_type = detect_file_type(file_path)
+
+        if file_type == 'csv':
+            result = write_csv(file_path, content)
+        elif file_type == 'json':
+            result = write_json(file_path, content)
+        else:
+            result = write_text(file_path, content)
+
+        print(json.dumps(result))
+
+    elif command == "info":
+        if len(sys.argv) < 3:
+            print(json.dumps({"success": False, "error": "No file path specified"}))
+            return
+
+        file_path = sys.argv[2]
+        result = get_info(file_path)
+        print(json.dumps(result))
+
+    else:
+        print(json.dumps({"success": False, "error": f"Unknown command: {command}"}))
+
+
+if __name__ == "__main__":
+    main()
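write_csv and write_json accept their payload as a JSON string, so a CLI round trip needs only json.dumps on the way in; read then routes by extension to the structured CSV parser. A minimal sketch, with illustrative paths, run from the package root:

import json
import subprocess

ROWS = [{"name": "Ada", "score": "99"}, {"name": "Lin", "score": "87"}]

# Hypothetical round trip: write the rows to out.csv, then read them back one row per page.
subprocess.run(
    ["python", "python/text_handler.py", "write", "out.csv", json.dumps(ROWS)],
    check=True,
)
back = json.loads(subprocess.run(
    ["python", "python/text_handler.py", "read", "out.csv", "1", "1"],
    capture_output=True, text=True, check=True,
).stdout)
print(back["headers"], back["data"], back["has_more"])  # expect has_more == True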