@hasna/skills 0.1.21 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/index.js +341 -287
- package/bin/mcp.js +297 -245
- package/dist/index.d.ts +1 -1
- package/dist/index.js +79 -24
- package/dist/lib/basic-skills.test.d.ts +1 -0
- package/dist/lib/registry.d.ts +5 -0
- package/dist/lib/search.d.ts +2 -2
- package/package.json +1 -1
- package/skills/skill-academic-journal-matcher/package.json +1 -7
- package/skills/skill-action-item-router/package.json +1 -7
- package/skills/skill-ad-creative-generator/package.json +1 -7
- package/skills/skill-advanced-math/package.json +1 -7
- package/skills/skill-anomaly-investigator/package.json +1 -7
- package/skills/skill-api-test-suite/package.json +1 -7
- package/skills/skill-apidocs/package.json +0 -6
- package/skills/skill-audio/SKILL.md +3 -1
- package/skills/skill-audio/package.json +0 -5
- package/skills/skill-audio/src/index-local.ts +4 -1
- package/skills/skill-audio/src/providers/minimax.ts +83 -0
- package/skills/skill-audio/src/types.ts +1 -1
- package/skills/skill-audio-cleanup-lab/package.json +1 -5
- package/skills/{skill-extract-audio → skill-audio-extract}/SKILL.md +3 -3
- package/skills/{skill-extract-audio → skill-audio-extract}/package.json +3 -7
- package/skills/skill-audiobook-chapter-proofer/package.json +1 -7
- package/skills/skill-banner-ad-suite/package.json +1 -7
- package/skills/skill-benchmark-finder/package.json +1 -7
- package/skills/skill-bio-sequence-tool/package.json +1 -7
- package/skills/skill-blog-topic-cluster/package.json +1 -7
- package/skills/skill-brand-voice-audit/package.json +1 -7
- package/skills/skill-browse/package.json +0 -5
- package/skills/skill-businessactivity/package.json +1 -3
- package/skills/skill-calendar-events/package.json +1 -7
- package/skills/skill-campaign-metric-brief/package.json +1 -7
- package/skills/skill-campaign-moodboard/package.json +1 -7
- package/skills/skill-caption-style-stylist/package.json +1 -7
- package/skills/skill-chemistry-calculator/package.json +1 -7
- package/skills/skill-churn-risk-notifier/package.json +1 -7
- package/skills/skill-citation-formatter/package.json +1 -7
- package/skills/skill-classroom-newsletter-kit/package.json +1 -7
- package/skills/skill-codefix/package.json +0 -4
- package/skills/skill-color-palette-harmonizer/package.json +1 -7
- package/skills/skill-colorextract/package.json +17 -5
- package/skills/skill-commitpush/package.json +0 -1
- package/skills/skill-commitpushpr/package.json +0 -1
- package/skills/skill-competitor-ad-analyzer/package.json +1 -7
- package/skills/skill-compliance-copy-check/package.json +1 -7
- package/skills/skill-compliance-report-pack/package.json +1 -7
- package/skills/skill-compress-video/package.json +1 -5
- package/skills/skill-consolelog/package.json +1 -7
- package/skills/skill-contract-plainlanguage/package.json +1 -7
- package/skills/skill-convert/SKILL.md +7 -0
- package/skills/skill-convert/package.json +0 -4
- package/skills/skill-copytone-translator/package.json +1 -7
- package/skills/skill-create-blog-article/package.json +1 -7
- package/skills/skill-create-ebook/package.json +1 -7
- package/skills/skill-crm-note-enhancer/package.json +1 -7
- package/skills/skill-customer-journey-mapper/package.json +1 -7
- package/skills/skill-dashboard-builder/package.json +1 -7
- package/skills/skill-dashboard-narrator/package.json +1 -7
- package/skills/skill-data-anonymizer/package.json +1 -7
- package/skills/skill-database-explorer/package.json +1 -7
- package/skills/skill-dataset-health-check/package.json +1 -7
- package/skills/skill-decision-journal/package.json +1 -7
- package/skills/skill-deepresearch/package.json +0 -5
- package/skills/skill-delegation-brief-writer/package.json +1 -7
- package/skills/skill-deploy/package.json +0 -4
- package/skills/skill-destination-briefing/package.json +1 -7
- package/skills/skill-diff-viewer/package.json +1 -7
- package/skills/{skill-generate-docx → skill-doc-generate}/SKILL.md +2 -2
- package/skills/{skill-generate-pdf → skill-doc-generate}/package.json +9 -4
- package/skills/{skill-generate-docx → skill-doc-generate}/src/index.ts +91 -28
- package/skills/skill-doc-read/SKILL.md +45 -0
- package/skills/skill-doc-read/package.json +29 -0
- package/skills/skill-doc-read/src/index.ts +324 -0
- package/skills/skill-doc-read/tsconfig.json +8 -0
- package/skills/skill-domainpurchase/package.json +1 -7
- package/skills/skill-domainsearch/package.json +1 -7
- package/skills/skill-e2bswarm/package.json +0 -6
- package/skills/skill-educational-resource-finder/package.json +1 -7
- package/skills/skill-email-campaign/package.json +1 -7
- package/skills/skill-emoji/package.json +0 -1
- package/skills/skill-exam-readiness-check/package.json +1 -7
- package/skills/skill-excel/SKILL.md +38 -0
- package/skills/skill-excel/package.json +30 -0
- package/skills/{skill-generate-excel → skill-excel}/src/index.ts +41 -14
- package/skills/skill-experiment-power-calculator/package.json +1 -7
- package/skills/skill-extract/package.json +0 -5
- package/skills/skill-extract-frames/package.json +1 -7
- package/skills/skill-extract-invoice/package.json +1 -7
- package/skills/skill-family-activity-curator/package.json +1 -7
- package/skills/skill-faq-packager/package.json +1 -7
- package/skills/skill-feedback-survey-designer/package.json +1 -7
- package/skills/skill-field-trip-planner/package.json +1 -7
- package/skills/skill-file-organizer/package.json +1 -7
- package/skills/skill-folder-tree/package.json +1 -7
- package/skills/skill-forecast-scenario-lab/package.json +1 -7
- package/skills/skill-form-filler/package.json +1 -7
- package/skills/skill-generate-api-client/package.json +1 -7
- package/skills/skill-generate-book-cover/package.json +1 -7
- package/skills/skill-generate-chart/package.json +1 -7
- package/skills/skill-generate-diagram/package.json +1 -7
- package/skills/skill-generate-dockerfile/package.json +1 -7
- package/skills/skill-generate-documentation/package.json +1 -7
- package/skills/skill-generate-env/package.json +1 -7
- package/skills/skill-generate-favicon/package.json +1 -7
- package/skills/skill-generate-mock-data/package.json +1 -7
- package/skills/skill-generate-pr-description/package.json +1 -7
- package/skills/skill-generate-presentation/package.json +1 -7
- package/skills/skill-generate-qrcode/package.json +1 -7
- package/skills/skill-generate-regex/package.json +1 -7
- package/skills/skill-generate-resume/package.json +1 -7
- package/skills/skill-generate-sitemap/package.json +1 -7
- package/skills/skill-generate-social-posts/package.json +1 -7
- package/skills/skill-generate-sql/package.json +1 -7
- package/skills/skill-gif-maker/package.json +1 -7
- package/skills/skill-github-manager/package.json +1 -7
- package/skills/skill-gmail/package.json +1 -7
- package/skills/skill-goal-quarterly-roadmap/package.json +1 -7
- package/skills/skill-grant-application-drafter/package.json +1 -7
- package/skills/skill-grocery-basket-optimizer/package.json +1 -7
- package/skills/skill-guest-communication-suite/package.json +1 -7
- package/skills/skill-habit-reflection-digest/package.json +1 -7
- package/skills/skill-highlight-reel-generator/package.json +1 -7
- package/skills/skill-homework-feedback-coach/package.json +1 -7
- package/skills/skill-hook/package.json +0 -6
- package/skills/skill-household-maintenance-mgr/package.json +1 -7
- package/skills/skill-http-server/package.json +1 -7
- package/skills/skill-image/SKILL.md +2 -0
- package/skills/skill-image/package.json +0 -5
- package/skills/skill-image/src/index-local.ts +5 -3
- package/skills/skill-image/src/providers/minimax.ts +94 -0
- package/skills/skill-image/src/types.ts +1 -1
- package/skills/skill-implementation/package.json +0 -6
- package/skills/skill-implementation-agent/package.json +1 -7
- package/skills/skill-implementation-plan/package.json +1 -7
- package/skills/skill-implementation-todo/package.json +1 -7
- package/skills/skill-inbox-priority-planner/package.json +1 -7
- package/skills/skill-invoice-dispute-helper/package.json +1 -7
- package/skills/skill-itinerary-architect/package.json +1 -7
- package/skills/skill-kpi-digest-generator/package.json +1 -7
- package/skills/skill-lab-notebook-formatter/package.json +1 -7
- package/skills/skill-landing-page-copy/package.json +1 -7
- package/skills/skill-latex-table-generator/package.json +1 -7
- package/skills/skill-learning-style-profiler/package.json +1 -7
- package/skills/skill-lesson-plan-customizer/package.json +1 -7
- package/skills/skill-livestream-runofshow/package.json +1 -7
- package/skills/skill-longform-structurer/package.json +1 -7
- package/skills/skill-lorem-generator/package.json +1 -7
- package/skills/skill-managehook/package.json +1 -3
- package/skills/skill-managemcp/package.json +1 -3
- package/skills/skill-manageskill/package.json +1 -3
- package/skills/skill-markdown-validator/package.json +1 -7
- package/skills/skill-mcp-builder/package.json +1 -7
- package/skills/skill-meal-plan-designer/package.json +1 -7
- package/skills/skill-meeting-insight-summarizer/package.json +1 -7
- package/skills/skill-merge-pdfs/package.json +1 -7
- package/skills/skill-microcopy-generator/package.json +1 -7
- package/skills/skill-mindfulness-prompt-cache/package.json +1 -7
- package/skills/skill-monitor/package.json +0 -1
- package/skills/skill-music/CLAUDE.md +9 -0
- package/skills/skill-music/SKILL.md +35 -0
- package/skills/{skill-generate-excel → skill-music}/package.json +4 -10
- package/skills/skill-music/src/index.ts +192 -0
- package/skills/skill-notion-manager/package.json +1 -7
- package/skills/skill-npmpublish/package.json +0 -5
- package/skills/skill-onboarding-sequence-builder/package.json +1 -7
- package/skills/skill-onsite-ops-checklist/package.json +1 -7
- package/skills/skill-outreach-cadence-designer/package.json +1 -7
- package/skills/skill-packaging-concept-studio/package.json +1 -7
- package/skills/skill-packing-plan-pro/package.json +1 -7
- package/skills/skill-parent-teacher-brief/package.json +1 -7
- package/skills/skill-partner-kit-assembler/package.json +1 -7
- package/skills/skill-payroll-change-prepper/package.json +1 -7
- package/skills/{skill-generate-pdf → skill-pdf-generate}/SKILL.md +2 -2
- package/skills/{skill-generate-docx → skill-pdf-generate}/package.json +6 -4
- package/skills/{skill-generate-pdf → skill-pdf-generate}/src/index.ts +109 -8
- package/skills/skill-pdf-read/SKILL.md +56 -0
- package/skills/skill-pdf-read/package.json +29 -0
- package/skills/skill-pdf-read/src/index.ts +320 -0
- package/skills/skill-pdf-read/tsconfig.json +8 -0
- package/skills/skill-persona-based-adwriter/package.json +1 -7
- package/skills/skill-persona-generator/package.json +1 -7
- package/skills/skill-personal-daily-ops/package.json +1 -7
- package/skills/skill-pet-care-scheduler/package.json +1 -7
- package/skills/skill-podcast-show-notes/package.json +1 -7
- package/skills/skill-presentation-theme-maker/package.json +1 -7
- package/skills/skill-press-release-drafter/package.json +1 -7
- package/skills/skill-print-collateral-designer/package.json +1 -7
- package/skills/skill-procurement-scorecard/package.json +1 -7
- package/skills/skill-product-demo-script/package.json +1 -7
- package/skills/skill-product-mockup/package.json +1 -7
- package/skills/skill-project-retro-companion/package.json +1 -7
- package/skills/skill-proposal-redline-advisor/package.json +1 -7
- package/skills/skill-read-csv/SKILL.md +1 -1
- package/skills/skill-read-csv/package.json +0 -1
- package/skills/skill-read-csv/src/index.ts +30 -5
- package/skills/skill-read-excel/SKILL.md +1 -1
- package/skills/skill-read-excel/package.json +0 -1
- package/skills/skill-read-excel/src/index.ts +21 -8
- package/skills/skill-read-image/SKILL.md +1 -1
- package/skills/skill-read-image/package.json +0 -1
- package/skills/skill-read-pdf/SKILL.md +1 -1
- package/skills/skill-read-pdf/package.json +0 -1
- package/skills/skill-read-pdf/src/index.ts +15 -3
- package/skills/skill-regex-tester/package.json +1 -7
- package/skills/skill-remove-background/package.json +1 -7
- package/skills/skill-risk-disclosure-kit/package.json +1 -7
- package/skills/skill-roi-comparison-tool/package.json +1 -7
- package/skills/skill-sales-call-recapper/package.json +1 -7
- package/skills/skill-scaffold-project/package.json +1 -7
- package/skills/skill-scancommitpr/package.json +0 -2
- package/skills/skill-scancommitpush/package.json +0 -2
- package/skills/skill-scholarship-tracker/package.json +1 -7
- package/skills/skill-scientific-figure-check/package.json +1 -7
- package/skills/skill-seating-chart-maker/package.json +1 -7
- package/skills/skill-security-audit/package.json +1 -7
- package/skills/skill-seo-brief-builder/package.json +1 -7
- package/skills/skill-siteanalyze/package.json +19 -5
- package/skills/skill-slack-assistant/package.json +1 -7
- package/skills/skill-sleep-routine-analyzer/package.json +1 -7
- package/skills/skill-sms/package.json +0 -1
- package/skills/skill-social-media-kit/package.json +1 -7
- package/skills/skill-sound-effects/SKILL.md +34 -0
- package/skills/{skill-jingle-composer → skill-sound-effects}/package.json +4 -10
- package/skills/skill-sound-effects/src/index.ts +172 -0
- package/skills/skill-sound-effects/tsconfig.json +8 -0
- package/skills/skill-split-pdf/package.json +1 -7
- package/skills/skill-sponsorship-proposal-lab/package.json +1 -7
- package/skills/skill-spreadsheet-cleanroom/package.json +1 -7
- package/skills/skill-statistical-test-selector/package.json +1 -7
- package/skills/skill-stress-relief-playbook/package.json +1 -7
- package/skills/skill-study-guide-builder/package.json +1 -7
- package/skills/skill-subscription-spend-watcher/package.json +1 -7
- package/skills/skill-subtitle/package.json +0 -6
- package/skills/skill-survey-insight-extractor/package.json +1 -7
- package/skills/skill-terraform-generator/package.json +1 -7
- package/skills/skill-testimonial-graphics/package.json +1 -7
- package/skills/skill-timesheet/package.json +0 -6
- package/skills/skill-tmux-session/package.json +0 -1
- package/skills/skill-transcript/SKILL.md +3 -1
- package/skills/skill-transcript/package.json +0 -5
- package/skills/skill-travel-budget-balancer/package.json +1 -7
- package/skills/skill-validate-config/package.json +1 -7
- package/skills/skill-video/SKILL.md +3 -1
- package/skills/skill-video/package.json +0 -5
- package/skills/skill-video/src/providers/index.ts +4 -1
- package/skills/skill-video/src/providers/minimax.ts +100 -0
- package/skills/skill-video/src/types.ts +1 -1
- package/skills/skill-video-cut-suggester/package.json +1 -7
- package/skills/skill-video-downloader/package.json +1 -7
- package/skills/skill-video-thumbnail/package.json +1 -7
- package/skills/skill-voiceover-casting-assistant/package.json +1 -7
- package/skills/skill-watermark/package.json +1 -7
- package/skills/skill-webcrawling/package.json +11 -8
- package/skills/skill-webinar-script-coach/package.json +1 -7
- package/skills/skill-wellness-progress-reporter/package.json +1 -7
- package/skills/skill-workout-cycle-planner/package.json +1 -7
- package/skills/skill-write/package.json +0 -5
- package/skills/skill-jingle-composer/CLAUDE.md +0 -19
- package/skills/skill-jingle-composer/src/index.ts +0 -250
- /package/skills/{skill-extract-audio → skill-audio-extract}/CLAUDE.md +0 -0
- /package/skills/{skill-extract-audio → skill-audio-extract}/src/index.ts +0 -0
- /package/skills/{skill-extract-audio → skill-audio-extract}/tsconfig.json +0 -0
- /package/skills/{skill-generate-docx → skill-doc-generate}/CLAUDE.md +0 -0
- /package/skills/{skill-generate-docx → skill-doc-generate}/tsconfig.json +0 -0
- /package/skills/{skill-generate-excel → skill-excel}/CLAUDE.md +0 -0
- /package/skills/{skill-generate-excel → skill-excel}/tsconfig.json +0 -0
- /package/skills/{skill-jingle-composer → skill-music}/tsconfig.json +0 -0
- /package/skills/{skill-generate-pdf → skill-pdf-generate}/CLAUDE.md +0 -0
- /package/skills/{skill-generate-pdf → skill-pdf-generate}/tsconfig.json +0 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@hasnaxyz/skill-pdf-read",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Read and extract text from PDF files with page-range selection and parallel processing",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"skill-pdf-read": "./src/index.ts"
|
|
8
|
+
},
|
|
9
|
+
"scripts": {
|
|
10
|
+
"dev": "bun run src/index.ts",
|
|
11
|
+
"build": "bun build src/index.ts --outdir dist --target node",
|
|
12
|
+
"typecheck": "tsc --noEmit"
|
|
13
|
+
},
|
|
14
|
+
"dependencies": {
|
|
15
|
+
"pdf-parse": "^1.1.1"
|
|
16
|
+
},
|
|
17
|
+
"devDependencies": {
|
|
18
|
+
"@types/bun": "latest",
|
|
19
|
+
"typescript": "^5.7.0"
|
|
20
|
+
},
|
|
21
|
+
"publishConfig": {
|
|
22
|
+
"access": "restricted",
|
|
23
|
+
"registry": "https://registry.npmjs.org/"
|
|
24
|
+
},
|
|
25
|
+
"files": [
|
|
26
|
+
"src",
|
|
27
|
+
"tsconfig.json"
|
|
28
|
+
]
|
|
29
|
+
}
|
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
|
|
3
|
+
import { readFile, writeFile, stat, mkdir } from 'fs/promises';
|
|
4
|
+
import { existsSync } from 'fs';
|
|
5
|
+
import { basename, extname, dirname, join, resolve } from 'path';
|
|
6
|
+
import { parseArgs } from 'util';
|
|
7
|
+
|
|
8
|
+
/**
 * One entry of extracted text inside a {@link PdfResult}.
 *
 * Despite the name, an entry is not always a single page: `readSinglePdf`
 * emits one entry per chunk when chunking is enabled (numbered by the chunk's
 * first page) and a single entry numbered 1 otherwise.
 */
interface PageResult {
  /** 1-based page number at which this entry's text starts. */
  pageNumber: number;
  /** Text extracted for this entry. */
  text: string;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Entries read from a PDF's info dictionary. Every field is optional
 * because a document may omit any of them.
 */
interface PdfMetadata {
  title?: string;
  author?: string;
  creator?: string;
  producer?: string;
}

/** Everything extracted from a single PDF file. */
interface PdfResult {
  /** Path of the PDF exactly as it was passed in. */
  file: string;
  /** Total number of pages in the document (not the number extracted). */
  pageCount: number;
  /** Extracted text entries, in the order they were produced. */
  pages: PageResult[];
  /** Document info fields, when available. */
  metadata?: PdfMetadata;
}
|
|
24
|
+
|
|
25
|
+
/** Text extracted for one group of pages produced by `readPdfChunked`. */
interface ChunkResult {
  /** 0-based position of this chunk in the sequence of chunks. */
  chunkIndex: number;
  /** First page number (1-based) belonging to the chunk. */
  startPage: number;
  /** Last page number (1-based) belonging to the chunk. */
  endPage: number;
  /** Text extracted for the chunk. */
  text: string;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Expands a page specification such as "1-5" or "3,7,10-15" into a sorted,
 * de-duplicated list of 1-based page numbers.
 *
 * Supported forms (comma separated, surrounding whitespace ignored):
 * - `N`    a single page
 * - `A-B`  an inclusive range
 * - `A-`   from page A to the last page
 * - `-B`   from the first page to page B
 *
 * Pages outside `1..totalPages` are dropped. Parts that are empty or not
 * numeric are ignored rather than treated as an error.
 *
 * @param spec - User supplied page specification.
 * @param totalPages - Number of pages in the document; upper bound for ranges.
 * @returns Ascending list of unique page numbers; empty when nothing matches.
 */
function parsePageRange(spec: string, totalPages: number): number[] {
  const pages = new Set<number>();

  for (const part of spec.split(',')) {
    const trimmed = part.trim();
    if (trimmed === '') continue;

    if (!trimmed.includes('-')) {
      const page = parseInt(trimmed, 10);
      // NaN fails both comparisons, so non-numeric parts are skipped here.
      if (page > 0 && page <= totalPages) pages.add(page);
      continue;
    }

    const [rawStart = '', rawEnd = ''] = trimmed.split('-').map(n => n.trim());
    // A lone "-" carries no information; ignore it instead of selecting everything.
    if (rawStart === '' && rawEnd === '') continue;

    // A missing bound means "from the first page" / "to the last page".
    const start = rawStart === '' ? 1 : parseInt(rawStart, 10);
    const end = rawEnd === '' ? totalPages : parseInt(rawEnd, 10);
    if (Number.isNaN(start) || Number.isNaN(end)) continue;

    const first = Math.max(start, 1);
    const last = Math.min(end, totalPages);
    for (let page = first; page <= last; page++) {
      pages.add(page);
    }
  }

  return Array.from(pages).sort((a, b) => a - b);
}
|
|
48
|
+
|
|
49
|
+
/**
 * Lazily imports the `pdf-parse` package and returns its default export.
 *
 * If the import fails for any reason, an installation hint is printed to
 * stderr and the process exits with status 1.
 */
async function loadPdfParse() {
  let pdfParseModule;
  try {
    pdfParseModule = await import('pdf-parse');
  } catch {
    console.error('pdf-parse not available. Install with: bun add pdf-parse');
    process.exit(1);
  }
  return pdfParseModule.default;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Returns the total number of pages in a PDF.
 *
 * @param buffer - Raw bytes of the PDF file.
 * @returns The page count reported by pdf-parse (`numpages`).
 */
async function getPdfPageCount(buffer: Buffer): Promise<number> {
  const pdfParse = await loadPdfParse();
  // Per the pdf-parse README, `max <= 0` means "render every page", so the
  // former `max: 0` extracted the text of the whole document just to read the
  // count. `numpages` is the document total regardless of `max`, so rendering
  // a single page is sufficient.
  const { numpages } = await pdfParse(buffer, { max: 1 });
  return numpages;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Extracts the text of a PDF as one string.
 *
 * @param buffer - Raw bytes of the PDF file.
 * @param maxPages - When truthy, forwarded to pdf-parse as its `max` option;
 *   otherwise pdf-parse is called without options.
 * @returns The `text` field of the pdf-parse result.
 */
async function extractPdfText(buffer: Buffer, maxPages?: number): Promise<string> {
  const parse = await loadPdfParse();
  const options = maxPages ? { max: maxPages } : undefined;
  const { text } = await parse(buffer, options);
  return text;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Reads the title, author, creator and producer entries of a PDF.
 *
 * @param buffer - Raw bytes of the PDF file.
 * @returns The four info fields; each is `undefined` when the document does
 *   not provide it.
 */
async function extractPdfMetadata(buffer: Buffer): Promise<PdfResult['metadata']> {
  const pdfParse = await loadPdfParse();
  // Per the pdf-parse README, `max <= 0` renders every page. Only the info
  // dictionary is needed here, so limit rendering to one page instead of
  // extracting the whole document's text.
  const { info } = await pdfParse(buffer, { max: 1 });
  return {
    title: info?.Title,
    author: info?.Author,
    creator: info?.Creator,
    producer: info?.Producer,
  };
}
|
|
80
|
+
|
|
81
|
+
/**
 * Extracts a PDF's text in groups of `chunkSize` pages.
 *
 * The document is parsed once. Text is captured per page through pdf-parse's
 * `pagerender` hook, so every chunk contains only the text of its own pages.
 * (Previously each chunk re-parsed the file with `max: endPage` and therefore
 * held the cumulative text of pages 1..endPage.)
 *
 * @param filePath - Path of the PDF to read.
 * @param chunkSize - Number of pages per chunk; must be a positive integer.
 * @param pageRange - Optional list of 1-based page numbers to include. When
 *   omitted, every page of the document is included.
 * @returns One entry per chunk, in page order. Within a chunk the page texts
 *   are joined with a blank line.
 * @throws Error when `chunkSize` is not a positive integer.
 */
async function readPdfChunked(
  filePath: string,
  chunkSize: number,
  pageRange?: number[]
): Promise<ChunkResult[]> {
  // A zero, negative or fractional step would keep the chunk loop from
  // advancing correctly (a negative step never terminates).
  if (!Number.isInteger(chunkSize) || chunkSize <= 0) {
    throw new Error(`Invalid chunk size: ${chunkSize}. Expected a positive integer.`);
  }

  const buffer = await readFile(filePath);
  if (pageRange && pageRange.length === 0) return [];

  const pdfParse = await loadPdfParse();

  // Minimal view of the page object pdf-parse hands to `pagerender`.
  type TextItem = { str: string; transform: number[] };
  type PageData = {
    getTextContent(options: {
      normalizeWhitespace: boolean;
      disableCombineTextItems: boolean;
    }): Promise<{ items: TextItem[] }>;
  };

  // Text of each rendered page in render order (index 0 is page 1).
  // NOTE(review): relies on pdf-parse calling `pagerender` once per page in
  // ascending page order — confirm against the installed version.
  const pageTexts: string[] = [];

  // Mirrors the default renderer shown in the pdf-parse README: text items are
  // concatenated, with a line break whenever the vertical position changes.
  const renderPage = async (pageData: PageData): Promise<string> => {
    let text = '';
    try {
      const content = await pageData.getTextContent({
        normalizeWhitespace: false,
        disableCombineTextItems: false,
      });
      let lastY: number | undefined;
      for (const item of content.items) {
        const y = item.transform[5];
        text += !lastY || lastY === y ? item.str : `\n${item.str}`;
        lastY = y;
      }
    } catch {
      // Best effort: an unreadable page contributes empty text, and still
      // occupies its slot so later pages keep their index.
      text = '';
    }
    pageTexts.push(text);
    return text;
  };

  // Only render up to the last requested page; 0 asks pdf-parse for all pages.
  const lastWanted = pageRange ? pageRange.reduce((max, p) => Math.max(max, p), 0) : 0;
  const data = await pdfParse(buffer, { max: lastWanted, pagerender: renderPage });
  const pages = pageRange ?? Array.from({ length: data.numpages }, (_, i) => i + 1);

  const chunks: ChunkResult[] = [];
  for (let i = 0; i < pages.length; i += chunkSize) {
    const chunkPages = pages.slice(i, i + chunkSize);
    chunks.push({
      chunkIndex: Math.floor(i / chunkSize),
      startPage: chunkPages[0],
      endPage: chunkPages[chunkPages.length - 1],
      text: chunkPages.map(page => pageTexts[page - 1] ?? '').join('\n\n'),
    });
  }

  return chunks;
}
|
|
110
|
+
|
|
111
|
+
/**
 * Reads one PDF and returns its page count, metadata and extracted text.
 *
 * @param filePath - Path of the PDF to read.
 * @param pageSpec - Optional page specification (see `parsePageRange`).
 * @param chunkSize - When truthy, text is extracted via `readPdfChunked` and
 *   one entry is returned per chunk, numbered by the chunk's first page.
 * @returns The extraction result. Without chunking, `pages` holds a single
 *   entry numbered 1 whose text covers the document from page 1 up to the
 *   highest requested page (or the whole document when no spec is given).
 *   When `pageSpec` is given but matches no page, `pages` is empty.
 */
async function readSinglePdf(
  filePath: string,
  pageSpec?: string,
  chunkSize?: number
): Promise<PdfResult> {
  const buffer = await readFile(filePath);
  const totalPages = await getPdfPageCount(buffer);
  const metadata = await extractPdfMetadata(buffer);

  const pageRange = pageSpec ? parsePageRange(pageSpec, totalPages) : undefined;

  // A spec that selects nothing must yield nothing. Previously the un-chunked
  // path computed Math.max() of an empty list (-Infinity), which pdf-parse
  // interprets as "all pages", so an invalid spec returned the whole document.
  if (pageRange && pageRange.length === 0) {
    return { file: filePath, pageCount: totalPages, pages: [], metadata };
  }

  if (chunkSize) {
    const chunks = await readPdfChunked(filePath, chunkSize, pageRange);
    const pages: PageResult[] = chunks.map(chunk => ({
      pageNumber: chunk.startPage,
      text: chunk.text,
    }));
    return { file: filePath, pageCount: totalPages, pages, metadata };
  }

  // reduce instead of Math.max(...range): no argument-count limit on long ranges.
  const maxPage = pageRange
    ? pageRange.reduce((max, page) => Math.max(max, page), 0)
    : undefined;
  const text = await extractPdfText(buffer, maxPage);

  return {
    file: filePath,
    pageCount: totalPages,
    pages: [{ pageNumber: 1, text }],
    metadata,
  };
}
|
|
144
|
+
|
|
145
|
+
/**
 * Reads several PDFs with a bounded number of concurrent workers.
 *
 * @param files - Paths of the PDFs to read.
 * @param pageSpec - Optional page specification applied to every file.
 * @param chunkSize - Optional chunk size applied to every file.
 * @param concurrency - Maximum number of files processed at once (default 4).
 *   Values below 1 or non-numeric values are treated as 1.
 * @returns One result per input file, in the same order as `files`
 *   regardless of which file finishes first.
 */
async function readMultiplePdfs(
  files: string[],
  pageSpec?: string,
  chunkSize?: number,
  concurrency = 4
): Promise<PdfResult[]> {
  // Snapshot so later mutation of the caller's array cannot affect the run.
  const queue = [...files];
  const results = new Array<PdfResult>(queue.length);
  let nextIndex = 0;

  // Always start at least one worker; a non-positive value used to start none
  // and silently return an empty list.
  const workerCount = Math.min(queue.length, Math.max(1, Math.floor(concurrency) || 1));

  async function processNext(): Promise<void> {
    while (nextIndex < queue.length) {
      // Claim the slot synchronously so no two workers take the same file.
      const index = nextIndex++;
      const file = queue[index];
      console.log(`Reading: ${basename(file)}`);
      const result = await readSinglePdf(file, pageSpec, chunkSize);
      // Store by input position: completion order depends on file size.
      results[index] = result;
      console.log(` ${result.pageCount} pages, ${result.pages.reduce((s, p) => s + p.text.length, 0)} chars extracted`);
    }
  }

  await Promise.all(Array.from({ length: workerCount }, () => processNext()));

  return results;
}
|
|
169
|
+
|
|
170
|
+
/**
 * Renders results as plain text: one banner line per file followed by its
 * text entries separated by `---` rules, with a blank line between files.
 */
function formatText(results: PdfResult[]): string {
  const sections: string[] = [];
  for (const result of results) {
    const body = result.pages.map(({ text }) => text).join('\n\n---\n\n');
    sections.push(`=== ${basename(result.file)} (${result.pageCount} pages) ===\n${body}`);
  }
  return sections.join('\n\n');
}
|
|
177
|
+
|
|
178
|
+
/** Serialises results as pretty-printed JSON with two-space indentation. */
function formatJson(results: PdfResult[]): string {
  const indentWidth = 2;
  return JSON.stringify(results, undefined, indentWidth);
}
|
|
181
|
+
|
|
182
|
+
/**
 * Renders results as Markdown: a level-1 heading per file, optional title and
 * author lines, the page count, then a level-2 heading per text entry.
 * Files are separated by a horizontal rule.
 */
function formatMarkdown(results: PdfResult[]): string {
  const renderDocument = ({ file, metadata, pageCount, pages }: PdfResult): string => {
    // Optional lines are left undefined and dropped below; '' entries are kept
    // because they produce the blank lines of the layout.
    const preamble = [
      `# ${basename(file)}`,
      '',
      metadata?.title ? `**Title:** ${metadata.title}` : undefined,
      metadata?.author ? `**Author:** ${metadata.author}` : undefined,
      `**Pages:** ${pageCount}`,
      '',
    ].filter((line): line is string => line !== undefined);

    const sections = pages.flatMap(page => [`## Page ${page.pageNumber}`, '', page.text, '']);

    return [...preamble, ...sections].join('\n');
  };

  return results.map(renderDocument).join('\n---\n\n');
}
|
|
200
|
+
|
|
201
|
+
/**
 * CLI entry point.
 *
 * Parses the command line, validates the command, the numeric options and the
 * input files, then runs either:
 * - `read`: extract text from the given PDFs and print it or write it to
 *   `--output` in the requested format, followed by a summary line; or
 * - `info`: print page count, file size and metadata for each PDF.
 *
 * Any validation failure prints a message to stderr and exits with status 1.
 */
async function main() {
  const { values, positionals } = parseArgs({
    args: Bun.argv.slice(2),
    options: {
      pages: { type: 'string' },
      'chunk-size': { type: 'string' },
      format: { type: 'string', default: 'text' },
      output: { type: 'string', short: 'o' },
      concurrency: { type: 'string', default: '4' },
      help: { type: 'boolean', short: 'h' },
    },
    allowPositionals: true,
  });

  const command = positionals[0];
  const files = positionals.slice(1);

  if (values.help || !command) {
    console.log(`
PDF Read - Extract text from PDF files

Usage:
skill-pdf-read read <files...> [options]
skill-pdf-read info <files...>

Commands:
read Extract text from PDFs
info Show PDF metadata and page counts

Read Options:
--pages <spec> Page range (e.g., "1-5", "3,7,10-15")
--chunk-size <n> Read in chunks of N pages (for large files)
--format <fmt> Output format: text, json, markdown (default: text)
--output, -o <path> Write output to file
--concurrency <n> Parallel file processing (default: 4)

Examples:
skill-pdf-read read document.pdf
skill-pdf-read read report.pdf --pages 1-5 --format json
skill-pdf-read read *.pdf --chunk-size 10 --output extracted.txt
skill-pdf-read info document.pdf
`);
    process.exit(0);
  }

  // Reject unknown commands before looking at files, so a typo in the command
  // is not reported as a missing-file problem.
  if (command !== 'read' && command !== 'info') {
    console.error(`Unknown command: ${command}`);
    process.exit(1);
  }

  if (files.length === 0) {
    console.error('Error: At least one PDF file is required');
    process.exit(1);
  }

  for (const file of files) {
    if (!existsSync(file)) {
      console.error(`File not found: ${file}`);
      process.exit(1);
    }
    if (extname(file).toLowerCase() !== '.pdf') {
      console.error(`Not a PDF file: ${file}`);
      process.exit(1);
    }
  }

  switch (command) {
    case 'read': {
      // Validate --chunk-size up front: NaN used to silently disable chunking
      // and a negative value made the chunk loop run forever.
      const rawChunkSize = values['chunk-size'];
      let chunkSize: number | undefined;
      if (rawChunkSize !== undefined) {
        chunkSize = parseInt(rawChunkSize, 10);
        if (!Number.isInteger(chunkSize) || chunkSize <= 0) {
          console.error(`Error: --chunk-size must be a positive integer (got "${rawChunkSize}")`);
          process.exit(1);
        }
      }

      // Unparseable or zero falls back to the default of 4; negatives clamp to 1.
      const concurrency = Math.max(1, parseInt(values.concurrency ?? '4', 10) || 4);

      console.log(`\nReading ${files.length} PDF file(s)...\n`);

      const results = await readMultiplePdfs(files, values.pages, chunkSize, concurrency);

      let output: string;
      switch (values.format) {
        case 'json': output = formatJson(results); break;
        case 'markdown': case 'md': output = formatMarkdown(results); break;
        // Any other value falls back to plain text.
        default: output = formatText(results); break;
      }

      if (values.output) {
        const outputPath = resolve(values.output);
        await mkdir(dirname(outputPath), { recursive: true });
        await writeFile(outputPath, output);
        console.log(`\nOutput saved to: ${outputPath}`);
      } else {
        console.log('\n' + output);
      }

      const totalChars = results.reduce((s, r) => s + r.pages.reduce((ps, p) => ps + p.text.length, 0), 0);
      const totalPages = results.reduce((s, r) => s + r.pageCount, 0);
      console.log(`\nProcessed: ${files.length} files, ${totalPages} pages, ${totalChars.toLocaleString()} characters`);
      break;
    }

    case 'info': {
      for (const file of files) {
        const buffer = await readFile(file);
        const pageCount = await getPdfPageCount(buffer);
        const metadata = await extractPdfMetadata(buffer);
        const fileStats = await stat(file);

        console.log(`\n${basename(file)}:`);
        console.log(` Pages: ${pageCount}`);
        console.log(` Size: ${(fileStats.size / 1024).toFixed(1)} KB`);
        if (metadata?.title) console.log(` Title: ${metadata.title}`);
        if (metadata?.author) console.log(` Author: ${metadata.author}`);
        if (metadata?.creator) console.log(` Creator: ${metadata.creator}`);
        if (metadata?.producer) console.log(` Producer: ${metadata.producer}`);
      }
      break;
    }
  }
}
|
|
316
|
+
|
|
317
|
+
// Run the CLI. Any uncaught failure is reported on stderr and turned into a
// non-zero exit status.
main().catch((err: unknown) => {
  // A rejection is not guaranteed to be an Error; avoid printing "undefined".
  const message = err instanceof Error ? err.message : String(err);
  console.error('Error:', message);
  process.exit(1);
});
|
|
@@ -3,9 +3,6 @@
|
|
|
3
3
|
import { createReadStream } from "fs";
|
|
4
4
|
import { mkdir, open, readFile, writeFile } from "fs/promises";
|
|
5
5
|
import { dirname, resolve } from "path";
|
|
6
|
-
import { parse as createParser } from "csv-parse";
|
|
7
|
-
import { parse as parseCsvSync } from "csv-parse/sync";
|
|
8
|
-
import iconv from "iconv-lite";
|
|
9
6
|
|
|
10
7
|
const VERSION = "0.1.0";
|
|
11
8
|
const SAMPLE_BYTES = 128 * 1024;
|
|
@@ -32,6 +29,30 @@ interface CsvResult {
|
|
|
32
29
|
rows: Array<Record<string, string | null>>;
|
|
33
30
|
}
|
|
34
31
|
|
|
32
|
+
/**
 * Lazily imports `csv-parse` and returns its `parse` export.
 *
 * @throws Error with an installation hint when the import fails.
 */
async function loadCsvParse() {
  let csvModule;
  try {
    csvModule = await import("csv-parse");
  } catch {
    throw new Error("Missing dependency 'csv-parse'. Run bun install in this skill directory.");
  }
  return csvModule.parse;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Lazily imports `csv-parse/sync` and returns its synchronous `parse` export.
 *
 * @throws Error with an installation hint when the import fails.
 */
async function loadCsvParseSync() {
  let syncModule;
  try {
    syncModule = await import("csv-parse/sync");
  } catch {
    throw new Error("Missing dependency 'csv-parse'. Run bun install in this skill directory.");
  }
  return syncModule.parse;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Lazily imports `iconv-lite` and returns its default export.
 *
 * @throws Error with an installation hint when the import fails.
 */
async function loadIconv() {
  let iconvModule;
  try {
    iconvModule = await import("iconv-lite");
  } catch {
    throw new Error("Missing dependency 'iconv-lite'. Run bun install in this skill directory.");
  }
  return iconvModule.default;
}
|
|
55
|
+
|
|
35
56
|
function printHelp(): void {
|
|
36
57
|
console.log(`skill-read-csv v${VERSION}
|
|
37
58
|
|
|
@@ -195,7 +216,8 @@ function detectDelimiter(sampleText: string): string {
|
|
|
195
216
|
return bestDelimiter;
|
|
196
217
|
}
|
|
197
218
|
|
|
198
|
-
function parseSampleRows(sampleText: string, delimiter: string): string[][] {
|
|
219
|
+
async function parseSampleRows(sampleText: string, delimiter: string): Promise<string[][]> {
|
|
220
|
+
const parseCsvSync = await loadCsvParseSync();
|
|
199
221
|
const records = parseCsvSync(sampleText, {
|
|
200
222
|
delimiter,
|
|
201
223
|
relax_column_count: true,
|
|
@@ -266,6 +288,8 @@ async function parseCsvFile(
|
|
|
266
288
|
columns: string[],
|
|
267
289
|
hasHeader: boolean,
|
|
268
290
|
): Promise<{ rows: Array<Record<string, string | null>>; truncated: boolean }> {
|
|
291
|
+
const iconv = await loadIconv();
|
|
292
|
+
const createParser = await loadCsvParse();
|
|
269
293
|
const rows: Array<Record<string, string | null>> = [];
|
|
270
294
|
let truncated = false;
|
|
271
295
|
const stream = createReadStream(path, bomBytes > 0 ? { start: bomBytes } : undefined)
|
|
@@ -299,10 +323,11 @@ async function main(): Promise<void> {
|
|
|
299
323
|
const options = parseArgs(process.argv.slice(2));
|
|
300
324
|
const inputPath = resolve(options.input!);
|
|
301
325
|
const sample = await readSample(inputPath);
|
|
326
|
+
const iconv = await loadIconv();
|
|
302
327
|
const { encoding, bomBytes } = detectEncoding(sample, options.encoding.toLowerCase());
|
|
303
328
|
const sampleText = iconv.decode(sample.subarray(bomBytes), encoding);
|
|
304
329
|
const delimiter = normalizeDelimiter(options.delimiter) ?? detectDelimiter(sampleText);
|
|
305
|
-
const sampleRows = parseSampleRows(sampleText, delimiter);
|
|
330
|
+
const sampleRows = await parseSampleRows(sampleText, delimiter);
|
|
306
331
|
const { hasHeader, columns } = buildColumns(sampleRows, options.headers);
|
|
307
332
|
const { rows, truncated } = await parseCsvFile(inputPath, options, encoding, bomBytes, delimiter, columns, hasHeader);
|
|
308
333
|
|