markitdown-ts 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -7,7 +7,6 @@
7
7
  It supports:
8
8
 
9
9
  - [x] PDF
10
- - [x] PowerPoint
11
10
  - [x] Word (.docx)
12
11
  - [x] Excel (.xlsx)
13
12
  - [x] Images (EXIF metadata extraction and optional LLM-based description)
@@ -17,6 +16,7 @@ It supports:
17
16
  - [x] Jupyter Notebooks (.ipynb)
18
17
  - [x] Bing Search Result Pages (SERP)
19
18
  - [x] ZIP files (recursively iterates over contents)
19
+ - [ ] PowerPoint
20
20
 
21
21
  > [!NOTE]
22
22
  >
package/dist/index.cjs CHANGED
@@ -16,7 +16,6 @@ const util = require('util');
16
16
  const fs$1 = require('fs/promises');
17
17
  const os = require('os');
18
18
  const ai = require('ai');
19
- const unzipper = require('unzipper');
20
19
 
21
20
  function _interopDefaultCompat (e) { return e && typeof e === 'object' && 'default' in e ? e.default : e; }
22
21
 
@@ -40,12 +39,11 @@ const fs__namespace = /*#__PURE__*/_interopNamespaceCompat(fs);
40
39
  const TurndownService__default = /*#__PURE__*/_interopDefaultCompat(TurndownService);
41
40
  const turndownPluginGfm__default = /*#__PURE__*/_interopDefaultCompat(turndownPluginGfm);
42
41
  const Mammoth__default = /*#__PURE__*/_interopDefaultCompat(Mammoth);
43
- const XLSX__default = /*#__PURE__*/_interopDefaultCompat(XLSX);
42
+ const XLSX__namespace = /*#__PURE__*/_interopNamespaceCompat(XLSX);
44
43
  const childProcess__namespace = /*#__PURE__*/_interopNamespaceCompat(childProcess);
45
44
  const util__namespace = /*#__PURE__*/_interopNamespaceCompat(util);
46
45
  const fs__namespace$1 = /*#__PURE__*/_interopNamespaceCompat(fs$1);
47
46
  const os__namespace = /*#__PURE__*/_interopNamespaceCompat(os);
48
- const unzipper__namespace = /*#__PURE__*/_interopNamespaceCompat(unzipper);
49
47
 
50
48
  class PlainTextConverter {
51
49
  async convert(local_path, options = {}) {
@@ -707,12 +705,12 @@ class XlsxConverter extends HtmlConverter {
707
705
  if (!exists) {
708
706
  throw new Error("File does'nt exists");
709
707
  }
710
- let workbook = XLSX__default.readFile(local_path);
708
+ let workbook = XLSX__namespace.readFile(local_path);
711
709
  let mdContent = "";
712
710
  for (const sheetName of workbook.SheetNames) {
713
711
  mdContent += `## ${sheetName}
714
712
  `;
715
- let htmlContent = XLSX__default.utils.sheet_to_html(workbook.Sheets[sheetName]);
713
+ let htmlContent = XLSX__namespace.utils.sheet_to_html(workbook.Sheets[sheetName]);
716
714
  mdContent += (await this._convert(htmlContent))?.text_content.trim() + "\n\n";
717
715
  }
718
716
  return {
@@ -959,9 +957,18 @@ class ZipConverter {
959
957
  text_content: `[ERROR] Invalid zip file path: ${localPath}`
960
958
  };
961
959
  }
960
+ let unzipper;
961
+ try {
962
+ unzipper = await import('unzipper').then((mod) => mod.default);
963
+ } catch (error) {
964
+ console.error(
965
+ "Optional dependency 'unzipper' is not installed. Run `npm install unzipper` to enable this feature."
966
+ );
967
+ return null;
968
+ }
962
969
  try {
963
970
  await fs__namespace$1.mkdir(newFolder, { recursive: true });
964
- const zip = await unzipper__namespace.Open.file(localPath);
971
+ const zip = await unzipper.Open.file(localPath);
965
972
  await zip.extract({ path: newFolder });
966
973
  const files = await this._walk(newFolder);
967
974
  for (const { root, name } of files) {
package/dist/index.mjs CHANGED
@@ -10,13 +10,12 @@ import { DOMParser } from '@xmldom/xmldom';
10
10
  import { URL as URL$1 } from 'url';
11
11
  import { pdfToText } from 'pdf-ts';
12
12
  import Mammoth from 'mammoth';
13
- import XLSX from 'xlsx';
13
+ import * as XLSX from 'xlsx';
14
14
  import * as childProcess from 'child_process';
15
15
  import * as util from 'util';
16
16
  import * as fs$1 from 'fs/promises';
17
17
  import * as os from 'os';
18
18
  import { generateText } from 'ai';
19
- import * as unzipper from 'unzipper';
20
19
 
21
20
  class PlainTextConverter {
22
21
  async convert(local_path, options = {}) {
@@ -930,6 +929,15 @@ class ZipConverter {
930
929
  text_content: `[ERROR] Invalid zip file path: ${localPath}`
931
930
  };
932
931
  }
932
+ let unzipper;
933
+ try {
934
+ unzipper = await import('unzipper').then((mod) => mod.default);
935
+ } catch (error) {
936
+ console.error(
937
+ "Optional dependency 'unzipper' is not installed. Run `npm install unzipper` to enable this feature."
938
+ );
939
+ return null;
940
+ }
933
941
  try {
934
942
  await fs$1.mkdir(newFolder, { recursive: true });
935
943
  const zip = await unzipper.Open.file(localPath);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "markitdown-ts",
3
- "version": "0.0.3",
3
+ "version": "0.0.4",
4
4
  "description": "",
5
5
  "keywords": [],
6
6
  "homepage": "https://github.com/dead8309/markitdown-ts#readme",
@@ -50,10 +50,10 @@
50
50
  "mime-types": "^2.1.35",
51
51
  "pdf-ts": "^0.0.2",
52
52
  "turndown": "^7.2.0",
53
- "xlsx": "^0.18.5"
53
+ "xlsx": "^0.18.5",
54
+ "ai": "^4.0.22"
54
55
  },
55
56
  "peerDependencies": {
56
- "ai": "^4.0.22",
57
57
  "youtube-transcript": "^1.2.1",
58
58
  "unzipper": "^0.12.3"
59
59
  },
@@ -61,9 +61,6 @@
61
61
  "youtube-transcript": {
62
62
  "optional": true
63
63
  },
64
- "ai": {
65
- "optional": true
66
- },
67
64
  "unzipper": {
68
65
  "optional": true
69
66
  }