pydatamax 0.1.11__tar.gz → 0.1.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. {pydatamax-0.1.11/pydatamax.egg-info → pydatamax-0.1.12}/PKG-INFO +40 -30
  2. {pydatamax-0.1.11 → pydatamax-0.1.12/pydatamax.egg-info}/PKG-INFO +40 -30
  3. pydatamax-0.1.12/pydatamax.egg-info/requires.txt +36 -0
  4. pydatamax-0.1.12/setup.py +57 -0
  5. pydatamax-0.1.11/pydatamax.egg-info/requires.txt +0 -26
  6. pydatamax-0.1.11/setup.py +0 -47
  7. {pydatamax-0.1.11 → pydatamax-0.1.12}/LICENSE +0 -0
  8. {pydatamax-0.1.11 → pydatamax-0.1.12}/README.md +0 -0
  9. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/__init__.py +0 -0
  10. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/loader/MinioHandler.py +0 -0
  11. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/loader/OssHandler.py +0 -0
  12. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/loader/__init__.py +0 -0
  13. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/loader/core.py +0 -0
  14. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/parser/__init__.py +0 -0
  15. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/parser/base.py +0 -0
  16. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/parser/core.py +0 -0
  17. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/parser/csv_parser.py +0 -0
  18. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/parser/doc_parser.py +0 -0
  19. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/parser/docx_parser.py +0 -0
  20. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/parser/epub_parser.py +0 -0
  21. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/parser/html_parser.py +0 -0
  22. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/parser/image_parser.py +0 -0
  23. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/parser/json_parser.py +0 -0
  24. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/parser/md_parser.py +0 -0
  25. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/parser/pdf_parser.py +0 -0
  26. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/parser/ppt_parser.py +0 -0
  27. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/parser/pptx_parser.py +0 -0
  28. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/parser/txt_parser.py +0 -0
  29. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/parser/xls_parser.py +0 -0
  30. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/parser/xlsx_parser.py +0 -0
  31. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/utils/__init__.py +0 -0
  32. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/utils/constants.py +0 -0
  33. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/utils/data_cleaner.py +0 -0
  34. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/utils/env_setup.py +0 -0
  35. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/utils/gotocr_pdf.py +0 -0
  36. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/utils/mineru_operator.py +0 -0
  37. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/utils/paddleocr_pdf_operator.py +0 -0
  38. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/utils/ppt_extract.py +0 -0
  39. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/utils/qa_generator.py +0 -0
  40. {pydatamax-0.1.11 → pydatamax-0.1.12}/datamax/utils/tokenizer.py +0 -0
  41. {pydatamax-0.1.11 → pydatamax-0.1.12}/pydatamax.egg-info/SOURCES.txt +0 -0
  42. {pydatamax-0.1.11 → pydatamax-0.1.12}/pydatamax.egg-info/dependency_links.txt +0 -0
  43. {pydatamax-0.1.11 → pydatamax-0.1.12}/pydatamax.egg-info/top_level.txt +0 -0
  44. {pydatamax-0.1.11 → pydatamax-0.1.12}/setup.cfg +0 -0
  45. {pydatamax-0.1.11 → pydatamax-0.1.12}/tests/__init__.py +0 -0
  46. {pydatamax-0.1.11 → pydatamax-0.1.12}/tests/test_basic.py +0 -0
@@ -1,42 +1,52 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pydatamax
3
- Version: 0.1.11
3
+ Version: 0.1.12
4
4
  Summary: A library for parsing and converting various file formats.
5
- Home-page: https://github.com/cosco/datamax
6
- Author: hzb | ccy
7
- Author-email: zhibaohe@hotmail.com | cy.kron@foxmail.com
5
+ Home-page: https://github.com/Hi-Dolphin/datamax
6
+ Author: ccy
7
+ Author-email: cy.kron@foxmail.com
8
8
  Classifier: Programming Language :: Python :: 3
9
9
  Classifier: License :: OSI Approved :: MIT License
10
10
  Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.10
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
- Requires-Dist: ebooklib
15
- Requires-Dist: python-docx
16
- Requires-Dist: beautifulsoup4
17
- Requires-Dist: python-dotenv
18
- Requires-Dist: minio
19
- Requires-Dist: loguru
20
- Requires-Dist: tqdm
21
- Requires-Dist: oss2
22
- Requires-Dist: python-docx
23
- Requires-Dist: openai
24
- Requires-Dist: jionlp
25
- Requires-Dist: chardet
26
- Requires-Dist: python-pptx
27
- Requires-Dist: openpyxl
28
- Requires-Dist: pymupdf
29
- Requires-Dist: langchain_community==0.2.9
30
- Requires-Dist: premailer
31
- Requires-Dist: setuptools==75.3.0
32
- Requires-Dist: docx2markdown
33
- Requires-Dist: tiktoken
34
- Requires-Dist: markitdown
35
- Requires-Dist: pandas
36
- Requires-Dist: xlrd
37
- Requires-Dist: tabulate
38
- Requires-Dist: unstructured[all]
39
- Requires-Dist: markdown
14
+ Requires-Dist: oss2<3.0.0,>=2.19.1
15
+ Requires-Dist: aliyun-python-sdk-core<3.0.0,>=2.16.0
16
+ Requires-Dist: aliyun-python-sdk-kms<3.0.0,>=2.16.5
17
+ Requires-Dist: crcmod<2.0.0,>=1.7
18
+ Requires-Dist: langdetect<2.0.0,>=1.0.9
19
+ Requires-Dist: loguru<1.0.0,>=0.7.3
20
+ Requires-Dist: python-docx<2.0.0,>=1.1.2
21
+ Requires-Dist: python-dotenv<2.0.0,>=1.1.0
22
+ Requires-Dist: pymupdf<2.0.0,>=1.26.0
23
+ Requires-Dist: pypdf<6.0.0,>=5.5.0
24
+ Requires-Dist: openpyxl<4.0.0,>=3.1.5
25
+ Requires-Dist: pandas<3.0.0,>=2.2.3
26
+ Requires-Dist: numpy<3.0.0,>=2.2.6
27
+ Requires-Dist: requests<3.0.0,>=2.32.3
28
+ Requires-Dist: tqdm<5.0.0,>=4.67.1
29
+ Requires-Dist: pydantic<3.0.0,>=2.11.5
30
+ Requires-Dist: pydantic-settings<3.0.0,>=2.9.1
31
+ Requires-Dist: python-magic<1.0.0,>=0.4.27
32
+ Requires-Dist: PyYAML<7.0.0,>=6.0.2
33
+ Requires-Dist: Pillow<12.0.0,>=11.2.1
34
+ Requires-Dist: packaging<25.0,>=24.2
35
+ Requires-Dist: beautifulsoup4<5.0.0,>=4.13.4
36
+ Requires-Dist: minio<8.0.0,>=7.2.15
37
+ Requires-Dist: openai<2.0.0,>=1.82.0
38
+ Requires-Dist: jionlp<2.0.0,>=1.5.23
39
+ Requires-Dist: chardet<6.0.0,>=5.2.0
40
+ Requires-Dist: python-pptx<2.0.0,>=1.0.2
41
+ Requires-Dist: docx2markdown<1.0.0,>=0.1.1
42
+ Requires-Dist: tiktoken<1.0.0,>=0.9.0
43
+ Requires-Dist: markitdown<1.0.0,>=0.1.1
44
+ Requires-Dist: xlrd<3.0.0,>=2.0.1
45
+ Requires-Dist: tabulate<1.0.0,>=0.9.0
46
+ Requires-Dist: unstructured<1.0.0,>=0.17.2
47
+ Requires-Dist: markdown<4.0.0,>=3.8
48
+ Requires-Dist: langchain<1.0.0,>=0.3.0
49
+ Requires-Dist: langchain-community<1.0.0,>=0.3.0
40
50
  Dynamic: author
41
51
  Dynamic: author-email
42
52
  Dynamic: classifier
@@ -1,42 +1,52 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pydatamax
3
- Version: 0.1.11
3
+ Version: 0.1.12
4
4
  Summary: A library for parsing and converting various file formats.
5
- Home-page: https://github.com/cosco/datamax
6
- Author: hzb | ccy
7
- Author-email: zhibaohe@hotmail.com | cy.kron@foxmail.com
5
+ Home-page: https://github.com/Hi-Dolphin/datamax
6
+ Author: ccy
7
+ Author-email: cy.kron@foxmail.com
8
8
  Classifier: Programming Language :: Python :: 3
9
9
  Classifier: License :: OSI Approved :: MIT License
10
10
  Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.10
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
- Requires-Dist: ebooklib
15
- Requires-Dist: python-docx
16
- Requires-Dist: beautifulsoup4
17
- Requires-Dist: python-dotenv
18
- Requires-Dist: minio
19
- Requires-Dist: loguru
20
- Requires-Dist: tqdm
21
- Requires-Dist: oss2
22
- Requires-Dist: python-docx
23
- Requires-Dist: openai
24
- Requires-Dist: jionlp
25
- Requires-Dist: chardet
26
- Requires-Dist: python-pptx
27
- Requires-Dist: openpyxl
28
- Requires-Dist: pymupdf
29
- Requires-Dist: langchain_community==0.2.9
30
- Requires-Dist: premailer
31
- Requires-Dist: setuptools==75.3.0
32
- Requires-Dist: docx2markdown
33
- Requires-Dist: tiktoken
34
- Requires-Dist: markitdown
35
- Requires-Dist: pandas
36
- Requires-Dist: xlrd
37
- Requires-Dist: tabulate
38
- Requires-Dist: unstructured[all]
39
- Requires-Dist: markdown
14
+ Requires-Dist: oss2<3.0.0,>=2.19.1
15
+ Requires-Dist: aliyun-python-sdk-core<3.0.0,>=2.16.0
16
+ Requires-Dist: aliyun-python-sdk-kms<3.0.0,>=2.16.5
17
+ Requires-Dist: crcmod<2.0.0,>=1.7
18
+ Requires-Dist: langdetect<2.0.0,>=1.0.9
19
+ Requires-Dist: loguru<1.0.0,>=0.7.3
20
+ Requires-Dist: python-docx<2.0.0,>=1.1.2
21
+ Requires-Dist: python-dotenv<2.0.0,>=1.1.0
22
+ Requires-Dist: pymupdf<2.0.0,>=1.26.0
23
+ Requires-Dist: pypdf<6.0.0,>=5.5.0
24
+ Requires-Dist: openpyxl<4.0.0,>=3.1.5
25
+ Requires-Dist: pandas<3.0.0,>=2.2.3
26
+ Requires-Dist: numpy<3.0.0,>=2.2.6
27
+ Requires-Dist: requests<3.0.0,>=2.32.3
28
+ Requires-Dist: tqdm<5.0.0,>=4.67.1
29
+ Requires-Dist: pydantic<3.0.0,>=2.11.5
30
+ Requires-Dist: pydantic-settings<3.0.0,>=2.9.1
31
+ Requires-Dist: python-magic<1.0.0,>=0.4.27
32
+ Requires-Dist: PyYAML<7.0.0,>=6.0.2
33
+ Requires-Dist: Pillow<12.0.0,>=11.2.1
34
+ Requires-Dist: packaging<25.0,>=24.2
35
+ Requires-Dist: beautifulsoup4<5.0.0,>=4.13.4
36
+ Requires-Dist: minio<8.0.0,>=7.2.15
37
+ Requires-Dist: openai<2.0.0,>=1.82.0
38
+ Requires-Dist: jionlp<2.0.0,>=1.5.23
39
+ Requires-Dist: chardet<6.0.0,>=5.2.0
40
+ Requires-Dist: python-pptx<2.0.0,>=1.0.2
41
+ Requires-Dist: docx2markdown<1.0.0,>=0.1.1
42
+ Requires-Dist: tiktoken<1.0.0,>=0.9.0
43
+ Requires-Dist: markitdown<1.0.0,>=0.1.1
44
+ Requires-Dist: xlrd<3.0.0,>=2.0.1
45
+ Requires-Dist: tabulate<1.0.0,>=0.9.0
46
+ Requires-Dist: unstructured<1.0.0,>=0.17.2
47
+ Requires-Dist: markdown<4.0.0,>=3.8
48
+ Requires-Dist: langchain<1.0.0,>=0.3.0
49
+ Requires-Dist: langchain-community<1.0.0,>=0.3.0
40
50
  Dynamic: author
41
51
  Dynamic: author-email
42
52
  Dynamic: classifier
@@ -0,0 +1,36 @@
1
+ oss2<3.0.0,>=2.19.1
2
+ aliyun-python-sdk-core<3.0.0,>=2.16.0
3
+ aliyun-python-sdk-kms<3.0.0,>=2.16.5
4
+ crcmod<2.0.0,>=1.7
5
+ langdetect<2.0.0,>=1.0.9
6
+ loguru<1.0.0,>=0.7.3
7
+ python-docx<2.0.0,>=1.1.2
8
+ python-dotenv<2.0.0,>=1.1.0
9
+ pymupdf<2.0.0,>=1.26.0
10
+ pypdf<6.0.0,>=5.5.0
11
+ openpyxl<4.0.0,>=3.1.5
12
+ pandas<3.0.0,>=2.2.3
13
+ numpy<3.0.0,>=2.2.6
14
+ requests<3.0.0,>=2.32.3
15
+ tqdm<5.0.0,>=4.67.1
16
+ pydantic<3.0.0,>=2.11.5
17
+ pydantic-settings<3.0.0,>=2.9.1
18
+ python-magic<1.0.0,>=0.4.27
19
+ PyYAML<7.0.0,>=6.0.2
20
+ Pillow<12.0.0,>=11.2.1
21
+ packaging<25.0,>=24.2
22
+ beautifulsoup4<5.0.0,>=4.13.4
23
+ minio<8.0.0,>=7.2.15
24
+ openai<2.0.0,>=1.82.0
25
+ jionlp<2.0.0,>=1.5.23
26
+ chardet<6.0.0,>=5.2.0
27
+ python-pptx<2.0.0,>=1.0.2
28
+ docx2markdown<1.0.0,>=0.1.1
29
+ tiktoken<1.0.0,>=0.9.0
30
+ markitdown<1.0.0,>=0.1.1
31
+ xlrd<3.0.0,>=2.0.1
32
+ tabulate<1.0.0,>=0.9.0
33
+ unstructured<1.0.0,>=0.17.2
34
+ markdown<4.0.0,>=3.8
35
+ langchain<1.0.0,>=0.3.0
36
+ langchain-community<1.0.0,>=0.3.0
@@ -0,0 +1,57 @@
1
+ from setuptools import setup, find_packages
2
+
3
+ setup(
4
+ name='pydatamax',
5
+ version='0.1.12',
6
+ packages=find_packages(),
7
+ install_requires=[
8
+ "oss2>=2.19.1,<3.0.0",
9
+ "aliyun-python-sdk-core>=2.16.0,<3.0.0",
10
+ "aliyun-python-sdk-kms>=2.16.5,<3.0.0",
11
+ "crcmod>=1.7,<2.0.0",
12
+ "langdetect>=1.0.9,<2.0.0",
13
+ "loguru>=0.7.3,<1.0.0",
14
+ "python-docx>=1.1.2,<2.0.0",
15
+ "python-dotenv>=1.1.0,<2.0.0",
16
+ "pymupdf>=1.26.0,<2.0.0",
17
+ "pypdf>=5.5.0,<6.0.0",
18
+ "openpyxl>=3.1.5,<4.0.0",
19
+ "pandas>=2.2.3,<3.0.0",
20
+ "numpy>=2.2.6,<3.0.0",
21
+ "requests>=2.32.3,<3.0.0",
22
+ "tqdm>=4.67.1,<5.0.0",
23
+ "pydantic>=2.11.5,<3.0.0",
24
+ "pydantic-settings>=2.9.1,<3.0.0",
25
+ "python-magic>=0.4.27,<1.0.0",
26
+ "PyYAML>=6.0.2,<7.0.0",
27
+ "Pillow>=11.2.1,<12.0.0",
28
+ "packaging>=24.2,<25.0",
29
+ "beautifulsoup4>=4.13.4,<5.0.0",
30
+ "minio>=7.2.15,<8.0.0",
31
+ "openai>=1.82.0,<2.0.0",
32
+ "jionlp>=1.5.23,<2.0.0",
33
+ "chardet>=5.2.0,<6.0.0",
34
+ "python-pptx>=1.0.2,<2.0.0",
35
+ "docx2markdown>=0.1.1,<1.0.0",
36
+ "tiktoken>=0.9.0,<1.0.0",
37
+ "markitdown>=0.1.1,<1.0.0",
38
+ "xlrd>=2.0.1,<3.0.0",
39
+ "tabulate>=0.9.0,<1.0.0",
40
+ "unstructured>=0.17.2,<1.0.0",
41
+ "markdown>=3.8,<4.0.0",
42
+ "langchain>=0.3.0,<1.0.0",
43
+ "langchain-community>=0.3.0,<1.0.0",
44
+ ],
45
+ author='ccy',
46
+ author_email='cy.kron@foxmail.com',
47
+ description='A library for parsing and converting various file formats.',
48
+ long_description=open('README.md', encoding='utf-8').read(),
49
+ long_description_content_type='text/markdown',
50
+ url='https://github.com/Hi-Dolphin/datamax',
51
+ classifiers=[
52
+ 'Programming Language :: Python :: 3',
53
+ 'License :: OSI Approved :: MIT License',
54
+ 'Operating System :: OS Independent',
55
+ ],
56
+ python_requires='>=3.10',
57
+ )
@@ -1,26 +0,0 @@
1
- ebooklib
2
- python-docx
3
- beautifulsoup4
4
- python-dotenv
5
- minio
6
- loguru
7
- tqdm
8
- oss2
9
- python-docx
10
- openai
11
- jionlp
12
- chardet
13
- python-pptx
14
- openpyxl
15
- pymupdf
16
- langchain_community==0.2.9
17
- premailer
18
- setuptools==75.3.0
19
- docx2markdown
20
- tiktoken
21
- markitdown
22
- pandas
23
- xlrd
24
- tabulate
25
- unstructured[all]
26
- markdown
pydatamax-0.1.11/setup.py DELETED
@@ -1,47 +0,0 @@
1
- from setuptools import setup, find_packages
2
-
3
- setup(
4
- name='pydatamax',
5
- version='0.1.11',
6
- packages=find_packages(),
7
- install_requires=[
8
- 'ebooklib',
9
- 'python-docx',
10
- 'beautifulsoup4',
11
- "python-dotenv",
12
- "minio",
13
- "loguru",
14
- "tqdm",
15
- "oss2",
16
- "python-docx",
17
- "openai",
18
- "jionlp",
19
- "chardet",
20
- "python-pptx",
21
- "openpyxl",
22
- "pymupdf",
23
- "langchain_community==0.2.9",
24
- "premailer",
25
- "setuptools==75.3.0",
26
- "docx2markdown",
27
- "tiktoken",
28
- "markitdown",
29
- "pandas",
30
- "xlrd",
31
- "tabulate",
32
- "unstructured[all]",
33
- "markdown"
34
- ],
35
- author='hzb | ccy',
36
- author_email='zhibaohe@hotmail.com | cy.kron@foxmail.com',
37
- description='A library for parsing and converting various file formats.',
38
- long_description=open('README.md', encoding='utf-8').read(),
39
- long_description_content_type='text/markdown',
40
- url='https://github.com/cosco/datamax',
41
- classifiers=[
42
- 'Programming Language :: Python :: 3',
43
- 'License :: OSI Approved :: MIT License',
44
- 'Operating System :: OS Independent',
45
- ],
46
- python_requires='>=3.10',
47
- )
File without changes
File without changes
File without changes
File without changes