docp 0.1.0b1__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.lock +0 -0
- docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/59d594003bf59880a884c574bf88ef7555bb0202.lock +0 -0
- docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/72b987fd805cfa2b58c4c8c952b274a11bfd5a00.lock +0 -0
- docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/8cfec92309f5626a223304af2423e332f6d31887.lock +0 -0
- docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/952a9b81c0bfd99800fabf352f69c7ccd46c5e43.lock +0 -0
- docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/c79f2b6a0cea6f4b564fed1938984bace9d30ff0.lock +0 -0
- docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/cb202bfe2e3c98645018a6d12f182a434c9d3e02.lock +0 -0
- docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/d1514c3162bbe87b343f565fadc62e6c06f04f03.lock +0 -0
- docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5.lock +0 -0
- docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fb140275c155a9c7c5a3b3e0e77a9e839594a938.lock +0 -0
- docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fd1b291129c607e5d49799f87cb219b27f98acdf.lock +0 -0
- docp/__init__.py +19 -10
- docp/dbs/chroma.py +19 -6
- docp/libs/_version.py +1 -0
- docp/libs/changelog.py +7 -0
- docp/libs/utilities.py +107 -0
- docp/loaders/__init__.py +38 -0
- docp/loaders/_chromabaseloader.py +83 -107
- docp/loaders/_chromabaseloader.py.bak +378 -0
- docp/loaders/_chromabasepdfloader.py +121 -0
- docp/loaders/_chromabasepptxloader.py +123 -0
- docp/loaders/{chroma.py → chroma.py.bak} +38 -8
- docp/loaders/chromapdfloader.py +199 -0
- docp/loaders/chromapptxloader.py +192 -0
- docp/loaders/lutilities.py +52 -0
- docp/objects/_docbaseobject.py +7 -18
- docp/objects/_imgobject.py +0 -0
- docp/objects/_pageobject.py +3 -2
- docp/objects/_slideobject.py +110 -0
- docp/objects/_textobject.py +64 -0
- docp/objects/pdfobject.py +24 -2
- docp/objects/pptxobject.py +46 -0
- docp/parsers/_pdfbaseparser.py +36 -10
- docp/parsers/_pdftableparser.py +6 -7
- docp/parsers/_pdftextparser.py +23 -13
- docp/parsers/_pptxbaseparser.py +93 -0
- docp/parsers/_pptxtextparser.py +115 -0
- docp/parsers/pptxparser.py +51 -0
- docp/parsers/putilities.py +48 -0
- docp-0.2.0.dist-info/METADATA +110 -0
- docp-0.2.0.dist-info/RECORD +49 -0
- {docp-0.1.0b1.dist-info → docp-0.2.0.dist-info}/WHEEL +1 -1
- docp/_version.py +0 -1
- docp-0.1.0b1.dist-info/METADATA +0 -55
- docp-0.1.0b1.dist-info/RECORD +0 -23
- {docp-0.1.0b1.dist-info → docp-0.2.0.dist-info}/LICENSE +0 -0
- {docp-0.1.0b1.dist-info → docp-0.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,110 @@
|
|
1
|
+
Metadata-Version: 2.2
|
2
|
+
Name: docp
|
3
|
+
Version: 0.2.0
|
4
|
+
Summary: A basic document parsing and loading utility.
|
5
|
+
Author-email: The Developers <development@s3dev.uk>
|
6
|
+
License: GNU GPL-3
|
7
|
+
Project-URL: Documentation, https://docp.readthedocs.io
|
8
|
+
Project-URL: Homepage, https://github.com/s3dev/docp
|
9
|
+
Project-URL: Repository, https://github.com/s3dev/docp
|
10
|
+
Keywords: document,library,parsing,utility,utilities
|
11
|
+
Classifier: Development Status :: 5 - Production/Stable
|
12
|
+
Classifier: Intended Audience :: Developers
|
13
|
+
Classifier: Intended Audience :: End Users/Desktop
|
14
|
+
Classifier: Programming Language :: Python :: 3.7
|
15
|
+
Classifier: Programming Language :: Python :: 3.8
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
20
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
21
|
+
Classifier: Operating System :: POSIX :: Linux
|
22
|
+
Classifier: Operating System :: Microsoft :: Windows
|
23
|
+
Classifier: Topic :: Software Development
|
24
|
+
Classifier: Topic :: Software Development :: Libraries
|
25
|
+
Classifier: Topic :: Utilities
|
26
|
+
Requires-Python: >=3.7
|
27
|
+
Description-Content-Type: text/markdown
|
28
|
+
License-File: LICENSE
|
29
|
+
Requires-Dist: pandas
|
30
|
+
Requires-Dist: unidecode
|
31
|
+
Requires-Dist: utils4
|
32
|
+
|
33
|
+
|
34
|
+
# A basic document parsing and loading utility.
|
35
|
+
|
36
|
+
[](https://pypi.org/project/docp)
|
37
|
+
[](https://pypi.org/project/docp)
|
38
|
+
[](https://pypi.org/project/docp)
|
39
|
+
[](https://pypi.org/project/docp)
|
40
|
+
[](https://pypi.org/project/docp)
|
41
|
+
[](https://pypi.org/project/docp)
|
42
|
+
[](https://pypi.org/project/docp)
|
43
|
+
[](https://docp.readthedocs.io/en/latest/)
|
44
|
+
[](https://opensource.org/license/gpl-3-0)
|
45
|
+
[](https://pypi.org/project/docp)
|
46
|
+
|
47
|
+
In its simplest form, the ``docp`` project is a (doc)ument \(p\)arsing library.
|
48
|
+
|
49
|
+
Written in CPython, the project wraps various lower-level libraries, helping to consolidate binary document structure parsing functionality into a single library. Additional functionality includes [document loaders](#loaders) which load a parsed document's embeddings into a Chroma vector database, for RAG-enabled LLM use.
|
50
|
+
|
51
|
+
|
52
|
+
## Installation
|
53
|
+
The easiest way to install ``docp`` is using ``pip`` *after* activating your virtual environment:
|
54
|
+
|
55
|
+
pip install docp
|
56
|
+
|
57
|
+
Additional (older) releases can be found either at [PyPI](https://pypi.org/project/docp/#history) or in [GitHub Releases](https://github.com/s3dev/docp/releases).
|
58
|
+
|
59
|
+
### A note on the installation of dependencies:
|
60
|
+
To keep the installation dependencies to a minimum, only core libraries are required for installation. Meaning, the parser-specific and loader libraries are *not* installed automatically, as part of the ``pip install`` command.
|
61
|
+
|
62
|
+
If a parser is imported and a library is required but not installed, you'll be notified with an easy-to-read message, listing the required dependenc(y|ies).
|
63
|
+
|
64
|
+
The rationale behind this design decision is that not all users will need the document *loading* capability, so ``torch``, ``langchain``, etc. should not be installed automatically. For example, if your project requires a simple PDF parser, you don't need to (and likely don't want to) 'clutter' your environment with something as heavy as ``torch``, nor make your project dependent on it.
|
65
|
+
|
66
|
+
|
67
|
+
## The Toolset
|
68
|
+
|
69
|
+
### Parsers
|
70
|
+
As of this release, parsers for the following binary document types are supported:
|
71
|
+
|
72
|
+
- PDF
|
73
|
+
- MS PowerPoint (PPTX)
|
74
|
+
- (more coming soon)
|
75
|
+
|
76
|
+
### Loaders
|
77
|
+
In addition to document parsing, document *loading* functionality is built-in as well. Specifically, loading documents into a [Chroma](https://www.trychroma.com) vector database for RAG-enabled LLM ingestion.
|
78
|
+
|
79
|
+
For example, you may wish to load a series of PDF files into a vector database which serves as the backend for a RAG-enabled LLM chatbot. The ``ChromaLoader`` class is specifically designed for this. A single call to the class' loader method results in file retrieval, parsing, splitting, embedding and storage.
|
80
|
+
|
81
|
+
For further detail and usage examples, please refer to the project's [documentation](https://docp.readthedocs.io/).
|
82
|
+
|
83
|
+
|
84
|
+
## Using the Library
|
85
|
+
The documentation suite contains detailed explanations and example usage for each of the library's importable modules. For detailed documentation, usage examples and links to the source code itself, please refer to the
|
86
|
+
[Library API](https://docp.readthedocs.io/en/latest/library.html) page in the documentation.
|
87
|
+
|
88
|
+
### Quickstart
|
89
|
+
For convenience, here are a couple of examples of how to parse the supported document types.
|
90
|
+
|
91
|
+
**Extract text from a PDF file:**
|
92
|
+
|
93
|
+
>>> from docp import PDFParser
|
94
|
+
|
95
|
+
>>> pdf = PDFParser(path='/path/to/myfile.pdf')
|
96
|
+
>>> pdf.extract_text()
|
97
|
+
|
98
|
+
# Access the content of page 1.
|
99
|
+
>>> pg1 = pdf.doc.pages[1].content
|
100
|
+
|
101
|
+
**Extract text from a PowerPoint presentation:**
|
102
|
+
|
103
|
+
>>> from docp import PPTXParser
|
104
|
+
|
105
|
+
>>> pptx = PPTXParser(path='/path/to/myfile.pptx')
|
106
|
+
>>> pptx.extract_text()
|
107
|
+
|
108
|
+
# Access the text on slide 1.
|
109
|
+
>>> pg1 = pptx.doc.slides[1].content
|
110
|
+
|
@@ -0,0 +1,49 @@
|
|
1
|
+
docp/__init__.py,sha256=Sq2c3NKiE5uhpm3gTMOKqKT7QBOiGR68nj97qypv77w,1220
|
2
|
+
docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/53aa51172d142c89d9012cce15ae4d6cc0ca6895895114379cacb4fab128d9db.lock,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
+
docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/59d594003bf59880a884c574bf88ef7555bb0202.lock,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/72b987fd805cfa2b58c4c8c952b274a11bfd5a00.lock,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
|
+
docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/8cfec92309f5626a223304af2423e332f6d31887.lock,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
+
docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/952a9b81c0bfd99800fabf352f69c7ccd46c5e43.lock,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
|
+
docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/c79f2b6a0cea6f4b564fed1938984bace9d30ff0.lock,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
|
+
docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/cb202bfe2e3c98645018a6d12f182a434c9d3e02.lock,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
+
docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/d1514c3162bbe87b343f565fadc62e6c06f04f03.lock,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
+
docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/e7b0375001f109a6b8873d756ad4f7bbb15fbaa5.lock,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
|
+
docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fb140275c155a9c7c5a3b3e0e77a9e839594a938.lock,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
|
+
docp/.cache/.locks/models--sentence-transformers--all-MiniLM-L6-v2/fd1b291129c607e5d49799f87cb219b27f98acdf.lock,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
+
docp/dbs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
|
+
docp/dbs/chroma.py,sha256=k_HFGN_Lnenf7KCkB5KZArV_d3V9NWgmg1uMnC-tcFI,7885
|
15
|
+
docp/libs/_version.py,sha256=FVHPBGkfhbQDi_z3v0PiKJrXXqXOx0vGW_1VaqNJi7U,22
|
16
|
+
docp/libs/changelog.py,sha256=1GA1gy3GIWlJ1QisiI4w-EQc2i6fXlspiq_H_l-M63o,127
|
17
|
+
docp/libs/utilities.py,sha256=OdbflCbTXum0LAAnH0CZCI8ENcYkVvNj1FDIc5WtjQg,3321
|
18
|
+
docp/loaders/__init__.py,sha256=4NnYhOYiOBsgefWOgEm6-T_lcPABOufYxxB1k19VZqg,1207
|
19
|
+
docp/loaders/_chromabaseloader.py,sha256=qJW5Y7iYkMauLbJaUR2x-lC_FnMsZyIjqlhE-Llk4L8,12915
|
20
|
+
docp/loaders/_chromabaseloader.py.bak,sha256=LJT5_Xvzgcr6g9pFRe061nCmMSMF7AY7xN5MELdCJl8,13868
|
21
|
+
docp/loaders/_chromabasepdfloader.py,sha256=E0DNlsYQLMwptHp63GHbTurU_pw2t_OYFw_PHDd2bEI,4415
|
22
|
+
docp/loaders/_chromabasepptxloader.py,sha256=e79JvNuNzm-zt8tPxiMuRCffqZ1bgBMgB2zG3hJQ-Z0,4531
|
23
|
+
docp/loaders/chroma.py.bak,sha256=qYUYAc0uxlzqitCWFLE-77_7JZ4xrvtsYmA1f_6D5No,7553
|
24
|
+
docp/loaders/chromapdfloader.py,sha256=vllVlhEC8XKkb5h8dK_akj2eFvj1eZafrZ1Lx620yZ8,7707
|
25
|
+
docp/loaders/chromapptxloader.py,sha256=__bMx3AKKTKBLQAoOjkgwcRWtPHuxHopE9dNZ1UI6xA,7358
|
26
|
+
docp/loaders/lutilities.py,sha256=nTNRRGwKpUXAL-RFlbhw2sbB5GIVhA8R7peuDjCvWq8,1706
|
27
|
+
docp/objects/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
28
|
+
docp/objects/_docbaseobject.py,sha256=SAoRHuQak6DRvsT6fz3mS8eYthkVK7oGObOtbqQTY5o,1899
|
29
|
+
docp/objects/_imgobject.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
30
|
+
docp/objects/_pageobject.py,sha256=CxBwDnzPsGIAH7xtu3C9uxmkzc8pTiBINvUVSsHztRg,3989
|
31
|
+
docp/objects/_slideobject.py,sha256=NCHypFOv1UtEugggs64ztouUszZOJIoYGWSr6SSNFbA,3350
|
32
|
+
docp/objects/_tableobject.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
33
|
+
docp/objects/_textobject.py,sha256=kgmvfmWBrcRTNzPIRQB3q9rfYp0EO7IjPD8w2JaXNlU,1723
|
34
|
+
docp/objects/pdfobject.py,sha256=Uav4bwDR2-Fm0ByUm2k59cz2KKaHVsIdKOBv5x2JkqY,1784
|
35
|
+
docp/objects/pptxobject.py,sha256=aiCYDoeejNrA5d08Vce-B1BWmVTfr5nWWLBmmlhBKKk,1201
|
36
|
+
docp/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
37
|
+
docp/parsers/_pdfbaseparser.py,sha256=D9wJlBRhmsSIfjt1QpFFE6I5K2h14aatKyEy6bRdPZw,8622
|
38
|
+
docp/parsers/_pdftableparser.py,sha256=_92gioL7tYJB30u5nPi-QTDEsSFyc0U5__6VHLQUkGY,9862
|
39
|
+
docp/parsers/_pdftextparser.py,sha256=FfBSJBCAKuNP1BwHyHVO8VW8A8UFj-cLJeiaVX0xkLc,10612
|
40
|
+
docp/parsers/_pptxbaseparser.py,sha256=yXvJbqQ8TyxaqhRncmeYoV7Sk9yM77ngrtHiPxgnycM,2886
|
41
|
+
docp/parsers/_pptxtextparser.py,sha256=Bhixdy59BMD-P0AcI5h2qW2_PxrCo_jW2gqOkKBcE1w,3971
|
42
|
+
docp/parsers/pdfparser.py,sha256=GISAxAGUi-CndlsMdKdjl3WQ8MBVfSqaOkjUafWZw3A,1569
|
43
|
+
docp/parsers/pptxparser.py,sha256=cMWbWeOXRb8vMc9KAqxk8htyHaJLAcOnZrQaZh-qPqo,1251
|
44
|
+
docp/parsers/putilities.py,sha256=CzP51VkIA0l3unxinOI25EPe1fTx2JMlNtnYwQlScAU,1304
|
45
|
+
docp-0.2.0.dist-info/LICENSE,sha256=jHWpIx24kGhl4pcG7WOq0Y_ZGMksFdvvuxPCX-QoULQ,32473
|
46
|
+
docp-0.2.0.dist-info/METADATA,sha256=5-7u7sC3RzGOOhACJaUr7GTz6UFWEUK84Jui0f1EPOk,5822
|
47
|
+
docp-0.2.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
48
|
+
docp-0.2.0.dist-info/top_level.txt,sha256=yyw47rnQmi7bCSDJBChTDRPEDoINdEpKqrMLR_kKcJ8,5
|
49
|
+
docp-0.2.0.dist-info/RECORD,,
|
docp/_version.py
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
__version__ = '0.1.0b1'
|
docp-0.1.0b1.dist-info/METADATA
DELETED
@@ -1,55 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.1
|
2
|
-
Name: docp
|
3
|
-
Version: 0.1.0b1
|
4
|
-
Summary: A basic document parsing and loading utility.
|
5
|
-
Author-email: The Developers <development@s3dev.uk>
|
6
|
-
License: GNU GPL-3
|
7
|
-
Project-URL: Documentation, https://github.com/s3dev/docp
|
8
|
-
Project-URL: Homepage, https://github.com/s3dev/docp
|
9
|
-
Project-URL: Repository, https://github.com/s3dev/docp
|
10
|
-
Keywords: document,library,parsing,utility,utilities
|
11
|
-
Classifier: Development Status :: 4 - Beta
|
12
|
-
Classifier: Intended Audience :: Developers
|
13
|
-
Classifier: Intended Audience :: End Users/Desktop
|
14
|
-
Classifier: Programming Language :: Python :: 3.7
|
15
|
-
Classifier: Programming Language :: Python :: 3.8
|
16
|
-
Classifier: Programming Language :: Python :: 3.9
|
17
|
-
Classifier: Programming Language :: Python :: 3.11
|
18
|
-
Classifier: Programming Language :: Python :: 3.10
|
19
|
-
Classifier: Programming Language :: Python :: 3.12
|
20
|
-
Classifier: Programming Language :: Python :: Implementation :: CPython
|
21
|
-
Classifier: Operating System :: POSIX :: Linux
|
22
|
-
Classifier: Operating System :: Microsoft :: Windows
|
23
|
-
Classifier: Topic :: Software Development
|
24
|
-
Classifier: Topic :: Software Development :: Libraries
|
25
|
-
Classifier: Topic :: Utilities
|
26
|
-
Requires-Python: >=3.7
|
27
|
-
Description-Content-Type: text/markdown
|
28
|
-
License-File: LICENSE
|
29
|
-
Requires-Dist: pdfplumber
|
30
|
-
Requires-Dist: pandas
|
31
|
-
Requires-Dist: unidecode
|
32
|
-
Requires-Dist: utils4
|
33
|
-
|
34
|
-
# A basic document parsing and loading utility.
|
35
|
-
|
36
|
-
Currently a **placeholder** for when this project is ready in the near future.
|
37
|
-
|
38
|
-
The ``docp`` project is a CPython library for extracting text from binary documents (e.g. PDF, DOCX, etc.) into Python objects, which can be used across various applications, ranging from simple plain-text extraction to loading the text into a Chroma database for LLM use.
|
39
|
-
|
40
|
-
|
41
|
-
## Installation
|
42
|
-
Coming soon ...
|
43
|
-
|
44
|
-
|
45
|
-
## Toolset
|
46
|
-
Coming soon ...
|
47
|
-
|
48
|
-
|
49
|
-
## Using the Library
|
50
|
-
Coming soon ...
|
51
|
-
|
52
|
-
|
53
|
-
## Additional Information
|
54
|
-
Coming soon ...
|
55
|
-
|
docp-0.1.0b1.dist-info/RECORD
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
docp/__init__.py,sha256=cr4byTQC9zJMkytOeN3bhl-pnEhRnEYBR5m0p3OfdOE,825
|
2
|
-
docp/_version.py,sha256=19UQ_Yy0BegDD9DgyLYY3gXVZ_Qn7NkDtMhNIjm4Rzc,24
|
3
|
-
docp/dbs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
docp/dbs/chroma.py,sha256=FJ7UxlkbxaPL7XReWU8vWCNJPCPMgLfiuhZHzhOpjtQ,7105
|
5
|
-
docp/loaders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
docp/loaders/_chromabaseloader.py,sha256=xJ4LocjoAJvEXK7bxIMcUazBbf9hf7Lgpp1_i6XIOuI,13238
|
7
|
-
docp/loaders/chroma.py,sha256=BoErQc1esOzAECSi1qq6rvR-U_-L9lR3IfdjPW3cdyE,6566
|
8
|
-
docp/objects/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
-
docp/objects/_docbaseobject.py,sha256=4nRgWqfoNUPw3gHC7wdEvR3cVPSd2spuDoQO2EF0UL4,2333
|
10
|
-
docp/objects/_pageobject.py,sha256=G1rZOudIlJySgzR9BeCySo2XTKZ2FrTuYrAtymnUFwc,3964
|
11
|
-
docp/objects/_tableobject.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
|
-
docp/objects/_textobject.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
13
|
-
docp/objects/pdfobject.py,sha256=iYbEk38lxok75PUoR2YasK1L5PoNU0Qpky_mQihCfN0,1139
|
14
|
-
docp/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
-
docp/parsers/_pdfbaseparser.py,sha256=mF1QAsapsSxuK7UzB4eETh0ElHGBWJjk5D2zciwdlSI,7780
|
16
|
-
docp/parsers/_pdftableparser.py,sha256=i-E4Mk8yZhoWlkCmHcj8uIXXqdw_BYV8lM8T0uq431A,9902
|
17
|
-
docp/parsers/_pdftextparser.py,sha256=I-o3tsuOxMyEfnlzbC2Sk7uF65SgnpexcmFgZ3jCzRg,10419
|
18
|
-
docp/parsers/pdfparser.py,sha256=GISAxAGUi-CndlsMdKdjl3WQ8MBVfSqaOkjUafWZw3A,1569
|
19
|
-
docp-0.1.0b1.dist-info/LICENSE,sha256=jHWpIx24kGhl4pcG7WOq0Y_ZGMksFdvvuxPCX-QoULQ,32473
|
20
|
-
docp-0.1.0b1.dist-info/METADATA,sha256=vZvUveAJP-JVkSPEPMR1S97j4L55dLsFl9LTX09yoKw,1866
|
21
|
-
docp-0.1.0b1.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
|
22
|
-
docp-0.1.0b1.dist-info/top_level.txt,sha256=yyw47rnQmi7bCSDJBChTDRPEDoINdEpKqrMLR_kKcJ8,5
|
23
|
-
docp-0.1.0b1.dist-info/RECORD,,
|
File without changes
|
File without changes
|