gllm-docproc-binary 0.7.21__cp311-cp311-macosx_13_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. gllm_docproc/__init__.pyi +0 -0
  2. gllm_docproc/chunker/__init__.pyi +3 -0
  3. gllm_docproc/chunker/base_chunker.pyi +28 -0
  4. gllm_docproc/chunker/structured_element/__init__.pyi +3 -0
  5. gllm_docproc/chunker/structured_element/chunk_enricher.pyi +43 -0
  6. gllm_docproc/chunker/structured_element/structured_element_chunker.pyi +80 -0
  7. gllm_docproc/chunker/table/__init__.pyi +3 -0
  8. gllm_docproc/chunker/table/table_chunker.pyi +45 -0
  9. gllm_docproc/converter/__init__.pyi +3 -0
  10. gllm_docproc/converter/base_converter.pyi +15 -0
  11. gllm_docproc/data_generator/__init__.pyi +5 -0
  12. gllm_docproc/data_generator/base_data_generator.pyi +18 -0
  13. gllm_docproc/data_generator/image_data_generator/__init__.pyi +4 -0
  14. gllm_docproc/data_generator/image_data_generator/image_caption_data_generator.pyi +40 -0
  15. gllm_docproc/data_generator/image_data_generator/multi_model_image_caption_data_generator.pyi +51 -0
  16. gllm_docproc/data_generator/pii_data_generator/__init__.pyi +1 -0
  17. gllm_docproc/downloader/__init__.pyi +5 -0
  18. gllm_docproc/downloader/base_downloader.pyi +19 -0
  19. gllm_docproc/downloader/direct_file_url_downloader.pyi +40 -0
  20. gllm_docproc/downloader/google_drive_downloader.pyi +36 -0
  21. gllm_docproc/downloader/html/__init__.pyi +6 -0
  22. gllm_docproc/downloader/html/exception/__init__.pyi +4 -0
  23. gllm_docproc/downloader/html/exception/item_scrape_failed_exception.pyi +16 -0
  24. gllm_docproc/downloader/html/exception/zyte_api_key_not_provided_exception.pyi +15 -0
  25. gllm_docproc/downloader/html/firecrawl_downloader.pyi +49 -0
  26. gllm_docproc/downloader/html/html_downloader.pyi +114 -0
  27. gllm_docproc/downloader/html/requests_downloader.pyi +46 -0
  28. gllm_docproc/downloader/html/scraper/__init__.pyi +0 -0
  29. gllm_docproc/downloader/html/scraper/scraper/__init__.pyi +0 -0
  30. gllm_docproc/downloader/html/scraper/scraper/spiders/__init__.pyi +9 -0
  31. gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_pdf_spider.pyi +27 -0
  32. gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_sitemap_link_spider.pyi +28 -0
  33. gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_sitemap_spider.pyi +61 -0
  34. gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_spider.pyi +66 -0
  35. gllm_docproc/downloader/html/scraper/scraper/spiders/playwright_scrape_spider.pyi +22 -0
  36. gllm_docproc/downloader/html/scraper/scraper/spiders/scrape_spider.pyi +57 -0
  37. gllm_docproc/downloader/html/scraper/scraper/spiders/zyte_scrape_spider.pyi +51 -0
  38. gllm_docproc/downloader/html/scraper/web_scraper_executor.pyi +43 -0
  39. gllm_docproc/downloader/html/utils/__init__.pyi +3 -0
  40. gllm_docproc/downloader/html/utils/web_utils.pyi +39 -0
  41. gllm_docproc/dpo_router/__init__.pyi +5 -0
  42. gllm_docproc/dpo_router/base_dpo_router.pyi +16 -0
  43. gllm_docproc/dpo_router/loader_router.pyi +52 -0
  44. gllm_docproc/dpo_router/parser_router.pyi +42 -0
  45. gllm_docproc/housekeeping/__init__.pyi +3 -0
  46. gllm_docproc/housekeeping/base_housekeeping.pyi +14 -0
  47. gllm_docproc/indexer/__init__.pyi +3 -0
  48. gllm_docproc/indexer/base_indexer.pyi +30 -0
  49. gllm_docproc/indexer/graph/__init__.pyi +4 -0
  50. gllm_docproc/indexer/graph/graph_rag_indexer.pyi +11 -0
  51. gllm_docproc/indexer/graph/light_rag_graph_rag_indexer.pyi +97 -0
  52. gllm_docproc/indexer/graph/llama_index_graph_rag_indexer.pyi +63 -0
  53. gllm_docproc/loader/__init__.pyi +4 -0
  54. gllm_docproc/loader/audio/__init__.pyi +3 -0
  55. gllm_docproc/loader/audio/audio_loader.pyi +45 -0
  56. gllm_docproc/loader/base_loader.pyi +30 -0
  57. gllm_docproc/loader/csv/__init__.pyi +3 -0
  58. gllm_docproc/loader/csv/pandas_loader.pyi +53 -0
  59. gllm_docproc/loader/docx/__init__.pyi +5 -0
  60. gllm_docproc/loader/docx/docx2python_loader.pyi +46 -0
  61. gllm_docproc/loader/docx/python_docx_loader.pyi +35 -0
  62. gllm_docproc/loader/docx/python_docx_table_loader.pyi +35 -0
  63. gllm_docproc/loader/exception/__init__.pyi +4 -0
  64. gllm_docproc/loader/exception/unsupported_file_extension_error.pyi +7 -0
  65. gllm_docproc/loader/exception/video_conversion_error.pyi +12 -0
  66. gllm_docproc/loader/html/__init__.pyi +5 -0
  67. gllm_docproc/loader/html/exception/__init__.pyi +3 -0
  68. gllm_docproc/loader/html/exception/html_load_exception.pyi +7 -0
  69. gllm_docproc/loader/html/flat/__init__.pyi +3 -0
  70. gllm_docproc/loader/html/flat/html_flat_base_handler.pyi +65 -0
  71. gllm_docproc/loader/html/flat/html_flat_loader.pyi +30 -0
  72. gllm_docproc/loader/html/flat/html_flat_merger.pyi +22 -0
  73. gllm_docproc/loader/html/html_base_loader.pyi +25 -0
  74. gllm_docproc/loader/html/nested/__init__.pyi +3 -0
  75. gllm_docproc/loader/html/nested/dictionary_utils.pyi +40 -0
  76. gllm_docproc/loader/html/nested/html_nested_base_handler.pyi +128 -0
  77. gllm_docproc/loader/html/nested/html_nested_element_handler.pyi +24 -0
  78. gllm_docproc/loader/html/nested/html_nested_loader.pyi +15 -0
  79. gllm_docproc/loader/html/utils/__init__.pyi +0 -0
  80. gllm_docproc/loader/html/utils/flat_table_utils.pyi +44 -0
  81. gllm_docproc/loader/html/utils/html_utils.pyi +41 -0
  82. gllm_docproc/loader/html/utils/removed_components.pyi +53 -0
  83. gllm_docproc/loader/html/utils/string_utils.pyi +33 -0
  84. gllm_docproc/loader/html/utils/table_utils.pyi +78 -0
  85. gllm_docproc/loader/image/__init__.pyi +3 -0
  86. gllm_docproc/loader/image/image_loader.pyi +54 -0
  87. gllm_docproc/loader/json/__init__.pyi +3 -0
  88. gllm_docproc/loader/json/json_elements_loader.pyi +35 -0
  89. gllm_docproc/loader/loader_utils.pyi +43 -0
  90. gllm_docproc/loader/pdf/__init__.pyi +14 -0
  91. gllm_docproc/loader/pdf/adobe_pdf_extract_loader.pyi +37 -0
  92. gllm_docproc/loader/pdf/azure_ai_document_intelligence_loader.pyi +47 -0
  93. gllm_docproc/loader/pdf/azure_ai_document_intelligence_raw_loader.pyi +49 -0
  94. gllm_docproc/loader/pdf/glair_vision_ocr_loader.pyi +38 -0
  95. gllm_docproc/loader/pdf/pdf_loader_utils.pyi +59 -0
  96. gllm_docproc/loader/pdf/pdf_miner_loader.pyi +38 -0
  97. gllm_docproc/loader/pdf/pdf_miner_word_loader.pyi +33 -0
  98. gllm_docproc/loader/pdf/pdf_page_loader.pyi +41 -0
  99. gllm_docproc/loader/pdf/pdf_plumber_loader.pyi +35 -0
  100. gllm_docproc/loader/pdf/pymupdf_loader.pyi +55 -0
  101. gllm_docproc/loader/pdf/pymupdf_span_loader.pyi +56 -0
  102. gllm_docproc/loader/pdf/pymupdf_utils.pyi +77 -0
  103. gllm_docproc/loader/pdf/tabula_loader.pyi +32 -0
  104. gllm_docproc/loader/pdf/text_inject_pdf_plumber_loader.pyi +37 -0
  105. gllm_docproc/loader/pipeline_loader.pyi +48 -0
  106. gllm_docproc/loader/pptx/__init__.pyi +3 -0
  107. gllm_docproc/loader/pptx/python_pptx_loader.pyi +48 -0
  108. gllm_docproc/loader/txt/__init__.pyi +3 -0
  109. gllm_docproc/loader/txt/txt_loader.pyi +55 -0
  110. gllm_docproc/loader/video/__init__.pyi +3 -0
  111. gllm_docproc/loader/video/video_loader_utils.pyi +97 -0
  112. gllm_docproc/loader/video/video_transcript_loader.pyi +59 -0
  113. gllm_docproc/loader/xlsx/__init__.pyi +3 -0
  114. gllm_docproc/loader/xlsx/openpyxl_loader.pyi +36 -0
  115. gllm_docproc/model/__init__.pyi +7 -0
  116. gllm_docproc/model/element.pyi +38 -0
  117. gllm_docproc/model/element_metadata.pyi +35 -0
  118. gllm_docproc/model/loader_type.pyi +20 -0
  119. gllm_docproc/model/media.pyi +51 -0
  120. gllm_docproc/model/parser_type.pyi +19 -0
  121. gllm_docproc/parser/__init__.pyi +4 -0
  122. gllm_docproc/parser/base_parser.pyi +28 -0
  123. gllm_docproc/parser/document/__init__.pyi +7 -0
  124. gllm_docproc/parser/document/docx_parser.pyi +27 -0
  125. gllm_docproc/parser/document/pdf_parser.pyi +35 -0
  126. gllm_docproc/parser/document/pptx_parser.pyi +34 -0
  127. gllm_docproc/parser/document/txt_parser.pyi +22 -0
  128. gllm_docproc/parser/document/xlsx_parser.pyi +26 -0
  129. gllm_docproc/parser/html/__init__.pyi +4 -0
  130. gllm_docproc/parser/html/flat/__init__.pyi +0 -0
  131. gllm_docproc/parser/html/flat/html_flat_parser.pyi +27 -0
  132. gllm_docproc/parser/html/nested/__init__.pyi +0 -0
  133. gllm_docproc/parser/html/nested/html_json_processor.pyi +158 -0
  134. gllm_docproc/parser/html/nested/html_nested_parser.pyi +24 -0
  135. gllm_docproc/parser/html/nested/nested_element.pyi +31 -0
  136. gllm_docproc/parser/image/__init__.pyi +4 -0
  137. gllm_docproc/parser/image/image_mime_normalization_parser.pyi +43 -0
  138. gllm_docproc/parser/image/image_plain_small_filter_parser.pyi +45 -0
  139. gllm_docproc/parser/pipeline_parser.pyi +33 -0
  140. gllm_docproc/parser/table/__init__.pyi +3 -0
  141. gllm_docproc/parser/table/table_caption_parser.pyi +66 -0
  142. gllm_docproc/request_handler/__init__.pyi +3 -0
  143. gllm_docproc/request_handler/base_request_handler.pyi +16 -0
  144. gllm_docproc/response_handler/__init__.pyi +3 -0
  145. gllm_docproc/response_handler/base_response_handler.pyi +38 -0
  146. gllm_docproc/utils/__init__.pyi +3 -0
  147. gllm_docproc/utils/async_utils.pyi +22 -0
  148. gllm_docproc/utils/file_utils.pyi +76 -0
  149. gllm_docproc/utils/html_constants.pyi +122 -0
  150. gllm_docproc/validator/__init__.pyi +6 -0
  151. gllm_docproc/validator/base_validator.pyi +34 -0
  152. gllm_docproc/validator/character_count_validator.pyi +26 -0
  153. gllm_docproc/validator/file_size_validator.pyi +20 -0
  154. gllm_docproc/validator/model/__init__.pyi +4 -0
  155. gllm_docproc/validator/model/validator_input.pyi +50 -0
  156. gllm_docproc/validator/model/validator_result.pyi +19 -0
  157. gllm_docproc/validator/page_count_validator.pyi +23 -0
  158. gllm_docproc/validator/pipeline_validator.pyi +40 -0
  159. gllm_docproc.build/.gitignore +1 -0
  160. gllm_docproc.cpython-311-darwin.so +0 -0
  161. gllm_docproc.pyi +213 -0
  162. gllm_docproc_binary-0.7.21.dist-info/METADATA +216 -0
  163. gllm_docproc_binary-0.7.21.dist-info/RECORD +165 -0
  164. gllm_docproc_binary-0.7.21.dist-info/WHEEL +5 -0
  165. gllm_docproc_binary-0.7.21.dist-info/top_level.txt +1 -0
gllm_docproc.pyi ADDED
@@ -0,0 +1,213 @@
1
+ # This file was generated by Nuitka
2
+
3
+ # Stubs included by default
4
+
5
+
6
+ __name__ = ...
7
+
8
+
9
+
10
+ # Modules used internally, to allow implicit dependencies to be seen:
11
+ import os
12
+ import abc
13
+ import typing
14
+ import hashlib
15
+ import inspect
16
+ import re
17
+ import langchain_text_splitters
18
+ import gllm_docproc.chunker.table.TableChunker
19
+ import pandas
20
+ import asyncio
21
+ import concurrent
22
+ import concurrent.futures
23
+ import concurrent.futures.ThreadPoolExecutor
24
+ import gllm_multimodal
25
+ import gllm_multimodal.constants
26
+ import gllm_multimodal.modality_converter
27
+ import gllm_multimodal.modality_converter.image_to_text
28
+ import gllm_multimodal.modality_converter.image_to_text.image_to_caption
29
+ import gllm_multimodal.modality_converter.image_to_text.image_to_caption.image_to_caption
30
+ import json
31
+ import gllm_core
32
+ import gllm_core.utils
33
+ import gllm_core.utils.logger_manager
34
+ import gllm_core.utils.retry
35
+ import gllm_inference
36
+ import gllm_inference.builder
37
+ import gllm_inference.output_parser
38
+ import gllm_inference.prompt_builder
39
+ import gllm_multimodal.modality_converter.image_to_text.image_to_caption.preset_image_to_caption
40
+ import gllm_privacy
41
+ import gllm_privacy.pii_detector
42
+ import gllm_privacy.pii_detector.text_analyzer
43
+ import gllm_privacy.pii_detector.text_anonymizer
44
+ import langdetect
45
+ import mimetypes
46
+ import time
47
+ import uuid
48
+ import pathlib
49
+ import magic
50
+ import requests
51
+ import requests.adapters
52
+ import urllib3
53
+ import urllib3.util
54
+ import urllib3.util.retry
55
+ import gllm_docproc.downloader.BaseDownloader
56
+ import posixpath
57
+ import bosa_connectors
58
+ import bosa_connectors.connector
59
+ import bosa_connectors.models
60
+ import bosa_connectors.models.file
61
+ import datetime
62
+ import firecrawl
63
+ import pydantic
64
+ import copy
65
+ import html_to_markdown
66
+ import scrapy
67
+ import gllm_docproc.downloader.html.exception.ItemScrapeFailedException
68
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.CrawlBaseSpider
69
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.CrawlSitemapLinkSpider
70
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.CrawlSitemapSpider
71
+ import gllm_docproc.downloader.html.utils.generate_filename_from_url
72
+ import gllm_docproc.downloader.html.utils.is_valid_url
73
+ import scrapy.http
74
+ import scrapy_playwright
75
+ import scrapy_playwright.page
76
+ import gllm_docproc.downloader.html.utils.clean_url
77
+ import urllib
78
+ import urllib.parse
79
+ import scrapy.crawler
80
+ import scrapy.spiders
81
+ import scrapy.spiders.sitemap
82
+ import scrapy.utils
83
+ import scrapy.utils.sitemap
84
+ import scrapy.linkextractors
85
+ import billiard
86
+ import gllm_docproc.downloader.html.exception.ZyteApiKeyNotProvidedException
87
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.ScrapeSpider
88
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.ZyteScrapeSpider
89
+ import gllm_multimodal.utils
90
+ import gllm_multimodal.utils.audio_to_text_utils
91
+ import gllm_docproc.loader.image.ImageLoader
92
+ import gllm_docproc.loader.txt.TXTLoader
93
+ import gllm_docproc.model.Element
94
+ import gllm_docproc.model.LoaderType
95
+ import gllm_docproc.indexer.BaseIndexer
96
+ import gllm_datastore
97
+ import gllm_datastore.graph_data_store
98
+ import gllm_datastore.graph_data_store.light_rag_data_store
99
+ import lightrag
100
+ import lightrag.lightrag
101
+ import gllm_datastore.graph_data_store.llama_index_graph_rag_data_store
102
+ import gllm_datastore.graph_data_store.llama_index_neo4j_graph_rag_data_store
103
+ import llama_index
104
+ import llama_index.core
105
+ import llama_index.core.base
106
+ import llama_index.core.base.embeddings
107
+ import llama_index.core.base.embeddings.base
108
+ import llama_index.core.base.llms
109
+ import llama_index.core.base.llms.base
110
+ import llama_index.core.indices
111
+ import llama_index.core.indices.property_graph
112
+ import llama_index.core.indices.property_graph.transformations
113
+ import llama_index.core.schema
114
+ import llama_index.core.vector_stores
115
+ import llama_index.core.vector_stores.types
116
+ import gllm_multimodal.modality_converter.audio_to_text
117
+ import gllm_multimodal.modality_converter.audio_to_text.audio_to_text
118
+ import gllm_multimodal.modality_converter.schema
119
+ import csv
120
+ import base64
121
+ import docx2python
122
+ import docx2python.docx_output
123
+ import docx
124
+ import docx.table
125
+ import docx.text
126
+ import docx.text.paragraph
127
+ import gllm_docproc.loader.html.flat.HTMLFlatLoader
128
+ import gllm_docproc.loader.html.nested.HTMLNestedLoader
129
+ import parsel
130
+ import cairosvg
131
+ import gllm_docproc.loader.html.exception.HtmlLoadException
132
+ import tabulate
133
+ import itertools
134
+ import __future__
135
+ import re.sub
136
+ import w3lib
137
+ import w3lib.html
138
+ import io
139
+ import PIL
140
+ import gllm_docproc.loader.exception.UnsupportedFileExtensionError
141
+ import zipfile
142
+ import adobe
143
+ import adobe.pdfservices
144
+ import adobe.pdfservices.operation
145
+ import adobe.pdfservices.operation.auth
146
+ import adobe.pdfservices.operation.auth.service_principal_credentials
147
+ import adobe.pdfservices.operation.io
148
+ import adobe.pdfservices.operation.io.cloud_asset
149
+ import adobe.pdfservices.operation.io.stream_asset
150
+ import adobe.pdfservices.operation.pdf_services
151
+ import adobe.pdfservices.operation.pdf_services_media_type
152
+ import adobe.pdfservices.operation.pdf_services_response
153
+ import adobe.pdfservices.operation.pdfjobs
154
+ import adobe.pdfservices.operation.pdfjobs.jobs
155
+ import adobe.pdfservices.operation.pdfjobs.jobs.extract_pdf_job
156
+ import adobe.pdfservices.operation.pdfjobs.params
157
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf
158
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf.extract_element_type
159
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf.extract_pdf_params
160
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf.extract_renditions_element_type
161
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf.table_structure_type
162
+ import adobe.pdfservices.operation.pdfjobs.result
163
+ import adobe.pdfservices.operation.pdfjobs.result.extract_pdf_result
164
+ import fitz
165
+ import azure
166
+ import azure.ai
167
+ import azure.ai.documentintelligence
168
+ import azure.ai.documentintelligence.models
169
+ import azure.core
170
+ import azure.core.credentials
171
+ import collections
172
+ import collections.Counter
173
+ import pdfminer
174
+ import pdfminer.high_level
175
+ import pdfminer.layout
176
+ import gllm_docproc.loader.BaseLoader
177
+ import pdfplumber
178
+ import pdfplumber._typing
179
+ import pdfplumber.page
180
+ import pdfplumber.table
181
+ import math
182
+ import numpy
183
+ import tabula
184
+ import tabula.io
185
+ import gllm_datastore.cache
186
+ import gllm_datastore.cache.hybrid_cache
187
+ import gllm_datastore.cache.hybrid_cache.hybrid_cache
188
+ import gllm_datastore.cache.hybrid_cache.utils
189
+ import pptx
190
+ import pptx.chart
191
+ import pptx.chart.chart
192
+ import pptx.shapes
193
+ import pptx.shapes.base
194
+ import pptx.table
195
+ import sys
196
+ import soundfile
197
+ import scipy
198
+ import gllm_docproc.loader.exception.VideoConversionError
199
+ import gi
200
+ import gi.repository
201
+ import gllm_docproc.utils.run_async_in_sync
202
+ import openpyxl
203
+ import openpyxl.cell
204
+ import openpyxl.cell.cell
205
+ import openpyxl.worksheet
206
+ import openpyxl.worksheet.worksheet
207
+ import enum
208
+ import gllm_docproc.parser.BaseParser
209
+ import subprocess
210
+ import tempfile
211
+ import gllm_multimodal.utils.image_utils
212
+ import codecs
213
+ import types
@@ -0,0 +1,216 @@
1
+ Metadata-Version: 2.2
2
+ Name: gllm-docproc-binary
3
+ Version: 0.7.21
4
+ Summary: A library for orchestrating document processing, typically in Gen AI applications (but not limited to Gen AI).
5
+ Author-email: GenAI SDK Team <gat-sdk@gdplabs.id>
6
+ Requires-Python: <3.13,>=3.11
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: bosa-connectors-binary<0.4.0,>=0.3.0
9
+ Requires-Dist: gllm-core-binary<0.4.0,>=0.3.0
10
+ Requires-Dist: gllm-datastore-binary[chroma,elasticsearch]<0.6.0,>=0.5.0
11
+ Requires-Dist: gllm-multimodal-binary[audio]<0.3.0,>=0.2.0
12
+ Requires-Dist: gllm-privacy-binary<0.5.0,>=0.4.0
13
+ Requires-Dist: langchain-text-splitters<0.4.0,>=0.3.2
14
+ Requires-Dist: pandas<3.0.0,>=2.2.3
15
+ Requires-Dist: pydantic<3.0.0,>=2.9.1
16
+ Requires-Dist: tabulate<0.10.0,>=0.9.0
17
+ Requires-Dist: python-magic<0.5.0,>=0.4.27; sys_platform != "win32"
18
+ Requires-Dist: python-magic-bin<0.5.0,>=0.4.14; sys_platform == "win32"
19
+ Provides-Extra: dev
20
+ Requires-Dist: coverage<8.0.0,>=7.4.4; extra == "dev"
21
+ Requires-Dist: mypy<2.0.0,>=1.15.0; extra == "dev"
22
+ Requires-Dist: pre-commit<4.0.0,>=3.7.0; extra == "dev"
23
+ Requires-Dist: pytest<9.0.0,>=8.1.1; extra == "dev"
24
+ Requires-Dist: pytest-asyncio<1.0.0,>=0.23.6; extra == "dev"
25
+ Requires-Dist: pytest-cov<6.0.0,>=5.0.0; extra == "dev"
26
+ Requires-Dist: ruff<1.0.0,>=0.6.7; extra == "dev"
27
+ Provides-Extra: audio
28
+ Requires-Dist: librosa<0.11.0,>=0.10.1; extra == "audio"
29
+ Requires-Dist: tqdm<5.0.0,>=4.66.2; extra == "audio"
30
+ Provides-Extra: docx
31
+ Requires-Dist: docx2python<3.0.0,>=2.8.0; extra == "docx"
32
+ Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == "docx"
33
+ Provides-Extra: html
34
+ Requires-Dist: billiard<5.0.0,>=4.2.1; extra == "html"
35
+ Requires-Dist: firecrawl-py<5.0.0,>=4.3.6; extra == "html"
36
+ Requires-Dist: html-to-markdown<2.0.0,>=1.9.0; extra == "html"
37
+ Requires-Dist: playwright<2.0.0,>=1.40.0; extra == "html"
38
+ Requires-Dist: scrapy<3.0.0,>=2.11.0; extra == "html"
39
+ Requires-Dist: scrapy-playwright<0.1.0,>=0.0.33; extra == "html"
40
+ Requires-Dist: scrapy-zyte-api<1.0.0,>=0.12.2; extra == "html"
41
+ Requires-Dist: zyte-api<1.0.0,>=0.4.8; extra == "html"
42
+ Provides-Extra: html-svg
43
+ Requires-Dist: cairosvg<3.0.0,>=2.8.2; extra == "html-svg"
44
+ Provides-Extra: image
45
+ Requires-Dist: aioresponses<1.0.0,>=0.7.0; extra == "image"
46
+ Requires-Dist: boto3<2.0.0,>=1.38.10; extra == "image"
47
+ Requires-Dist: pillow<12.0.0,>=11.2.1; extra == "image"
48
+ Provides-Extra: kg
49
+ Requires-Dist: asyncpg<1.0.0,>=0.30.0; extra == "kg"
50
+ Requires-Dist: gllm-datastore-binary[kg]<0.6.0,>=0.5.0; extra == "kg"
51
+ Requires-Dist: lightrag-hku<2.0.0,>=1.4.6; extra == "kg"
52
+ Requires-Dist: llama-index-embeddings-openai<1.0.0,>=0.3.0; extra == "kg"
53
+ Requires-Dist: llama-index-llms-openai<1.0.0,>=0.3.0; extra == "kg"
54
+ Provides-Extra: pdf
55
+ Requires-Dist: azure-ai-documentintelligence<2.0.0,>=1.0.0b3; extra == "pdf"
56
+ Requires-Dist: jpype1<2.0.0,>=1.5.0; extra == "pdf"
57
+ Requires-Dist: pdfminer-six<20250000,>=20231228; extra == "pdf"
58
+ Requires-Dist: pdfplumber<1.0.0,>=0.11.4; extra == "pdf"
59
+ Requires-Dist: pdfservices-sdk<5.0.0,>=4.0.0; extra == "pdf"
60
+ Requires-Dist: pymupdf<2.0.0,>=1.24.10; extra == "pdf"
61
+ Requires-Dist: tabula-py<3.0.0,>=2.9.3; extra == "pdf"
62
+ Provides-Extra: pii
63
+ Requires-Dist: langdetect<2.0.0,>=1.0.0; extra == "pii"
64
+ Requires-Dist: torch<3.0.0,>=2.0.0; extra == "pii"
65
+ Provides-Extra: pptx
66
+ Requires-Dist: python-pptx<2.0.0,>=1.0.2; extra == "pptx"
67
+ Provides-Extra: video
68
+ Requires-Dist: PyGObject==3.50.0; sys_platform != "win32" and extra == "video"
69
+ Requires-Dist: numpy<2.0.0,>=1.26.0; extra == "video"
70
+ Requires-Dist: scipy<2.0.0,>=1.15.0; extra == "video"
71
+ Requires-Dist: soundfile<0.14.0,>=0.13.1; extra == "video"
72
+ Provides-Extra: xlsx
73
+ Requires-Dist: openpyxl<4.0.0,>=3.0.10; extra == "xlsx"
74
+
75
+ # GLLM Docproc
76
+
77
+ ## Description
78
+ A library for orchestrating document processing, typically in Gen AI applications (but not limited to Gen AI).
79
+
80
+ ---
81
+
82
+ ## Installation
83
+
84
+ ### Prerequisites
85
+
86
+ Mandatory:
87
+ 1. Python 3.11 or 3.12 (the package requires `>=3.11,<3.13`) — [Install here](https://www.python.org/downloads/)
88
+ 2. pip — [Install here](https://pip.pypa.io/en/stable/installation/)
89
+ 3. uv — [Install here](https://docs.astral.sh/uv/getting-started/installation/)
90
+ 4. gcloud CLI (for authentication) — [Install here](https://cloud.google.com/sdk/docs/install), then log in using:
91
+ ```bash
92
+ gcloud auth login
93
+ ```
94
+
95
+ ---
96
+
97
+ ### Install from Artifact Registry
98
+
99
+ This requires authentication via the `gcloud` CLI.
100
+
101
+ 1. Export token
102
+ ```
103
+ export GCLOUD_ACCESS_TOKEN="$(gcloud auth print-access-token)"
104
+ ```
105
+
106
+ 2. Configure the index in your `pyproject.toml`
107
+ ```
108
+ [[tool.uv.index]]
109
+ name = "gen-ai-internal"
110
+ url = "https://oauth2accesstoken:${GCLOUD_ACCESS_TOKEN}@glsdk.gdplabs.id/gen-ai-internal/simple/"
111
+ ```
112
+
113
+ 3. Add the dependency
114
+ ```
115
+ uv add gllm-docproc
116
+ ```
117
+
118
+ ---
119
+
120
+ ## Local Development Setup
121
+
122
+ ### Prerequisites
123
+
124
+ 1. Python 3.11 or 3.12 (the package requires `>=3.11,<3.13`) — [Install here](https://www.python.org/downloads/)
125
+ 2. pip — [Install here](https://pip.pypa.io/en/stable/installation/)
126
+ 3. uv — [Install here](https://docs.astral.sh/uv/getting-started/installation/)
127
+ 4. gcloud CLI — [Install here](https://cloud.google.com/sdk/docs/install), then log in using:
128
+
129
+ ```bash
130
+ gcloud auth login
131
+ ```
132
+ 5. Git — [Install here](https://git-scm.com/downloads)
133
+ 6. Access to the [GDP Labs SDK GitHub repository](https://github.com/GDP-ADMIN/gl-sdk)
134
+
135
+ ---
136
+
137
+ ### 1. Clone Repository
138
+
139
+ ```bash
140
+ git clone git@github.com:GDP-ADMIN/gl-sdk.git
141
+ cd gl-sdk/libs/gllm-docproc
142
+ ```
143
+
144
+ ---
145
+
146
+ ### 2. Setup Authentication
147
+
148
+ Set the following environment variables to authenticate with internal package indexes:
149
+
150
+ ```bash
151
+ export UV_INDEX_GEN_AI_INTERNAL_USERNAME=oauth2accesstoken
152
+ export UV_INDEX_GEN_AI_INTERNAL_PASSWORD="$(gcloud auth print-access-token)"
153
+ export UV_INDEX_GEN_AI_USERNAME=oauth2accesstoken
154
+ export UV_INDEX_GEN_AI_PASSWORD="$(gcloud auth print-access-token)"
155
+ ```
156
+
157
+ ---
158
+
159
+ ### 3. Quick Setup
160
+
161
+ Run:
162
+
163
+ ```bash
164
+ make setup
165
+ ```
166
+
167
+ ---
168
+
169
+ ### 4. Activate Virtual Environment
170
+
171
+ ```bash
172
+ source .venv/bin/activate
173
+ ```
174
+
175
+ ---
176
+
177
+ ## Local Development Utilities
178
+
179
+ The following Makefile commands are available for quick operations:
180
+
181
+ ### Install uv
182
+
183
+ ```bash
184
+ make install-uv
185
+ ```
186
+
187
+ ### Install Pre-Commit
188
+
189
+ ```bash
190
+ make install-pre-commit
191
+ ```
192
+
193
+ ### Install Dependencies
194
+
195
+ ```bash
196
+ make install
197
+ ```
198
+
199
+ ### Update Dependencies
200
+
201
+ ```bash
202
+ make update
203
+ ```
204
+
205
+ ### Run Tests
206
+
207
+ ```bash
208
+ make test
209
+ ```
210
+
211
+ ---
212
+
213
+ ## Contributing
214
+
215
+ Please refer to the [Python Style Guide](https://docs.google.com/document/d/1uRggCrHnVfDPBnG641FyQBwUwLoFw0kTzNqRm92vUwM/edit?usp=sharing)
216
+ for information about code style, documentation standards, and SCA requirements.
@@ -0,0 +1,165 @@
1
+ gllm_docproc.cpython-311-darwin.so,sha256=cNQnIOMsrBljkEVHovsb-zJ9M7xQC9lUhOFZDZrmR9s,5858752
2
+ gllm_docproc.pyi,sha256=sRmLUiPtWTonSayWwtYiC--fJxKFaThknizuX5cXydk,6902
3
+ gllm_docproc/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ gllm_docproc/chunker/__init__.pyi,sha256=eA7-yNoiTmdgdk8KUEiFSMNu4nFm7J-D7BV-7J4_QgQ,80
5
+ gllm_docproc/chunker/base_chunker.pyi,sha256=qvUhHYl0Mj-aRDW_uf59gWCWyIyUtnDayZaZTR4-C9I,1132
6
+ gllm_docproc/chunker/structured_element/__init__.pyi,sha256=FGOqqpZRBIMbsHGRiS3DehxJ3dG6wOYTBYThVqytiSI,133
7
+ gllm_docproc/chunker/structured_element/chunk_enricher.pyi,sha256=78w45UE31wcXOR3Xl4dtYwzcgPQS5EHxKuodfEpOAtY,1720
8
+ gllm_docproc/chunker/structured_element/structured_element_chunker.pyi,sha256=Ys7vSVDUOj2cMOYClnUpJbNtzSfzakDrHFfZTSdSeb4,4077
9
+ gllm_docproc/chunker/table/__init__.pyi,sha256=oYBcaQP91xP32ITWWhYezzpYkIytbZ_P9VrCpmbQmjM,159
10
+ gllm_docproc/chunker/table/table_chunker.pyi,sha256=Nb4xDLl5ZR_N8ilRmHZyviNaqHEbeuj0WcNL7owVrP0,1853
11
+ gllm_docproc/converter/__init__.pyi,sha256=t8FWQ6ivPnaIHXKD9tNlaelmtXIguvEPwgNUobwHobE,88
12
+ gllm_docproc/converter/base_converter.pyi,sha256=iyP3RzkxeQvkn0Pmonxk6mljgDoJVYpQ6ketIhZXDyA,427
13
+ gllm_docproc/data_generator/__init__.pyi,sha256=uk6fwUT9e0l7fBr-9YAckgDIByosjrHU_kUJmQgSLHE,522
14
+ gllm_docproc/data_generator/base_data_generator.pyi,sha256=Asap_mx_bWBGPWaAEzLZtRDpFOe2JxcYm2APgDjgB9I,772
15
+ gllm_docproc/data_generator/image_data_generator/__init__.pyi,sha256=w19oyxFACC7FBV8nAz7zRmqlJuT7JuI3oRjVodxIvoM,402
16
+ gllm_docproc/data_generator/image_data_generator/image_caption_data_generator.pyi,sha256=EB_3-dtgwxa6kHIdO3erblT27UTbI2w_gyJE1lyY4a4,2120
17
+ gllm_docproc/data_generator/image_data_generator/multi_model_image_caption_data_generator.pyi,sha256=10Wf94Gjq54me8VF8v4xEIW1qXSCKr5xH4BXYFT3pnU,2720
18
+ gllm_docproc/data_generator/pii_data_generator/__init__.pyi,sha256=VaDnIoqdremFO6xCyLLwEzT9Aq6_4jT-Hbi_8OXFpTI,122
19
+ gllm_docproc/downloader/__init__.pyi,sha256=qWnleQX8bx1jJRs9wZFWc4cd419ePU9LmCZdfYw1q0k,319
20
+ gllm_docproc/downloader/base_downloader.pyi,sha256=BBpGEVCPAkYHXgpPWRBYCY5iQO-KVpxnuMhJDMJ4ao0,813
21
+ gllm_docproc/downloader/direct_file_url_downloader.pyi,sha256=i6B6n47YXu933r889PIvmIGI280E5wTYO_0NBbcGDqk,1822
22
+ gllm_docproc/downloader/google_drive_downloader.pyi,sha256=6A_RUCoioTsk81p8Vfuz4DMP2-Q9YYDoanKRkUyUSaE,1553
23
+ gllm_docproc/downloader/html/__init__.pyi,sha256=Vi06UALRn056EiOX3FIqwJpTBsf5mm-dOoUT9G-BYpY,338
24
+ gllm_docproc/downloader/html/firecrawl_downloader.pyi,sha256=CYs34A-DaTtUy_2WtyWaFYFH0zO70YFXbhD-bS0uLGg,2423
25
+ gllm_docproc/downloader/html/html_downloader.pyi,sha256=cm8tBuI-F9Uo96bWkm3_-T98CpUXCzCn0yp-uRAxCc4,5851
26
+ gllm_docproc/downloader/html/requests_downloader.pyi,sha256=CKXch7MYSw1tDEFjpEy_-NCWOVpKTbFmmkYIuxMkFvA,2355
27
+ gllm_docproc/downloader/html/exception/__init__.pyi,sha256=ZEV6EjWuDZ6Rr-mRm3gad8DuvmNivr2v8fYVXxFB-ks,286
28
+ gllm_docproc/downloader/html/exception/item_scrape_failed_exception.pyi,sha256=MrQnR6wuxec85q-54QZ27kCnw4p4CUj8hawfAMldVCE,607
29
+ gllm_docproc/downloader/html/exception/zyte_api_key_not_provided_exception.pyi,sha256=gjX1ovxTSMQ3dCgYA-434Hp-rURVB5xEtlfMVvpsAmU,589
30
+ gllm_docproc/downloader/html/scraper/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
+ gllm_docproc/downloader/html/scraper/web_scraper_executor.pyi,sha256=6vsdbmBsIN-MfxuAII1M2dYBvBI3uKtkEd7M6JRGx2M,2112
32
+ gllm_docproc/downloader/html/scraper/scraper/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
+ gllm_docproc/downloader/html/scraper/scraper/spiders/__init__.pyi,sha256=oPIUQoIMRMTuyUOMvMZgzRjyVybMYNqlrDmD5ewMWVI,658
34
+ gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_pdf_spider.pyi,sha256=suD5on_SoYZkeaIKJGM_5fg_lKwY2Z1sIbuMwsMAHjo,1339
35
+ gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_sitemap_link_spider.pyi,sha256=THBhGOCYmdZJsN9yHQOdMUJh85HPtmb9K1x5d2LAOsQ,1120
36
+ gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_sitemap_spider.pyi,sha256=C3-DzmzYXDv8j4ROoP2AEgOQ6phTI9dAmx5abPXXFNA,2544
37
+ gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_spider.pyi,sha256=SJgRvKNw-wVsc7oCG2z4OUOygIVRcbYosv4IG2ZIQH0,2567
38
+ gllm_docproc/downloader/html/scraper/scraper/spiders/playwright_scrape_spider.pyi,sha256=zm1KgUJtKNO5gFJPzZ9liAVO9sDSQbWjY0eiqUnSXE8,947
39
+ gllm_docproc/downloader/html/scraper/scraper/spiders/scrape_spider.pyi,sha256=MKOTG0t-QPH7Gnw7obpryskQIBRbhXPidmwEyDcvjrU,2082
40
+ gllm_docproc/downloader/html/scraper/scraper/spiders/zyte_scrape_spider.pyi,sha256=SqzUlfsv84MPcCEgvmAoRdgOQvTgbF-Su0Pl2NxKgZQ,2138
41
+ gllm_docproc/downloader/html/utils/__init__.pyi,sha256=YpN23jkvKEM81BO1AYj_TOyl5n3kmx3AklFFlzwhpcg,205
42
+ gllm_docproc/downloader/html/utils/web_utils.pyi,sha256=iGuo1nclEi4vK9JkgqBtPmiPPpiCBxvnxntLTckzd0Y,1395
43
+ gllm_docproc/dpo_router/__init__.pyi,sha256=nG-PYRMjqEvheg7rIXE6VFzjsQDLNQIfZae9_38eIOs,233
44
+ gllm_docproc/dpo_router/base_dpo_router.pyi,sha256=gVC3VUmVXfe3byyfugafEMQLQ-rV2S24-UN0t1_NvNs,579
45
+ gllm_docproc/dpo_router/loader_router.pyi,sha256=mKvvG0qqTGSjOPNRs_ao1GO6RucWDlkLoVoNRztIB74,2359
46
+ gllm_docproc/dpo_router/parser_router.pyi,sha256=6VMTLAKDWIr6z1xZoZMpUU0yERnixPpT6gXgnnVaCO8,1933
47
+ gllm_docproc/housekeeping/__init__.pyi,sha256=hCHqIldkCC8OetWprbipx1XwenES9iGHlaNRTwzqR4g,100
48
+ gllm_docproc/housekeeping/base_housekeeping.pyi,sha256=I8FXfPtsDhBR6z2mxsjAvij2T4RewLl6HmCGZ03skjw,416
49
+ gllm_docproc/indexer/__init__.pyi,sha256=2rF0XCyWpcpYif-We8ZC-ZpCAzLpMoClEmXhNVh2-s8,100
50
+ gllm_docproc/indexer/base_indexer.pyi,sha256=Tuj7EAMNrTkaU43Pn2GBZ-b_d4ztQLVqDHhFyxClj-s,1052
51
+ gllm_docproc/indexer/graph/__init__.pyi,sha256=iy99lSSfOgkuK2mbSup10kqqpgIfd5esAM57BSFpJ0w,310
52
+ gllm_docproc/indexer/graph/graph_rag_indexer.pyi,sha256=slEIIYp_JTc_SIKkAQlNnnDZiS34mL85jlbCcT3zF-U,368
53
+ gllm_docproc/indexer/graph/light_rag_graph_rag_indexer.pyi,sha256=OJq1Qj6hABHiqU-ZdkuzSt1DK_BNv41dakIPruQ9RE4,4191
54
+ gllm_docproc/indexer/graph/llama_index_graph_rag_indexer.pyi,sha256=RsQK7jO5a6SFpLYJhXfVclG70g1mdnt5i7JxhiqCWeA,3101
55
+ gllm_docproc/loader/__init__.pyi,sha256=STxqJvk7NyqQtJ1pmBycKaKgt2aNYaFfpXoTqMCzY9g,156
56
+ gllm_docproc/loader/base_loader.pyi,sha256=qf6exWFn7ShRLdHl0LoyOIRyGV74BNA57RFavtX1GQw,1322
57
+ gllm_docproc/loader/loader_utils.pyi,sha256=GX7HbduYkCeoBYOHBctlIurNWgclbePePEv9BYQ_zFI,1828
58
+ gllm_docproc/loader/pipeline_loader.pyi,sha256=w8XMX-3DOJ6RN5y8pU-vW1N4HzlopwuJLQgGC4GdZ3A,1771
59
+ gllm_docproc/loader/audio/__init__.pyi,sha256=dNPtbcP9rdG7Dr_jYcUEZ_rpyK2X0LdFC9S_JG8PcpQ,80
60
+ gllm_docproc/loader/audio/audio_loader.pyi,sha256=1s7cGsQdz3KSxMc2AAHYqKHxkFlnqkFTmPAYQgPDkMM,2243
61
+ gllm_docproc/loader/csv/__init__.pyi,sha256=tYZk9vD4H1dLn6AkYaOZK5YQs4nhLLUtdWdHnm9rQ0g,84
62
+ gllm_docproc/loader/csv/pandas_loader.pyi,sha256=VYWUWQhN0dF1K82r6h4Tv4liW3lpj77xBuSMAN6R2fQ,2808
63
+ gllm_docproc/loader/docx/__init__.pyi,sha256=R69OD2iJ7-M9mn6Nm3_iqIY1BxHYodHemyYuWE2LMyg,303
64
+ gllm_docproc/loader/docx/docx2python_loader.pyi,sha256=-9ZIM9KKGwHClCjaPbaTrCIcC4_im6W1wMrHeHvcJS0,2523
65
+ gllm_docproc/loader/docx/python_docx_loader.pyi,sha256=SBp4IqD19zG-MJ_doOrW7JIbbtlPBvn3--uCWEv7loI,2016
66
+ gllm_docproc/loader/docx/python_docx_table_loader.pyi,sha256=RokUQ6a6k-pDbGTqUJyoVVG89Wo5G_dfSUjshFYHl2A,1721
67
+ gllm_docproc/loader/exception/__init__.pyi,sha256=9mD9OB7BGgG_tEVAMw0ahzIEu6dnVqyiEqcxr5PBoCA,259
68
+ gllm_docproc/loader/exception/unsupported_file_extension_error.pyi,sha256=ux-G38V35LP3Y__I98ccTv5ekoOdOXUKK2UQh0WYd3g,261
69
+ gllm_docproc/loader/exception/video_conversion_error.pyi,sha256=wUhhMh6SCMFcT0G9pvHdlCRJ7jYvznscR8AIuATdfL4,430
70
+ gllm_docproc/loader/html/__init__.pyi,sha256=rBA2i4IXnLnpLPi0SHvGtUMVgJ1pFMV-48pIrS-1cKQ,239
71
+ gllm_docproc/loader/html/html_base_loader.pyi,sha256=_cQCV_DGY5XkfU0VlVirh2im9bFO0WK_uE4RZCKe7z8,1160
72
+ gllm_docproc/loader/html/exception/__init__.pyi,sha256=itSjlJJFFC9Z9ZYdby5iW1yd77eKws0tEwOkQmnVHm0,105
73
+ gllm_docproc/loader/html/exception/html_load_exception.pyi,sha256=7umMZsJSHs3H-gyIsxRtaGSmvpVskU25MEGS6DLy9-w,254
74
+ gllm_docproc/loader/html/flat/__init__.pyi,sha256=Q7WDbp0ZZ1ATcvn8I4Ix1OBJYGXajHMXxXceAmig8c0,93
75
+ gllm_docproc/loader/html/flat/html_flat_base_handler.pyi,sha256=TfQ8wSGefR3qXn6-z0nUjz9Ot3WKc6vNfuxk4OLFEkY,3185
76
+ gllm_docproc/loader/html/flat/html_flat_loader.pyi,sha256=SBlBs2lrC0Xs4YfaWPQeCgCVtNwcl6yLXRWRe1mtspg,1781
77
+ gllm_docproc/loader/html/flat/html_flat_merger.pyi,sha256=BVrpFq3MEdLX4z4JE13ECooFjt0A4Y1UDIcuadWatUE,1356
78
+ gllm_docproc/loader/html/nested/__init__.pyi,sha256=pd14asYg0pcfwnlLkvE4rpfIKFphdg86Kfy_Jun74ho,101
79
+ gllm_docproc/loader/html/nested/dictionary_utils.pyi,sha256=TNcvYtYUv41pZeMaQVX3WW9gIe0elI9l_MmzIWzcj64,1494
80
+ gllm_docproc/loader/html/nested/html_nested_base_handler.pyi,sha256=YBlhfDq_Mq0eYiA-8XDIH1EnlqxnSm6Tnzb6Af3dMwY,4734
81
+ gllm_docproc/loader/html/nested/html_nested_element_handler.pyi,sha256=wHJJDl28NgfbsKCD8UgsucWth4mXVmMMdp0c2RaeEtc,916
82
+ gllm_docproc/loader/html/nested/html_nested_loader.pyi,sha256=YXXo34KJ9WMzYdzSng0a6fq9THf909-CkoTJ81pdLKs,917
83
+ gllm_docproc/loader/html/utils/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
84
+ gllm_docproc/loader/html/utils/flat_table_utils.pyi,sha256=NLYp_nth5uYIq3EV6T1BqDD-SB6wsBaMiafWa5OZo5I,1949
85
+ gllm_docproc/loader/html/utils/html_utils.pyi,sha256=zGSC3n_WfXUmUbt-H4HCSBdZZjTs9zOXubUWIHrMaZk,1628
86
+ gllm_docproc/loader/html/utils/removed_components.pyi,sha256=UGUFq-rUlufUbs7uAp_FbDNzNVe_f4YVviwx_TLVSOg,2090
87
+ gllm_docproc/loader/html/utils/string_utils.pyi,sha256=OrXkM4n53mS5GIrlZJNCfQQLP8CMULt4yVyogNW38r4,1092
88
+ gllm_docproc/loader/html/utils/table_utils.pyi,sha256=vbfesENns4ozUOhEH9pDVRgCTS3wKV5dIhYIuJu1V5w,2740
89
+ gllm_docproc/loader/image/__init__.pyi,sha256=Eg6DMkEWfCI1Djmjw3-ZLJ4ydY19Ax-sjzpcz9FNkWs,80
90
+ gllm_docproc/loader/image/image_loader.pyi,sha256=FygZyED2NySP1TG2RfhMJwMF8IaiJGWIGVbCiKwudxM,2463
91
+ gllm_docproc/loader/json/__init__.pyi,sha256=FwvHdSzGzNaPGTw0ge_-YVqBoPj5YQtLJLs3JxdNvEE,109
92
+ gllm_docproc/loader/json/json_elements_loader.pyi,sha256=9nXNC7X3grByacMqrm9jUf9tFbGSd7YA2pUvQxsng6s,1466
93
+ gllm_docproc/loader/pdf/__init__.pyi,sha256=sVzBJIHISSMMf59RhztY9o0fP6f6sc7Dg2_ftJxqQEo,1276
94
+ gllm_docproc/loader/pdf/adobe_pdf_extract_loader.pyi,sha256=KOxp2fKL9ijX83MGjFZgYlEVKiPKDcUveOvzOiNtsqo,1691
95
+ gllm_docproc/loader/pdf/azure_ai_document_intelligence_loader.pyi,sha256=mWvju7jHJNC6cpzxT0SfAoAWuZpTKufzCAP_zKgpSNo,2614
96
+ gllm_docproc/loader/pdf/azure_ai_document_intelligence_raw_loader.pyi,sha256=-az2yXu3NqE5_LuB2QW_z9x3Nzfjxjfrkyj1RBXP25w,2628
97
+ gllm_docproc/loader/pdf/glair_vision_ocr_loader.pyi,sha256=ghQ71Fvjgh707YONVFfnV4hlQAECUfYO2LGmXFvWdfQ,1660
98
+ gllm_docproc/loader/pdf/pdf_loader_utils.pyi,sha256=u7FGjtY0nS7AEGFG2rP_xB8d9vf-7j023MoQ_s3AhfY,2938
99
+ gllm_docproc/loader/pdf/pdf_miner_loader.pyi,sha256=J3ktjrct3_OzXoD8EU5R8N7UgmHBWJwUUxKExTUJxbo,2216
100
+ gllm_docproc/loader/pdf/pdf_miner_word_loader.pyi,sha256=l1JY1M8JvAVlAK73VDhJCLPWp1lAf9QmnUmSPCp6udI,1518
101
+ gllm_docproc/loader/pdf/pdf_page_loader.pyi,sha256=UWMIDigAamNlfTUcuzKvUsQjU4Qbf7dEBsPLH1mRKV0,2030
102
+ gllm_docproc/loader/pdf/pdf_plumber_loader.pyi,sha256=Sltw_bo0-tnDTSILuOAenEgUIDBSn4TQFB32G97G9nA,2017
103
+ gllm_docproc/loader/pdf/pymupdf_loader.pyi,sha256=m9eve25n7K2VKqTAzivFXTwEwtBlkXxN2ANbY8Hgkg4,3443
104
+ gllm_docproc/loader/pdf/pymupdf_span_loader.pyi,sha256=Rmrrc-VIQuceejRJ0WbWDB2YMwcPLEgsKXG-gDYuUQk,3462
105
+ gllm_docproc/loader/pdf/pymupdf_utils.pyi,sha256=v-K2r0CuQ8aJbKRRmXqtutedQ3-cwlAg2sSMquk19r0,3601
106
+ gllm_docproc/loader/pdf/tabula_loader.pyi,sha256=8y6gHpttx44u4ryovzsW4O7QfvBAMxHbnmaHOBz23I8,1756
107
+ gllm_docproc/loader/pdf/text_inject_pdf_plumber_loader.pyi,sha256=tjuyRXvE50V4sBpTCGLDPJv0Vo0_noZqoyhoP8CUUdM,2008
108
+ gllm_docproc/loader/pptx/__init__.pyi,sha256=ZTVZxVIkdkysesUuZ06EaodddLVJwcaaPvIZdQg2t8E,101
109
+ gllm_docproc/loader/pptx/python_pptx_loader.pyi,sha256=rxjc0e5ksP_ZY7EJQSx_YJLsoQ-W5Cd_OyfBIfL_1go,2532
110
+ gllm_docproc/loader/txt/__init__.pyi,sha256=0Lz4_zBg0emZiL62MCZRBHTB_ebiGWDE7QlXf3SwwrU,72
111
+ gllm_docproc/loader/txt/txt_loader.pyi,sha256=Zyb7a_7MK00BJA2RcycXQdtl7YeBsBlnOHv3g63ktTA,2219
112
+ gllm_docproc/loader/video/__init__.pyi,sha256=THvj_OJ341LQ_XeUcf_mU-Gr8cpTbrIcr7LVESLDGMk,121
113
+ gllm_docproc/loader/video/video_loader_utils.pyi,sha256=IMKY4p3WdVZni4cC6WAvVQQLKHmeDhYeOYPMWkVQrok,4717
114
+ gllm_docproc/loader/video/video_transcript_loader.pyi,sha256=bhd4xtTRj94_--cXL-_oXK3pq85ML0rR0L-EhMz-A-0,3150
115
+ gllm_docproc/loader/xlsx/__init__.pyi,sha256=HbXl6OsK-Ws7NGrz5VOVbBEm-6TqzEF960ekCanbh_k,92
116
+ gllm_docproc/loader/xlsx/openpyxl_loader.pyi,sha256=0pc4Fh2CY1uox515l9dMKPPqboVa_HdhSoTSa_zo8H8,1926
117
+ gllm_docproc/model/__init__.pyi,sha256=HxFPOznQRhtZrJDS41e5nZhqRJFYy54sgYsbCIalwPk,318
118
+ gllm_docproc/model/element.pyi,sha256=V2jE90UFpFje9SHAYN10bEMAUV9IPFxBS6aA66C_jcA,1058
119
+ gllm_docproc/model/element_metadata.pyi,sha256=YqbUryhgAkqMTdncZEIq3VQq_uGmTgTI__A__O3iD4I,811
120
+ gllm_docproc/model/loader_type.pyi,sha256=KElt3ZoKK-8KspLNAgM7d_rEzSRL18orzUgVlpoMVBM,413
121
+ gllm_docproc/model/media.pyi,sha256=8LkDAtgan-ComRLnp6_Kblghovl18hwR6S71K6jnXpM,1660
122
+ gllm_docproc/model/parser_type.pyi,sha256=AVv-VVsLXdEYXWEdXdZAAoIkOn1ocUeNoNfsOGz9mME,383
123
+ gllm_docproc/parser/__init__.pyi,sha256=jY_LxmoS3ye6-3pZM77x0ml-s3d3cARGBATPHDxoXqU,156
124
+ gllm_docproc/parser/base_parser.pyi,sha256=F8vVW8rHMt-6XknlgQy3qEv32rHVg3gUhWABzNsGWhs,1135
125
+ gllm_docproc/parser/pipeline_parser.pyi,sha256=emQW79pLtb3kU1FUig2SWGaA-e8Syw8IXFH1yOUmcPI,1237
126
+ gllm_docproc/parser/document/__init__.pyi,sha256=I-qd3vokhbX6siPe8kvwjs1kWUsFz5rzYS5nkwu_H3g,324
127
+ gllm_docproc/parser/document/docx_parser.pyi,sha256=YAzEoZWVcL0_MGHj6Tlx0OFII3F4XWD3xomMvvMO2kI,1519
128
+ gllm_docproc/parser/document/pdf_parser.pyi,sha256=4OwnSCrl6-pQehpEjQ-vm8Ho66I8JR1BlvidVvzxYig,1511
129
+ gllm_docproc/parser/document/pptx_parser.pyi,sha256=bHGUqVm341W7pggYdZUgU_fWs25WJ_hVHPDs2MHl0k8,1590
130
+ gllm_docproc/parser/document/txt_parser.pyi,sha256=vfLI61LCBTRALu2fG-TEnYEksuFxaiz3A0F3kqbhPN8,863
131
+ gllm_docproc/parser/document/xlsx_parser.pyi,sha256=DJtuJxYJDsWp1DE3k9kSYqObVyYoAb7KOjWow_yqvcE,1068
132
+ gllm_docproc/parser/html/__init__.pyi,sha256=Tq5phnPf8IxLEjAvAS0O1lc6hoCFK8yYtLiHc1q-plM,194
133
+ gllm_docproc/parser/html/flat/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
134
+ gllm_docproc/parser/html/flat/html_flat_parser.pyi,sha256=1Q5tEzaZ7Fro1V3L3ky_IRubkLDMeK6Rl6J8aDGgCfE,1253
135
+ gllm_docproc/parser/html/nested/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
136
+ gllm_docproc/parser/html/nested/html_json_processor.pyi,sha256=MyBi2fAd-qVQrVYeuOi0Mk17MEyiGfltbobzd8506Yw,6319
137
+ gllm_docproc/parser/html/nested/html_nested_parser.pyi,sha256=ZK2dkti7g664LdnlQcIAmtLVCE1rZX66A1VA124PH7A,990
138
+ gllm_docproc/parser/html/nested/nested_element.pyi,sha256=Ym4OEkrT58XFpaMA_gWUeopbxrDPoWanFB0FE5UZLHc,1246
139
+ gllm_docproc/parser/image/__init__.pyi,sha256=5qO_8zVevpSuph9BtfUi_CBk2xlXFcxYh786KWHz7p8,335
140
+ gllm_docproc/parser/image/image_mime_normalization_parser.pyi,sha256=4jOHOVQn5F8jNX2h6lgoZ1t5rnE9dqPmQ2uLnUvpp5U,2044
141
+ gllm_docproc/parser/image/image_plain_small_filter_parser.pyi,sha256=k7FstB7VBgJ-80AUnAMGg4QBe1rZsp2l91zWOhoQRIU,1891
142
+ gllm_docproc/parser/table/__init__.pyi,sha256=MPFJ33qPHic7DgS8QwOI2-0gw9SwhsFjmHDkvGsYHGc,109
143
+ gllm_docproc/parser/table/table_caption_parser.pyi,sha256=VU1y8vg4JfXb-NWvnZP4QiFxDD9mcss_xVFxVyMm10E,2704
144
+ gllm_docproc/request_handler/__init__.pyi,sha256=hJJrWfbAdRUH8jZfjW4Q2PqNhjo6rvK8BTDVCbBoJ28,109
145
+ gllm_docproc/request_handler/base_request_handler.pyi,sha256=kxNm-Qzg5OFL-kqP3PNtQGwn1d3Na2eXNqk6D28fiJ8,445
146
+ gllm_docproc/response_handler/__init__.pyi,sha256=Ch5ht8cCWZqvePwp7Az9Uq-18ZPpghBwB7P8UnmUINw,113
147
+ gllm_docproc/response_handler/base_response_handler.pyi,sha256=vaozBa_1MEoW4EfSC_vLvvI5NrpbxAo2061uHqfrlGM,1249
148
+ gllm_docproc/utils/__init__.pyi,sha256=4JRXAdy2jD9KY041P20tUSOC7ldcL5ouxmNw0wH3Q-8,97
149
+ gllm_docproc/utils/async_utils.pyi,sha256=fvIjvXEQtHNTtEv0-4OuGfClP3TtJ_PBdXxwCpwCDfU,637
150
+ gllm_docproc/utils/file_utils.pyi,sha256=OEw7le7YUxc1HgUuYTzGW9Ml-XBkp56OsWsJW38_95I,2608
151
+ gllm_docproc/utils/html_constants.pyi,sha256=yjsR3x6UlOG8FV09_Ih_HlxZGGZ80GIraBW-mUU05vk,2826
152
+ gllm_docproc/validator/__init__.pyi,sha256=GqonAdrVx64F886RNEx8jC9umhPZgMEeSa952gcLGAA,499
153
+ gllm_docproc/validator/base_validator.pyi,sha256=JzyZQpMhM8khGgQ_j1AP04nJdYpLmLRw_s0AGqTYepY,1585
154
+ gllm_docproc/validator/character_count_validator.pyi,sha256=ovIc2_zCuSYworo-GYz6Ota9ugievC1RWzd-ZPh6Po8,1481
155
+ gllm_docproc/validator/file_size_validator.pyi,sha256=REDnAJzJxLETWYHQtMVPMRiJ_w31Mj2WsCX9Ot2190I,1196
156
+ gllm_docproc/validator/page_count_validator.pyi,sha256=usZslugBmm5rHGElvNzjD9jhJvQ-8MXCZyIghrGYZ5g,1284
157
+ gllm_docproc/validator/pipeline_validator.pyi,sha256=ugtKNk1WgYZjS7GIFjyQzz_AbwdTfNJaWgbzCuM7dP4,1890
158
+ gllm_docproc/validator/model/__init__.pyi,sha256=VYI9jyIs0eIjZYofewTUMzAPziAe2XH5f23QAhK3NoE,176
159
+ gllm_docproc/validator/model/validator_input.pyi,sha256=uefNnvzQdIJZ6cuehl6zwF21zYA5Hd6W27lLp6WgKB8,1833
160
+ gllm_docproc/validator/model/validator_result.pyi,sha256=AkTTC67AWfl6U-z75Q8mOb56ozbguI5uJLn4B6jSAWM,715
161
+ gllm_docproc.build/.gitignore,sha256=aEiIwOuxfzdCmLZe4oB1JsBmCUxwG8x-u-HBCV9JT8E,1
162
+ gllm_docproc_binary-0.7.21.dist-info/METADATA,sha256=68re0ndZoXIDqltAT_tkPNtRu-2plhJUk7Aj4SblfYI,6488
163
+ gllm_docproc_binary-0.7.21.dist-info/WHEEL,sha256=hF0GNNNOCwRi0V1KNSXUmTJBNSRZ_92NEYrTCPhm6WA,104
164
+ gllm_docproc_binary-0.7.21.dist-info/top_level.txt,sha256=FzUqfBCCn6DsB0K9QO5mNXrR2VbqKu__KhFzHgHVz90,13
165
+ gllm_docproc_binary-0.7.21.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: Nuitka (2.6.9)
3
+ Root-Is-Purelib: false
4
+ Tag: cp311-cp311-macosx_13_0_arm64
5
+
@@ -0,0 +1 @@
1
+ gllm_docproc