gllm-docproc-binary 0.7.22__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gllm-docproc-binary might be problematic. Click here for more details.

Files changed (167) hide show
  1. gllm_docproc/__init__.pyi +0 -0
  2. gllm_docproc/chunker/__init__.pyi +3 -0
  3. gllm_docproc/chunker/base_chunker.pyi +28 -0
  4. gllm_docproc/chunker/structured_element/__init__.pyi +3 -0
  5. gllm_docproc/chunker/structured_element/chunk_enricher.pyi +43 -0
  6. gllm_docproc/chunker/structured_element/structured_element_chunker.pyi +80 -0
  7. gllm_docproc/chunker/table/__init__.pyi +3 -0
  8. gllm_docproc/chunker/table/table_chunker.pyi +45 -0
  9. gllm_docproc/converter/__init__.pyi +3 -0
  10. gllm_docproc/converter/base_converter.pyi +15 -0
  11. gllm_docproc/data_generator/__init__.pyi +5 -0
  12. gllm_docproc/data_generator/base_data_generator.pyi +18 -0
  13. gllm_docproc/data_generator/image_data_generator/__init__.pyi +4 -0
  14. gllm_docproc/data_generator/image_data_generator/image_caption_data_generator.pyi +40 -0
  15. gllm_docproc/data_generator/image_data_generator/multi_model_image_caption_data_generator.pyi +51 -0
  16. gllm_docproc/data_generator/pii_data_generator/__init__.pyi +1 -0
  17. gllm_docproc/downloader/__init__.pyi +5 -0
  18. gllm_docproc/downloader/base_downloader.pyi +19 -0
  19. gllm_docproc/downloader/direct_file_url_downloader.pyi +40 -0
  20. gllm_docproc/downloader/google_drive_downloader.pyi +36 -0
  21. gllm_docproc/downloader/html/__init__.pyi +6 -0
  22. gllm_docproc/downloader/html/exception/__init__.pyi +4 -0
  23. gllm_docproc/downloader/html/exception/item_scrape_failed_exception.pyi +16 -0
  24. gllm_docproc/downloader/html/exception/zyte_api_key_not_provided_exception.pyi +15 -0
  25. gllm_docproc/downloader/html/firecrawl_downloader.pyi +49 -0
  26. gllm_docproc/downloader/html/html_downloader.pyi +114 -0
  27. gllm_docproc/downloader/html/requests_downloader.pyi +46 -0
  28. gllm_docproc/downloader/html/scraper/__init__.pyi +0 -0
  29. gllm_docproc/downloader/html/scraper/scraper/__init__.pyi +0 -0
  30. gllm_docproc/downloader/html/scraper/scraper/spiders/__init__.pyi +9 -0
  31. gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_pdf_spider.pyi +27 -0
  32. gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_sitemap_link_spider.pyi +28 -0
  33. gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_sitemap_spider.pyi +61 -0
  34. gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_spider.pyi +66 -0
  35. gllm_docproc/downloader/html/scraper/scraper/spiders/playwright_scrape_spider.pyi +22 -0
  36. gllm_docproc/downloader/html/scraper/scraper/spiders/scrape_spider.pyi +57 -0
  37. gllm_docproc/downloader/html/scraper/scraper/spiders/zyte_scrape_spider.pyi +51 -0
  38. gllm_docproc/downloader/html/scraper/web_scraper_executor.pyi +43 -0
  39. gllm_docproc/downloader/html/utils/__init__.pyi +3 -0
  40. gllm_docproc/downloader/html/utils/web_utils.pyi +39 -0
  41. gllm_docproc/dpo_router/__init__.pyi +5 -0
  42. gllm_docproc/dpo_router/base_dpo_router.pyi +16 -0
  43. gllm_docproc/dpo_router/loader_router.pyi +52 -0
  44. gllm_docproc/dpo_router/parser_router.pyi +42 -0
  45. gllm_docproc/housekeeping/__init__.pyi +3 -0
  46. gllm_docproc/housekeeping/base_housekeeping.pyi +14 -0
  47. gllm_docproc/indexer/__init__.pyi +3 -0
  48. gllm_docproc/indexer/base_indexer.pyi +30 -0
  49. gllm_docproc/indexer/graph/__init__.pyi +4 -0
  50. gllm_docproc/indexer/graph/graph_rag_indexer.pyi +11 -0
  51. gllm_docproc/indexer/graph/light_rag_graph_rag_indexer.pyi +97 -0
  52. gllm_docproc/indexer/graph/llama_index_graph_rag_indexer.pyi +79 -0
  53. gllm_docproc/indexer/vector/__init__.pyi +3 -0
  54. gllm_docproc/indexer/vector/vector_db_indexer.pyi +53 -0
  55. gllm_docproc/loader/__init__.pyi +4 -0
  56. gllm_docproc/loader/audio/__init__.pyi +3 -0
  57. gllm_docproc/loader/audio/audio_loader.pyi +45 -0
  58. gllm_docproc/loader/base_loader.pyi +30 -0
  59. gllm_docproc/loader/csv/__init__.pyi +3 -0
  60. gllm_docproc/loader/csv/pandas_loader.pyi +53 -0
  61. gllm_docproc/loader/docx/__init__.pyi +5 -0
  62. gllm_docproc/loader/docx/docx2python_loader.pyi +46 -0
  63. gllm_docproc/loader/docx/python_docx_loader.pyi +35 -0
  64. gllm_docproc/loader/docx/python_docx_table_loader.pyi +35 -0
  65. gllm_docproc/loader/exception/__init__.pyi +4 -0
  66. gllm_docproc/loader/exception/unsupported_file_extension_error.pyi +7 -0
  67. gllm_docproc/loader/exception/video_conversion_error.pyi +12 -0
  68. gllm_docproc/loader/html/__init__.pyi +5 -0
  69. gllm_docproc/loader/html/exception/__init__.pyi +3 -0
  70. gllm_docproc/loader/html/exception/html_load_exception.pyi +7 -0
  71. gllm_docproc/loader/html/flat/__init__.pyi +3 -0
  72. gllm_docproc/loader/html/flat/html_flat_base_handler.pyi +65 -0
  73. gllm_docproc/loader/html/flat/html_flat_loader.pyi +30 -0
  74. gllm_docproc/loader/html/flat/html_flat_merger.pyi +22 -0
  75. gllm_docproc/loader/html/html_base_loader.pyi +25 -0
  76. gllm_docproc/loader/html/nested/__init__.pyi +3 -0
  77. gllm_docproc/loader/html/nested/dictionary_utils.pyi +40 -0
  78. gllm_docproc/loader/html/nested/html_nested_base_handler.pyi +128 -0
  79. gllm_docproc/loader/html/nested/html_nested_element_handler.pyi +24 -0
  80. gllm_docproc/loader/html/nested/html_nested_loader.pyi +15 -0
  81. gllm_docproc/loader/html/utils/__init__.pyi +0 -0
  82. gllm_docproc/loader/html/utils/flat_table_utils.pyi +44 -0
  83. gllm_docproc/loader/html/utils/html_utils.pyi +41 -0
  84. gllm_docproc/loader/html/utils/removed_components.pyi +53 -0
  85. gllm_docproc/loader/html/utils/string_utils.pyi +33 -0
  86. gllm_docproc/loader/html/utils/table_utils.pyi +78 -0
  87. gllm_docproc/loader/image/__init__.pyi +3 -0
  88. gllm_docproc/loader/image/image_loader.pyi +54 -0
  89. gllm_docproc/loader/json/__init__.pyi +3 -0
  90. gllm_docproc/loader/json/json_elements_loader.pyi +35 -0
  91. gllm_docproc/loader/loader_utils.pyi +43 -0
  92. gllm_docproc/loader/pdf/__init__.pyi +14 -0
  93. gllm_docproc/loader/pdf/adobe_pdf_extract_loader.pyi +37 -0
  94. gllm_docproc/loader/pdf/azure_ai_document_intelligence_loader.pyi +47 -0
  95. gllm_docproc/loader/pdf/azure_ai_document_intelligence_raw_loader.pyi +49 -0
  96. gllm_docproc/loader/pdf/glair_vision_ocr_loader.pyi +38 -0
  97. gllm_docproc/loader/pdf/pdf_loader_utils.pyi +59 -0
  98. gllm_docproc/loader/pdf/pdf_miner_loader.pyi +38 -0
  99. gllm_docproc/loader/pdf/pdf_miner_word_loader.pyi +33 -0
  100. gllm_docproc/loader/pdf/pdf_page_loader.pyi +41 -0
  101. gllm_docproc/loader/pdf/pdf_plumber_loader.pyi +35 -0
  102. gllm_docproc/loader/pdf/pymupdf_loader.pyi +55 -0
  103. gllm_docproc/loader/pdf/pymupdf_span_loader.pyi +56 -0
  104. gllm_docproc/loader/pdf/pymupdf_utils.pyi +77 -0
  105. gllm_docproc/loader/pdf/tabula_loader.pyi +32 -0
  106. gllm_docproc/loader/pdf/text_inject_pdf_plumber_loader.pyi +37 -0
  107. gllm_docproc/loader/pipeline_loader.pyi +48 -0
  108. gllm_docproc/loader/pptx/__init__.pyi +3 -0
  109. gllm_docproc/loader/pptx/python_pptx_loader.pyi +48 -0
  110. gllm_docproc/loader/txt/__init__.pyi +3 -0
  111. gllm_docproc/loader/txt/txt_loader.pyi +55 -0
  112. gllm_docproc/loader/video/__init__.pyi +3 -0
  113. gllm_docproc/loader/video/video_loader_utils.pyi +97 -0
  114. gllm_docproc/loader/video/video_transcript_loader.pyi +59 -0
  115. gllm_docproc/loader/xlsx/__init__.pyi +3 -0
  116. gllm_docproc/loader/xlsx/openpyxl_loader.pyi +36 -0
  117. gllm_docproc/model/__init__.pyi +7 -0
  118. gllm_docproc/model/element.pyi +38 -0
  119. gllm_docproc/model/element_metadata.pyi +35 -0
  120. gllm_docproc/model/loader_type.pyi +20 -0
  121. gllm_docproc/model/media.pyi +51 -0
  122. gllm_docproc/model/parser_type.pyi +19 -0
  123. gllm_docproc/parser/__init__.pyi +4 -0
  124. gllm_docproc/parser/base_parser.pyi +28 -0
  125. gllm_docproc/parser/document/__init__.pyi +7 -0
  126. gllm_docproc/parser/document/docx_parser.pyi +27 -0
  127. gllm_docproc/parser/document/pdf_parser.pyi +35 -0
  128. gllm_docproc/parser/document/pptx_parser.pyi +34 -0
  129. gllm_docproc/parser/document/txt_parser.pyi +22 -0
  130. gllm_docproc/parser/document/xlsx_parser.pyi +26 -0
  131. gllm_docproc/parser/html/__init__.pyi +4 -0
  132. gllm_docproc/parser/html/flat/__init__.pyi +0 -0
  133. gllm_docproc/parser/html/flat/html_flat_parser.pyi +27 -0
  134. gllm_docproc/parser/html/nested/__init__.pyi +0 -0
  135. gllm_docproc/parser/html/nested/html_json_processor.pyi +158 -0
  136. gllm_docproc/parser/html/nested/html_nested_parser.pyi +24 -0
  137. gllm_docproc/parser/html/nested/nested_element.pyi +31 -0
  138. gllm_docproc/parser/image/__init__.pyi +4 -0
  139. gllm_docproc/parser/image/image_mime_normalization_parser.pyi +43 -0
  140. gllm_docproc/parser/image/image_plain_small_filter_parser.pyi +45 -0
  141. gllm_docproc/parser/pipeline_parser.pyi +33 -0
  142. gllm_docproc/parser/table/__init__.pyi +3 -0
  143. gllm_docproc/parser/table/table_caption_parser.pyi +66 -0
  144. gllm_docproc/request_handler/__init__.pyi +3 -0
  145. gllm_docproc/request_handler/base_request_handler.pyi +16 -0
  146. gllm_docproc/response_handler/__init__.pyi +3 -0
  147. gllm_docproc/response_handler/base_response_handler.pyi +38 -0
  148. gllm_docproc/utils/__init__.pyi +3 -0
  149. gllm_docproc/utils/async_utils.pyi +22 -0
  150. gllm_docproc/utils/file_utils.pyi +76 -0
  151. gllm_docproc/utils/html_constants.pyi +122 -0
  152. gllm_docproc/validator/__init__.pyi +6 -0
  153. gllm_docproc/validator/base_validator.pyi +34 -0
  154. gllm_docproc/validator/character_count_validator.pyi +26 -0
  155. gllm_docproc/validator/file_size_validator.pyi +20 -0
  156. gllm_docproc/validator/model/__init__.pyi +4 -0
  157. gllm_docproc/validator/model/validator_input.pyi +50 -0
  158. gllm_docproc/validator/model/validator_result.pyi +19 -0
  159. gllm_docproc/validator/page_count_validator.pyi +23 -0
  160. gllm_docproc/validator/pipeline_validator.pyi +40 -0
  161. gllm_docproc.build/.gitignore +1 -0
  162. gllm_docproc.cp311-win_amd64.pyd +0 -0
  163. gllm_docproc.pyi +220 -0
  164. gllm_docproc_binary-0.7.22.dist-info/METADATA +216 -0
  165. gllm_docproc_binary-0.7.22.dist-info/RECORD +167 -0
  166. gllm_docproc_binary-0.7.22.dist-info/WHEEL +5 -0
  167. gllm_docproc_binary-0.7.22.dist-info/top_level.txt +1 -0
gllm_docproc.pyi ADDED
@@ -0,0 +1,220 @@
1
+ # This file was generated by Nuitka
2
+
3
+ # Stubs included by default
4
+
5
+
6
+ __name__ = ...
7
+
8
+
9
+
10
+ # Modules used internally, to allow implicit dependencies to be seen:
11
+ import os
12
+ import abc
13
+ import typing
14
+ import hashlib
15
+ import inspect
16
+ import re
17
+ import langchain_text_splitters
18
+ import gllm_docproc.chunker.table.TableChunker
19
+ import pandas
20
+ import asyncio
21
+ import concurrent
22
+ import concurrent.futures
23
+ import concurrent.futures.ThreadPoolExecutor
24
+ import gllm_multimodal
25
+ import gllm_multimodal.constants
26
+ import gllm_multimodal.modality_converter
27
+ import gllm_multimodal.modality_converter.image_to_text
28
+ import gllm_multimodal.modality_converter.image_to_text.image_to_caption
29
+ import gllm_multimodal.modality_converter.image_to_text.image_to_caption.image_to_caption
30
+ import json
31
+ import gllm_core
32
+ import gllm_core.utils
33
+ import gllm_core.utils.logger_manager
34
+ import gllm_core.utils.retry
35
+ import gllm_inference
36
+ import gllm_inference.builder
37
+ import gllm_inference.output_parser
38
+ import gllm_inference.prompt_builder
39
+ import gllm_multimodal.modality_converter.image_to_text.image_to_caption.preset_image_to_caption
40
+ import gllm_privacy
41
+ import gllm_privacy.pii_detector
42
+ import gllm_privacy.pii_detector.text_analyzer
43
+ import gllm_privacy.pii_detector.text_anonymizer
44
+ import langdetect
45
+ import mimetypes
46
+ import time
47
+ import uuid
48
+ import pathlib
49
+ import magic
50
+ import requests
51
+ import requests.adapters
52
+ import urllib3
53
+ import urllib3.util
54
+ import urllib3.util.retry
55
+ import gllm_docproc.downloader.BaseDownloader
56
+ import ntpath
57
+ import bosa_connectors
58
+ import bosa_connectors.connector
59
+ import bosa_connectors.models
60
+ import bosa_connectors.models.file
61
+ import datetime
62
+ import firecrawl
63
+ import pydantic
64
+ import copy
65
+ import html_to_markdown
66
+ import scrapy
67
+ import gllm_docproc.downloader.html.exception.ItemScrapeFailedException
68
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.CrawlBaseSpider
69
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.CrawlSitemapLinkSpider
70
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.CrawlSitemapSpider
71
+ import gllm_docproc.downloader.html.utils.generate_filename_from_url
72
+ import gllm_docproc.downloader.html.utils.is_valid_url
73
+ import scrapy.http
74
+ import scrapy_playwright
75
+ import scrapy_playwright.page
76
+ import gllm_docproc.downloader.html.utils.clean_url
77
+ import urllib
78
+ import urllib.parse
79
+ import scrapy.crawler
80
+ import scrapy.spiders
81
+ import scrapy.spiders.sitemap
82
+ import scrapy.utils
83
+ import scrapy.utils.sitemap
84
+ import scrapy.linkextractors
85
+ import billiard
86
+ import gllm_docproc.downloader.html.exception.ZyteApiKeyNotProvidedException
87
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.ScrapeSpider
88
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.ZyteScrapeSpider
89
+ import gllm_multimodal.utils
90
+ import gllm_multimodal.utils.audio_to_text_utils
91
+ import gllm_docproc.loader.image.ImageLoader
92
+ import gllm_docproc.loader.txt.TXTLoader
93
+ import gllm_docproc.model.Element
94
+ import gllm_docproc.model.LoaderType
95
+ import _frozen_importlib_external
96
+ import gllm_docproc.indexer.BaseIndexer
97
+ import gllm_datastore
98
+ import gllm_datastore.graph_data_store
99
+ import gllm_datastore.graph_data_store.light_rag_data_store
100
+ import lightrag
101
+ import lightrag.lightrag
102
+ import gllm_datastore.graph_data_store.llama_index_graph_rag_data_store
103
+ import gllm_datastore.graph_data_store.llama_index_neo4j_graph_rag_data_store
104
+ import llama_index
105
+ import llama_index.core
106
+ import llama_index.core.base
107
+ import llama_index.core.base.embeddings
108
+ import llama_index.core.base.embeddings.base
109
+ import llama_index.core.base.llms
110
+ import llama_index.core.base.llms.base
111
+ import llama_index.core.indices
112
+ import llama_index.core.indices.property_graph
113
+ import llama_index.core.indices.property_graph.transformations
114
+ import llama_index.core.schema
115
+ import llama_index.core.vector_stores
116
+ import llama_index.core.vector_stores.types
117
+ import __future__
118
+ import gllm_core.schema
119
+ import gllm_core.utils.concurrency
120
+ import gllm_datastore.core
121
+ import gllm_datastore.core.capabilities
122
+ import gllm_inference.schema
123
+ import tqdm
124
+ import gllm_multimodal.modality_converter.audio_to_text
125
+ import gllm_multimodal.modality_converter.audio_to_text.audio_to_text
126
+ import gllm_multimodal.modality_converter.schema
127
+ import csv
128
+ import base64
129
+ import docx2python
130
+ import docx2python.docx_output
131
+ import docx
132
+ import docx.table
133
+ import docx.text
134
+ import docx.text.paragraph
135
+ import gllm_docproc.loader.html.flat.HTMLFlatLoader
136
+ import gllm_docproc.loader.html.nested.HTMLNestedLoader
137
+ import parsel
138
+ import cairosvg
139
+ import gllm_docproc.loader.html.exception.HtmlLoadException
140
+ import tabulate
141
+ import itertools
142
+ import re.sub
143
+ import w3lib
144
+ import w3lib.html
145
+ import io
146
+ import PIL
147
+ import gllm_docproc.loader.exception.UnsupportedFileExtensionError
148
+ import zipfile
149
+ import adobe
150
+ import adobe.pdfservices
151
+ import adobe.pdfservices.operation
152
+ import adobe.pdfservices.operation.auth
153
+ import adobe.pdfservices.operation.auth.service_principal_credentials
154
+ import adobe.pdfservices.operation.io
155
+ import adobe.pdfservices.operation.io.cloud_asset
156
+ import adobe.pdfservices.operation.io.stream_asset
157
+ import adobe.pdfservices.operation.pdf_services
158
+ import adobe.pdfservices.operation.pdf_services_media_type
159
+ import adobe.pdfservices.operation.pdf_services_response
160
+ import adobe.pdfservices.operation.pdfjobs
161
+ import adobe.pdfservices.operation.pdfjobs.jobs
162
+ import adobe.pdfservices.operation.pdfjobs.jobs.extract_pdf_job
163
+ import adobe.pdfservices.operation.pdfjobs.params
164
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf
165
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf.extract_element_type
166
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf.extract_pdf_params
167
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf.extract_renditions_element_type
168
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf.table_structure_type
169
+ import adobe.pdfservices.operation.pdfjobs.result
170
+ import adobe.pdfservices.operation.pdfjobs.result.extract_pdf_result
171
+ import fitz
172
+ import azure
173
+ import azure.ai
174
+ import azure.ai.documentintelligence
175
+ import azure.ai.documentintelligence.models
176
+ import azure.core
177
+ import azure.core.credentials
178
+ import collections
179
+ import collections.Counter
180
+ import pdfminer
181
+ import pdfminer.high_level
182
+ import pdfminer.layout
183
+ import gllm_docproc.loader.BaseLoader
184
+ import pdfplumber
185
+ import pdfplumber._typing
186
+ import pdfplumber.page
187
+ import pdfplumber.table
188
+ import math
189
+ import numpy
190
+ import tabula
191
+ import tabula.io
192
+ import gllm_datastore.cache
193
+ import gllm_datastore.cache.hybrid_cache
194
+ import gllm_datastore.cache.hybrid_cache.hybrid_cache
195
+ import gllm_datastore.cache.hybrid_cache.utils
196
+ import pptx
197
+ import pptx.chart
198
+ import pptx.chart.chart
199
+ import pptx.shapes
200
+ import pptx.shapes.base
201
+ import pptx.table
202
+ import sys
203
+ import soundfile
204
+ import scipy
205
+ import gllm_docproc.loader.exception.VideoConversionError
206
+ import gi
207
+ import gi.repository
208
+ import gllm_docproc.utils.run_async_in_sync
209
+ import openpyxl
210
+ import openpyxl.cell
211
+ import openpyxl.cell.cell
212
+ import openpyxl.worksheet
213
+ import openpyxl.worksheet.worksheet
214
+ import enum
215
+ import gllm_docproc.parser.BaseParser
216
+ import subprocess
217
+ import tempfile
218
+ import gllm_multimodal.utils.image_utils
219
+ import codecs
220
+ import types
@@ -0,0 +1,216 @@
1
+ Metadata-Version: 2.2
2
+ Name: gllm-docproc-binary
3
+ Version: 0.7.22
4
+ Summary: A library for orchestrating the processing of document. Typically in a Gen AI applications (but not limited to just Gen AI).
5
+ Author-email: GenAI SDK Team <gat-sdk@gdplabs.id>
6
+ Requires-Python: <3.13,>=3.11
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: bosa-connectors-binary<0.4.0,>=0.3.0
9
+ Requires-Dist: gllm-core-binary<0.4.0,>=0.3.0
10
+ Requires-Dist: gllm-datastore-binary[chroma,elasticsearch]<0.6.0,>=0.5.0
11
+ Requires-Dist: gllm-multimodal-binary[audio]<0.3.0,>=0.2.0
12
+ Requires-Dist: gllm-privacy-binary<0.5.0,>=0.4.0
13
+ Requires-Dist: langchain-text-splitters<0.4.0,>=0.3.2
14
+ Requires-Dist: pandas<3.0.0,>=2.2.3
15
+ Requires-Dist: pydantic<3.0.0,>=2.9.1
16
+ Requires-Dist: tabulate<0.10.0,>=0.9.0
17
+ Requires-Dist: python-magic<0.5.0,>=0.4.27; sys_platform != "win32"
18
+ Requires-Dist: python-magic-bin<0.5.0,>=0.4.14; sys_platform == "win32"
19
+ Provides-Extra: dev
20
+ Requires-Dist: coverage<8.0.0,>=7.4.4; extra == "dev"
21
+ Requires-Dist: mypy<2.0.0,>=1.15.0; extra == "dev"
22
+ Requires-Dist: pre-commit<4.0.0,>=3.7.0; extra == "dev"
23
+ Requires-Dist: pytest<9.0.0,>=8.1.1; extra == "dev"
24
+ Requires-Dist: pytest-asyncio<1.0.0,>=0.23.6; extra == "dev"
25
+ Requires-Dist: pytest-cov<6.0.0,>=5.0.0; extra == "dev"
26
+ Requires-Dist: ruff<1.0.0,>=0.6.7; extra == "dev"
27
+ Provides-Extra: audio
28
+ Requires-Dist: librosa<0.11.0,>=0.10.1; extra == "audio"
29
+ Requires-Dist: tqdm<5.0.0,>=4.66.2; extra == "audio"
30
+ Provides-Extra: docx
31
+ Requires-Dist: docx2python<3.0.0,>=2.8.0; extra == "docx"
32
+ Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == "docx"
33
+ Provides-Extra: html
34
+ Requires-Dist: billiard<5.0.0,>=4.2.1; extra == "html"
35
+ Requires-Dist: firecrawl-py<5.0.0,>=4.3.6; extra == "html"
36
+ Requires-Dist: html-to-markdown<2.0.0,>=1.9.0; extra == "html"
37
+ Requires-Dist: playwright<2.0.0,>=1.40.0; extra == "html"
38
+ Requires-Dist: scrapy<3.0.0,>=2.11.0; extra == "html"
39
+ Requires-Dist: scrapy-playwright<0.1.0,>=0.0.33; extra == "html"
40
+ Requires-Dist: scrapy-zyte-api<1.0.0,>=0.12.2; extra == "html"
41
+ Requires-Dist: zyte-api<1.0.0,>=0.4.8; extra == "html"
42
+ Provides-Extra: html-svg
43
+ Requires-Dist: cairosvg<3.0.0,>=2.8.2; extra == "html-svg"
44
+ Provides-Extra: image
45
+ Requires-Dist: aioresponses<1.0.0,>=0.7.0; extra == "image"
46
+ Requires-Dist: boto3<2.0.0,>=1.38.10; extra == "image"
47
+ Requires-Dist: pillow<12.0.0,>=11.2.1; extra == "image"
48
+ Provides-Extra: kg
49
+ Requires-Dist: asyncpg<1.0.0,>=0.30.0; extra == "kg"
50
+ Requires-Dist: gllm-datastore-binary[kg]<0.6.0,>=0.5.0; extra == "kg"
51
+ Requires-Dist: lightrag-hku<2.0.0,>=1.4.6; extra == "kg"
52
+ Requires-Dist: llama-index-embeddings-openai<1.0.0,>=0.3.0; extra == "kg"
53
+ Requires-Dist: llama-index-llms-openai<1.0.0,>=0.3.0; extra == "kg"
54
+ Provides-Extra: pdf
55
+ Requires-Dist: azure-ai-documentintelligence<2.0.0,>=1.0.0b3; extra == "pdf"
56
+ Requires-Dist: jpype1<2.0.0,>=1.5.0; extra == "pdf"
57
+ Requires-Dist: pdfminer-six<20250000,>=20231228; extra == "pdf"
58
+ Requires-Dist: pdfplumber<1.0.0,>=0.11.4; extra == "pdf"
59
+ Requires-Dist: pdfservices-sdk<5.0.0,>=4.0.0; extra == "pdf"
60
+ Requires-Dist: pymupdf<2.0.0,>=1.24.10; extra == "pdf"
61
+ Requires-Dist: tabula-py<3.0.0,>=2.9.3; extra == "pdf"
62
+ Provides-Extra: pii
63
+ Requires-Dist: langdetect<2.0.0,>=1.0.0; extra == "pii"
64
+ Requires-Dist: torch<3.0.0,>=2.0.0; extra == "pii"
65
+ Provides-Extra: pptx
66
+ Requires-Dist: python-pptx<2.0.0,>=1.0.2; extra == "pptx"
67
+ Provides-Extra: video
68
+ Requires-Dist: PyGObject==3.50.0; sys_platform != "win32" and extra == "video"
69
+ Requires-Dist: numpy<2.0.0,>=1.26.0; extra == "video"
70
+ Requires-Dist: scipy<2.0.0,>=1.15.0; extra == "video"
71
+ Requires-Dist: soundfile<0.14.0,>=0.13.1; extra == "video"
72
+ Provides-Extra: xlsx
73
+ Requires-Dist: openpyxl<4.0.0,>=3.0.10; extra == "xlsx"
74
+
75
+ # GLLM Docproc
76
+
77
+ ## Description
78
+ A library for orchestrating document processing, typically in Gen AI applications (but not limited to Gen AI).
79
+
80
+ ---
81
+
82
+ ## Installation
83
+
84
+ ### Prerequisites
85
+
86
+ Mandatory:
87
+ 1. Python 3.11+ — [Install here](https://www.python.org/downloads/)
88
+ 2. pip — [Install here](https://pip.pypa.io/en/stable/installation/)
89
+ 3. uv — [Install here](https://docs.astral.sh/uv/getting-started/installation/)
90
+ 4. gcloud CLI (for authentication) — [Install here](https://cloud.google.com/sdk/docs/install), then log in using:
91
+ ```bash
92
+ gcloud auth login
93
+ ```
94
+
95
+ ---
96
+
97
+ ### Install from Artifact Registry
98
+
99
+ This requires authentication via the `gcloud` CLI.
100
+
101
+ 1. Export token
102
+ ```
103
+ export GCLOUD_ACCESS_TOKEN="$(gcloud auth print-access-token)"
104
+ ```
105
+
106
+ 2. Configure the index in your `pyproject.toml`
107
+ ```
108
+ [[tool.uv.index]]
109
+ name = "gen-ai-internal"
110
+ url = "https://oauth2accesstoken:${GCLOUD_ACCESS_TOKEN}@glsdk.gdplabs.id/gen-ai-internal/simple/"
111
+ ```
112
+
113
+ 3. Add the dependency
114
+ ```
115
+ uv add gllm-docproc
116
+ ```
117
+
118
+ ---
119
+
120
+ ## Local Development Setup
121
+
122
+ ### Prerequisites
123
+
124
+ 1. Python 3.11+ — [Install here](https://www.python.org/downloads/)
125
+ 2. pip — [Install here](https://pip.pypa.io/en/stable/installation/)
126
+ 3. uv — [Install here](https://docs.astral.sh/uv/getting-started/installation/)
127
+ 4. gcloud CLI — [Install here](https://cloud.google.com/sdk/docs/install), then log in using:
128
+
129
+ ```bash
130
+ gcloud auth login
131
+ ```
132
+ 5. Git — [Install here](https://git-scm.com/downloads)
133
+ 6. Access to the [GDP Labs SDK GitHub repository](https://github.com/GDP-ADMIN/gl-sdk)
134
+
135
+ ---
136
+
137
+ ### 1. Clone Repository
138
+
139
+ ```bash
140
+ git clone git@github.com:GDP-ADMIN/gl-sdk.git
141
+ cd gl-sdk/libs/gllm-docproc
142
+ ```
143
+
144
+ ---
145
+
146
+ ### 2. Setup Authentication
147
+
148
+ Set the following environment variables to authenticate with internal package indexes:
149
+
150
+ ```bash
151
+ export UV_INDEX_GEN_AI_INTERNAL_USERNAME=oauth2accesstoken
152
+ export UV_INDEX_GEN_AI_INTERNAL_PASSWORD="$(gcloud auth print-access-token)"
153
+ export UV_INDEX_GEN_AI_USERNAME=oauth2accesstoken
154
+ export UV_INDEX_GEN_AI_PASSWORD="$(gcloud auth print-access-token)"
155
+ ```
156
+
157
+ ---
158
+
159
+ ### 3. Quick Setup
160
+
161
+ Run:
162
+
163
+ ```bash
164
+ make setup
165
+ ```
166
+
167
+ ---
168
+
169
+ ### 4. Activate Virtual Environment
170
+
171
+ ```bash
172
+ source .venv/bin/activate
173
+ ```
174
+
175
+ ---
176
+
177
+ ## Local Development Utilities
178
+
179
+ The following Makefile commands are available for quick operations:
180
+
181
+ ### Install uv
182
+
183
+ ```bash
184
+ make install-uv
185
+ ```
186
+
187
+ ### Install Pre-Commit
188
+
189
+ ```bash
190
+ make install-pre-commit
191
+ ```
192
+
193
+ ### Install Dependencies
194
+
195
+ ```bash
196
+ make install
197
+ ```
198
+
199
+ ### Update Dependencies
200
+
201
+ ```bash
202
+ make update
203
+ ```
204
+
205
+ ### Run Tests
206
+
207
+ ```bash
208
+ make test
209
+ ```
210
+
211
+ ---
212
+
213
+ ## Contributing
214
+
215
+ Please refer to the [Python Style Guide](https://docs.google.com/document/d/1uRggCrHnVfDPBnG641FyQBwUwLoFw0kTzNqRm92vUwM/edit?usp=sharing)
216
+ for information about code style, documentation standards, and SCA requirements.
@@ -0,0 +1,167 @@
1
+ gllm_docproc.cp311-win_amd64.pyd,sha256=VFQvsLfgeK22HGeCw0e79bm6W4_Ds3yY8WhXTLb-QsI,4417024
2
+ gllm_docproc.pyi,sha256=MyO85LevGsOhcS3HJUvc_72LpQP_UBa3dgl5h7VjpVY,7100
3
+ gllm_docproc/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ gllm_docproc/chunker/__init__.pyi,sha256=GOOIYg0-Fjd3g9uJDo9q8J0Gabwt_GHD_44axN6Y-qc,83
5
+ gllm_docproc/chunker/base_chunker.pyi,sha256=1sOrmm0vHwog08QolTqvW6bv5AJ7Wi0Mg9R_e_A8Enc,1160
6
+ gllm_docproc/chunker/structured_element/__init__.pyi,sha256=0SzEj-OALKTVr4v4WgKJVZUnemuzMkhunZndFhPlR4w,136
7
+ gllm_docproc/chunker/structured_element/chunk_enricher.pyi,sha256=Va5FJQEmZu1L88P1b846CUMNyzSzRrQ-cOUTOR4bzg8,1763
8
+ gllm_docproc/chunker/structured_element/structured_element_chunker.pyi,sha256=b8mxV29ozJGKVPTWTpCa6KXE1rTavZlL350EpFR1wYI,4157
9
+ gllm_docproc/chunker/table/__init__.pyi,sha256=HP844LD8YE2VZWqiYLFEOYIM3qIHggqHm5BCAn15xX4,162
10
+ gllm_docproc/chunker/table/table_chunker.pyi,sha256=EqqDL_l9olB81p3x_EG-bjNj2-RizHrLmOA5VVwQf0o,1898
11
+ gllm_docproc/converter/__init__.pyi,sha256=jqqxJRyzpYAPcH6HaFjeuVTGAoxgEvMSOYc0SR2iy6c,91
12
+ gllm_docproc/converter/base_converter.pyi,sha256=lm9KeWtmf61oSU2tQDFKsrBuDKQc5ZMLC80Lz0VZMPM,442
13
+ gllm_docproc/data_generator/__init__.pyi,sha256=1yHopJTd6IRClBEaS0H-9hKJtFIy8dqCpbQDYh2vl70,527
14
+ gllm_docproc/data_generator/base_data_generator.pyi,sha256=y35Gs4-U1WLi7KlTBbMQ0ZQ8PZ-cMShvnjUgPaBgYPo,790
15
+ gllm_docproc/data_generator/image_data_generator/__init__.pyi,sha256=uyBXhMl9V81F2IAWzoXMzheYZNkqqLPRSsFb3PVhvrQ,406
16
+ gllm_docproc/data_generator/image_data_generator/image_caption_data_generator.pyi,sha256=RDncQDpFlJFHgEQU9PftlPEmdI-es2-jAaMQhbjP8Sg,2160
17
+ gllm_docproc/data_generator/image_data_generator/multi_model_image_caption_data_generator.pyi,sha256=wNNcudgmSLAuRqPH-0yCfcrUHH3qvcZ0orHrmxMv3B8,2771
18
+ gllm_docproc/data_generator/pii_data_generator/__init__.pyi,sha256=oNDmiHWIVo9IqZrmwj3TjFlJ3fn-ajHoxILrMBjY248,123
19
+ gllm_docproc/downloader/__init__.pyi,sha256=P67jUf_2Odq5xN_hy-XVrrw_hkd3Oqb_g7ygsgmXdlo,324
20
+ gllm_docproc/downloader/base_downloader.pyi,sha256=BNHN_JxZYa6lIiKrPC1It8KDS5P5XWi-IUs1xK9fvkY,832
21
+ gllm_docproc/downloader/direct_file_url_downloader.pyi,sha256=XEE_ZkNZd8DPB-c4gn7SJL4g9XWcqxE3vrhcB3KIFEk,1862
22
+ gllm_docproc/downloader/google_drive_downloader.pyi,sha256=j20WE_p5Cdf99qXvaWlkqi66N1bm_h1HlEoVPlgqTc0,1589
23
+ gllm_docproc/downloader/html/__init__.pyi,sha256=XxbGV-dz6ByYk91vXbu6UqSdtaLDuTpdrBHa6bKYjpM,344
24
+ gllm_docproc/downloader/html/firecrawl_downloader.pyi,sha256=Et44c2afB54d3iHW5CXgUHkFtEAGMtA-_0KR71VTyPI,2472
25
+ gllm_docproc/downloader/html/html_downloader.pyi,sha256=ossOxNEN4O10Mcug0oDTktC9DXZf4Mqu1FEsB00E15Q,5965
26
+ gllm_docproc/downloader/html/requests_downloader.pyi,sha256=1mWBhGlYArNmB4zS4v_7-j1kYEg8dHXXrUcWZ8BBeJo,2401
27
+ gllm_docproc/downloader/html/exception/__init__.pyi,sha256=Sx4tSQh97yLWL5dSd6ZtdUiNEpoMpupTVDvJWHVjA9g,290
28
+ gllm_docproc/downloader/html/exception/item_scrape_failed_exception.pyi,sha256=PB0Uurm2v3anDds1eV9IGnw8fzuTjAZNc_E2sl_LrWM,623
29
+ gllm_docproc/downloader/html/exception/zyte_api_key_not_provided_exception.pyi,sha256=UD1WzdOLnm4pWgu3BSVen9Hp41FzZ_9W_8eDF9FfDJQ,604
30
+ gllm_docproc/downloader/html/scraper/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
+ gllm_docproc/downloader/html/scraper/web_scraper_executor.pyi,sha256=3ssFcaAgxl07a0zVoioQfPIOgXhkQVe_isisclwwztk,2155
32
+ gllm_docproc/downloader/html/scraper/scraper/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
+ gllm_docproc/downloader/html/scraper/scraper/spiders/__init__.pyi,sha256=2FI0gNdvv8PDRTjfEr_1xZ1PNhufmw2i_T1PhpZReOM,667
34
+ gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_pdf_spider.pyi,sha256=GDx4ViQhGi0ewHpei2bseDQoPdHM1NZJBrC-NXzjopg,1366
35
+ gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_sitemap_link_spider.pyi,sha256=HrB-wf9ERVL95cFp9kUupsV9JdfY5SP-FFAmW1TqAoY,1148
36
+ gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_sitemap_spider.pyi,sha256=qc32owSa6u7vtfzIrqooCRhAo-cmNfkKiD8xnSeKb50,2605
37
+ gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_spider.pyi,sha256=Nfo-6elB52U3rC_osCnuc2Oedyqs4w0-Fc_QHcitAjE,2633
38
+ gllm_docproc/downloader/html/scraper/scraper/spiders/playwright_scrape_spider.pyi,sha256=YuWJJIhYfA53Z2_GJCoRjOVUdTr7uw-9iEtzhVg4pUQ,969
39
+ gllm_docproc/downloader/html/scraper/scraper/spiders/scrape_spider.pyi,sha256=G-EZweoO5NjjicMdq5yKLMIHhk1vMrYoA1zzIydLUtA,2139
40
+ gllm_docproc/downloader/html/scraper/scraper/spiders/zyte_scrape_spider.pyi,sha256=psibiVfIauedH_yq814QlXFegvWrymLQpskwFGBkyog,2189
41
+ gllm_docproc/downloader/html/utils/__init__.pyi,sha256=tPm7b-zPyJLbBYKxg5apjGXiJT3vHfS4WqOJ6Cd3A6E,208
42
+ gllm_docproc/downloader/html/utils/web_utils.pyi,sha256=FBEblpXsqC0pR6nRpsU3ziZcUXNcYHpP9FkUA8AnHAk,1434
43
+ gllm_docproc/dpo_router/__init__.pyi,sha256=PZ2ZjotPyu5y0to7O02t3PocsQqIFTtnPJFBJpnuHvM,238
44
+ gllm_docproc/dpo_router/base_dpo_router.pyi,sha256=bwSj4_EHQWVcOghZoidCrn4OBiUsZM6NQpLS_cDHkCI,595
45
+ gllm_docproc/dpo_router/loader_router.pyi,sha256=8uEqkI7tEjixPkj8SXR_iDQgFhQV4CeeHeabzu-VLXw,2411
46
+ gllm_docproc/dpo_router/parser_router.pyi,sha256=8rV1ImqjvKpmdTaUGfgpcq3-r188kh_-jLKPQVca9TE,1975
47
+ gllm_docproc/housekeeping/__init__.pyi,sha256=oL-C1roDf8io8zh90D4ugD_jAXdB_1CCcggZB1b5sTs,103
48
+ gllm_docproc/housekeeping/base_housekeeping.pyi,sha256=jV3Y5iDsU5biIjm_NJMThUeIK5g77fB3OkRv3dsfEiA,430
49
+ gllm_docproc/indexer/__init__.pyi,sha256=DAX3M09IFTLRzUmyd4XL0a6jhnZmE8UEPxZ-5dTlSt8,103
50
+ gllm_docproc/indexer/base_indexer.pyi,sha256=ITBr186WQb-dNRw6jTEyQDC0wbB7455hp9TtZWR4lP8,1082
51
+ gllm_docproc/indexer/graph/__init__.pyi,sha256=5dcxr1z5CpUJE5Pk9NhI1doLTUk3LbC-GvrOi0GQQ0s,314
52
+ gllm_docproc/indexer/graph/graph_rag_indexer.pyi,sha256=EbzrO4YA09eq2UY4Xyjr2WRwFaIncRYnsepNk3Vcuvg,379
53
+ gllm_docproc/indexer/graph/light_rag_graph_rag_indexer.pyi,sha256=GD0GOykrAHGRJJGI858MA0tYXPnWYTUK0urSbDrhaOY,4288
54
+ gllm_docproc/indexer/graph/llama_index_graph_rag_indexer.pyi,sha256=HlK2bSA-JUHD5wkOlRHIHEChz4RH3xkr8amM7ViBlqY,4897
55
+ gllm_docproc/indexer/vector/__init__.pyi,sha256=st34Q3GoXyMyic0v09arzV7gscH2yPEIp7z-5wWLOkE,100
56
+ gllm_docproc/indexer/vector/vector_db_indexer.pyi,sha256=TH_KL0wgZ76XjOdOd-gZstaKpb2tKySxETkBKNDMO8o,2413
57
+ gllm_docproc/loader/__init__.pyi,sha256=Lzsi_ajlFYzu2teZ2kKiu7HHRHz7-M5ubpJ7XRmZHcg,160
58
+ gllm_docproc/loader/base_loader.pyi,sha256=Bzo7h7czTHcbBg6m85r1T2_rWlcQYOgDaF1EUbI3bOs,1352
59
+ gllm_docproc/loader/loader_utils.pyi,sha256=xt0nyJR3I8t8_lxu-ptJmy38sNhUITnh9kUQISeFLXQ,1871
60
+ gllm_docproc/loader/pipeline_loader.pyi,sha256=xhpvW6EU7mpPHYddrsuSyG3v_981B5Q05Je8ujoexuw,1819
61
+ gllm_docproc/loader/audio/__init__.pyi,sha256=bJMhxunQuKPnJzjXiIkJTtqdaefxb9f33Xw2QB2Wco0,83
62
+ gllm_docproc/loader/audio/audio_loader.pyi,sha256=LHtSs9W78VJ32qStc7lRsFBhwgP3mdWbILwl1n5INRU,2288
63
+ gllm_docproc/loader/csv/__init__.pyi,sha256=Wr2rgvI9f3_yF7Wc6fhN8cDo5RCqTz_GTbJjpJsIivA,87
64
+ gllm_docproc/loader/csv/pandas_loader.pyi,sha256=YNlkJ0scQFUFcV62aMIh6XbUdw96UYZ8NhhVdZROkWI,2861
65
+ gllm_docproc/loader/docx/__init__.pyi,sha256=5JNIvnZyO9ZOMHJHSemAsFafwAf20iUxAiMDnR1U17I,308
66
+ gllm_docproc/loader/docx/docx2python_loader.pyi,sha256=l3HND1hsLBRQ6j_OobH5Y6_rQlkGTXe6KD3DZabaXHI,2569
67
+ gllm_docproc/loader/docx/python_docx_loader.pyi,sha256=3ulT68shiwzK1dOopdgZKWcUTAuMcGhZIrJjwEhQC6M,2051
68
+ gllm_docproc/loader/docx/python_docx_table_loader.pyi,sha256=tQp_Fypis_pz_3VZddrpujV4abzX1xtoXOaDFJSe3R4,1756
69
+ gllm_docproc/loader/exception/__init__.pyi,sha256=5mgwCzDO-ZPFFmpuethHJOMKvT9bSR5DRLTaqoeaaHM,263
70
+ gllm_docproc/loader/exception/unsupported_file_extension_error.pyi,sha256=c5xVjuQPNYZSuvlCHi87KMfrhBK-4R_i7-wr69QdUvQ,268
71
+ gllm_docproc/loader/exception/video_conversion_error.pyi,sha256=SKbp7V8qXuOpPTbUfYOnAn6LcXoGaqpLzN14Lm2aFkc,442
72
+ gllm_docproc/loader/html/__init__.pyi,sha256=hzLamxCb2AwLI8xO9ty0f-qY0kD97iAXCwUfBJyxkYw,244
73
+ gllm_docproc/loader/html/html_base_loader.pyi,sha256=zPdRd0mMw90Q6pwJNzXWWEgea-7tYyHq0Pv1eSL3QZw,1185
74
+ gllm_docproc/loader/html/exception/__init__.pyi,sha256=4pNokkFZd_UWkgce5hvYUS6f5F_b1xZ_DShwmWakUDA,108
75
+ gllm_docproc/loader/html/exception/html_load_exception.pyi,sha256=R9Was3AEqcUXqqbARpgfQ5rnRS0pTL8gPXimqbWXFbo,261
76
+ gllm_docproc/loader/html/flat/__init__.pyi,sha256=GDjScQBkVstxxPnK4DIc3UE81shLXHjmJxgim7q1oj0,96
77
+ gllm_docproc/loader/html/flat/html_flat_base_handler.pyi,sha256=xeH7Q23LhWFeCVgvbZ757_-zWJileknaUgTiU5wUJwg,3250
78
+ gllm_docproc/loader/html/flat/html_flat_loader.pyi,sha256=g8ZSgVvdKV1JQIhzPER_mkNUv27OmfiwM5LptQNC0_8,1811
79
+ gllm_docproc/loader/html/flat/html_flat_merger.pyi,sha256=q3dxADexBv_yHp3-lmtRIwx5Wf-eotrQiBYaR5Do9nE,1378
80
+ gllm_docproc/loader/html/nested/__init__.pyi,sha256=nG7Z3zV4Z4KIG2MVWgaB7V6LA9LyN3JkgMkWdItA0dA,104
81
+ gllm_docproc/loader/html/nested/dictionary_utils.pyi,sha256=mcREqjBuSZ01idMXHu4Kfqa4aITdb4Mvlp8x7SZHX3U,1534
82
+ gllm_docproc/loader/html/nested/html_nested_base_handler.pyi,sha256=CDxr-Tq0qY5sM8fQujdxDr8YTnY2CJJj1PMeB28HzNQ,4862
83
+ gllm_docproc/loader/html/nested/html_nested_element_handler.pyi,sha256=d-vld6eNj_tjcLNRO2nX41Q_vdlZxoiq4kO2zrBuw2k,940
84
+ gllm_docproc/loader/html/nested/html_nested_loader.pyi,sha256=bOw4p5r4H7qYjY6F6ndDdlkGjjqz7Ze9A47oh19yhwo,932
85
+ gllm_docproc/loader/html/utils/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
86
+ gllm_docproc/loader/html/utils/flat_table_utils.pyi,sha256=gYMUixQABTQ-tRsPJPjzAuFErMssvyx7D2NxdDKk25M,1993
87
+ gllm_docproc/loader/html/utils/html_utils.pyi,sha256=T9EC30ChY89B6zvLEf2KI45yoawWZK_ZRRXaqODwaNs,1669
88
+ gllm_docproc/loader/html/utils/removed_components.pyi,sha256=wwpHUfu1HCMNIf8pvS52ibBKx3iAOXpq0TjkFPllGm8,2143
89
+ gllm_docproc/loader/html/utils/string_utils.pyi,sha256=HiKHATp4DGiORMI5WGadVKuNsqHMLVISIJi4MRXQhUs,1125
90
+ gllm_docproc/loader/html/utils/table_utils.pyi,sha256=_6xDInTK0o-XQi6Ylliqijpm8mjGfWjw4rtC_E7XVPY,2818
91
+ gllm_docproc/loader/image/__init__.pyi,sha256=ry_XYHaJlcb2zfTdt4wuiYoEClxgPsWKuGuDYkIQ2gE,83
92
+ gllm_docproc/loader/image/image_loader.pyi,sha256=AG4z5EKerNfn7NefNZFr4Fm9DLiPgm7JXCTUjLWBKjs,2517
93
+ gllm_docproc/loader/json/__init__.pyi,sha256=UwKxlnJQCMBevsVLtEdNqbUwTjSUDAiGVQ1RPCiXQYc,112
94
+ gllm_docproc/loader/json/json_elements_loader.pyi,sha256=1gwS5Osxby09X_3TFbr66_YY221dRonCS8dewwwmSH4,1501
95
+ gllm_docproc/loader/pdf/__init__.pyi,sha256=hNLgphVwW4aZsYmJV4t5zZMvTgVOXuAIn4HveSYAsIg,1290
96
+ gllm_docproc/loader/pdf/adobe_pdf_extract_loader.pyi,sha256=__nBHGfSc8OXlvk6zchSM_D4Up_x6Z-DaLyCnDBiZww,1728
97
+ gllm_docproc/loader/pdf/azure_ai_document_intelligence_loader.pyi,sha256=-_f-jI1GkSQSIXVzrTvAGvg5pjF2vJqwAFHXTDA7_ys,2661
98
+ gllm_docproc/loader/pdf/azure_ai_document_intelligence_raw_loader.pyi,sha256=64yifR4-VOtwBF8r3jlCD85pInibdbdWgWbeylbPCzo,2677
99
+ gllm_docproc/loader/pdf/glair_vision_ocr_loader.pyi,sha256=WDY5FX7OcINZKU_IZJY7qHrmzW-YQiOLMjWmyZqS6P8,1698
100
+ gllm_docproc/loader/pdf/pdf_loader_utils.pyi,sha256=ztwgYvjM80ESRZ5obioIidGwfm-TPfcC0Qd5nPCdH1Y,2997
101
+ gllm_docproc/loader/pdf/pdf_miner_loader.pyi,sha256=Ssk6kqgzucDpK_ohASfXvg5bK53dm8ktSmhClxjDsAU,2254
102
+ gllm_docproc/loader/pdf/pdf_miner_word_loader.pyi,sha256=vpikwwzR6j26VO3UdY_UqcUavM-TozgxSwSSYTl50HY,1551
103
+ gllm_docproc/loader/pdf/pdf_page_loader.pyi,sha256=nptf8ZTvyD4UQ48A7cgggT7epRqzGsegbDQewDnoRUY,2071
104
+ gllm_docproc/loader/pdf/pdf_plumber_loader.pyi,sha256=eK_1IuYTKJmNpEWP5JQAUQ1I4H_C9sR-uqItECKTlJk,2052
105
+ gllm_docproc/loader/pdf/pymupdf_loader.pyi,sha256=Twko6w42Yficql3jBNnm0nBlxjuehSzUXJ6cq58ydiM,3498
106
+ gllm_docproc/loader/pdf/pymupdf_span_loader.pyi,sha256=srv1fLohpcHVampSs-joNPhilVzYwwA39MgTW-BBgnc,3518
107
+ gllm_docproc/loader/pdf/pymupdf_utils.pyi,sha256=RlleXspnjGcibUxwY13-kCr6d0qJm2uEfNmAqiIlq9c,3678
108
+ gllm_docproc/loader/pdf/tabula_loader.pyi,sha256=zNhW3KVr3YQLyMJQamHXd2B_QQ925r1IqprHiXM42Z8,1788
109
+ gllm_docproc/loader/pdf/text_inject_pdf_plumber_loader.pyi,sha256=xRWomeYCvzMca1YeSNQYD8ArDpHDQf9Wz9gn3dTAfX0,2045
110
+ gllm_docproc/loader/pptx/__init__.pyi,sha256=1gb8IxMQiZkkcGZl41rdYEGjxvCvEwfWC4ZwPWyK3x0,104
111
+ gllm_docproc/loader/pptx/python_pptx_loader.pyi,sha256=tGSO_9hQzhFoeDd6HPlRNdERUE85heOAUPmEfS62ETQ,2580
112
+ gllm_docproc/loader/txt/__init__.pyi,sha256=fwhz7Y79UKInwtd_4tnq3sXFH4YdsfE-zjykWnZKmLI,75
113
+ gllm_docproc/loader/txt/txt_loader.pyi,sha256=Wos2IRSktx6_MnU4hFf7IuQg3GWSuGIygWk3Lfev1xM,2274
114
+ gllm_docproc/loader/video/__init__.pyi,sha256=UAwriOtlfSfQu03fZHZnUgTgErN60Hjqp_SONhxHbUY,124
115
+ gllm_docproc/loader/video/video_loader_utils.pyi,sha256=JMqeQTng9MOFjQQ_D3NrfTJBaHWyh5Y6PLq3coIbNJg,4814
116
+ gllm_docproc/loader/video/video_transcript_loader.pyi,sha256=Xs37HQvapUySlIRdgm_mzfUJ4hqtHmp1uXhYu-4Homg,3209
117
+ gllm_docproc/loader/xlsx/__init__.pyi,sha256=bdF9g2QKvO0E_aTBdfbwXKivHTAohdRHBQYMou8Yr2s,95
118
+ gllm_docproc/loader/xlsx/openpyxl_loader.pyi,sha256=ozyTYhzOSiSOqvKmL44f2434eOuwDcNpx5AoXISb0LA,1962
119
+ gllm_docproc/model/__init__.pyi,sha256=J1qIuXV1QS2Sp3mR7JkZZiYcKo0zPhcerxfxdQRijEg,325
120
+ gllm_docproc/model/element.pyi,sha256=Avt92ckAKJUXGI1Mfw6XtPRgjaY2yW8DZJJ-Se_l3CM,1096
121
+ gllm_docproc/model/element_metadata.pyi,sha256=p9WqqfPfRpLZaZIkkAfRVzRvt-8OhaT-gZgCc_tLXZQ,846
122
+ gllm_docproc/model/loader_type.pyi,sha256=YC3BxBLR7fyh1J7BkpmU6ELLfj3kdwJO5PJd7luoGpE,433
123
+ gllm_docproc/model/media.pyi,sha256=ypQC8HVZ9BGHLCGTEolykq_ynryX1YPbnmFxbkiCnYw,1711
124
+ gllm_docproc/model/parser_type.pyi,sha256=7q2T_l_WSr9BYiT5ToZNUr03Ulw04bPiexmkYH4Ji84,402
125
+ gllm_docproc/parser/__init__.pyi,sha256=7ylnmzWFyW4_XTVkxyj9iaIfRfBQu6_d5c-Jj781nlY,160
126
+ gllm_docproc/parser/base_parser.pyi,sha256=Sun6W4Iv92G_tztu5k6xdYmwA96qxPLK6DPoDf2yt_I,1163
127
+ gllm_docproc/parser/pipeline_parser.pyi,sha256=TBVbZQEdU9T-oNE4r9BE0VW1wu_sjy6VPPNVvwMdG9U,1270
128
+ gllm_docproc/parser/document/__init__.pyi,sha256=DgVCz5skXQW_DzQ0pWFpxNvrEuE8ehGPOuK7kAFCYC4,331
129
+ gllm_docproc/parser/document/docx_parser.pyi,sha256=XeMY-pNyEUoUg4xqtc7XMmGGImumWMmzC9C9H4iMTB4,1546
130
+ gllm_docproc/parser/document/pdf_parser.pyi,sha256=ZJvY4TkVsXAZw_NM6vBjqPgYmk__gI3Nkox4-oQCStQ,1546
131
+ gllm_docproc/parser/document/pptx_parser.pyi,sha256=MNROmr0yTvgrDJFh361ZotMOMDvJMMVM2BXrF_vlUhQ,1624
132
+ gllm_docproc/parser/document/txt_parser.pyi,sha256=sdERCHZ_lR6B7Q0oTDKp0oQm55mPVH1M-04fuZWpzkI,885
133
+ gllm_docproc/parser/document/xlsx_parser.pyi,sha256=yylVDHfUm5i_jD0ECjo2KamVxGjsrV2_t86TTsNDgdU,1094
134
+ gllm_docproc/parser/html/__init__.pyi,sha256=DAG6lL1SfvN7euCP0XSq2Bqmai931jx9_Oj8u4LGN7I,198
135
+ gllm_docproc/parser/html/flat/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
136
+ gllm_docproc/parser/html/flat/html_flat_parser.pyi,sha256=v7zbnvnBBg8TTBd4cq5q2PElyePHkKwtUc9B3y1PAGA,1280
137
+ gllm_docproc/parser/html/nested/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
138
+ gllm_docproc/parser/html/nested/html_json_processor.pyi,sha256=jnEWMBxVxh5f99YCmZk5oqRR4T2qPAYNEfExPMkJ1nc,6477
139
+ gllm_docproc/parser/html/nested/html_nested_parser.pyi,sha256=O3JhTWHIXWKNCl4kfk4lWH1SQRiv3bRbu_OMDSHicFA,1014
140
+ gllm_docproc/parser/html/nested/nested_element.pyi,sha256=-NnyjyGtP6BzQ1XQQNsm66FqL9tt75EYCxNQfh30ZGc,1277
141
+ gllm_docproc/parser/image/__init__.pyi,sha256=MF6rUQ3_xMX5O1ZEE-S1zST3kp-r2eZVyxjO1oSWDy0,339
142
+ gllm_docproc/parser/image/image_mime_normalization_parser.pyi,sha256=crJTks127xM35qkN8f3xwWj-Ha6WYLpxuSHbTGSPpvU,2087
143
+ gllm_docproc/parser/image/image_plain_small_filter_parser.pyi,sha256=4nLq9q6Wl4bCCzu8R5or41uY1_GoUKc2g10GnHfgViQ,1936
144
+ gllm_docproc/parser/table/__init__.pyi,sha256=a2Zvp3tmJat1Kgp7ZWbikqzAviRVGyVLUcS9PGZ4owo,112
145
+ gllm_docproc/parser/table/table_caption_parser.pyi,sha256=bpp_vKDYRPyrveOBIiURYDGh5yBDU04PoenokLlRzRY,2770
146
+ gllm_docproc/request_handler/__init__.pyi,sha256=nulIcWHCBdrJHMbfJGIwfJmDnlcAI76FnRl-0xt5myw,112
147
+ gllm_docproc/request_handler/base_request_handler.pyi,sha256=CC495QmCo-SQR9jXFjJXDEIY_Q9G66RVqC-UNhTRwBE,461
148
+ gllm_docproc/response_handler/__init__.pyi,sha256=JcTvIdJ4heHLgOcB4i6KGKfOSUVhjkGNhUXmxPgelXI,116
149
+ gllm_docproc/response_handler/base_response_handler.pyi,sha256=0eYFF10duSJnuOaQamth2Y7YZMV1rIwouu87DxfaLxg,1287
150
+ gllm_docproc/utils/__init__.pyi,sha256=a8YWoY2dNzmWpAwL_WHSUgOBrtGv3GdmT3ets3H-tX4,100
151
+ gllm_docproc/utils/async_utils.pyi,sha256=KAj9dTiK3SRdEhN4itwPA75KSeFaHHulJX0hObaei3s,659
152
+ gllm_docproc/utils/file_utils.pyi,sha256=ALINcy33v_fJOrRBkN27ehYGQ-APm6dTsSyB9Zi4QXg,2684
153
+ gllm_docproc/utils/html_constants.pyi,sha256=UEQilpS3NdICdoFV1qVJXKNeL67bcdrZ15S3tb0tY2c,2948
154
+ gllm_docproc/validator/__init__.pyi,sha256=HviQKNWg9XDDw2-AnvIu4icYeT5pLYdxNNlfu63l2Js,505
155
+ gllm_docproc/validator/base_validator.pyi,sha256=9JBzNmrS6r9ExIHzF3kp7hPuAvL8Ua9vqrJ5hY7DQ8Q,1619
156
+ gllm_docproc/validator/character_count_validator.pyi,sha256=CGfVHxE3UJ2euT7kx65dqQ-hUHXRly3EMPoDWdRiOyQ,1507
157
+ gllm_docproc/validator/file_size_validator.pyi,sha256=ZvjH2itrF2x-JMU9uUZa4CgZSEO9l14fcN88JT5K8Jk,1216
158
+ gllm_docproc/validator/page_count_validator.pyi,sha256=CbxKHgJgexrRkAtNbV5yvxy__AV5wGuAB56jrdOtTpo,1307
159
+ gllm_docproc/validator/pipeline_validator.pyi,sha256=hOB15RBvIAemRytyfHVLQMvIVmYARmxcYZSs-gkHk60,1930
160
+ gllm_docproc/validator/model/__init__.pyi,sha256=-XgIRV1nvMPkcezSwP-10QOp95GCKheTfC58rY942yw,180
161
+ gllm_docproc/validator/model/validator_input.pyi,sha256=q_0FGTba88oa63lSo5_5Eojoj6gav3Lp_d3YxcUM7gc,1883
162
+ gllm_docproc/validator/model/validator_result.pyi,sha256=I2ZidQAVXjdKm2gXDIyunTYrgdEF-K2HxE7cIX7vUB0,734
163
+ gllm_docproc.build/.gitignore,sha256=aEiIwOuxfzdCmLZe4oB1JsBmCUxwG8x-u-HBCV9JT8E,1
164
+ gllm_docproc_binary-0.7.22.dist-info/METADATA,sha256=0uZeXs1QcWFbyIwZ7BgCmr62WSsTQeEtapFx0LKJkM4,6704
165
+ gllm_docproc_binary-0.7.22.dist-info/WHEEL,sha256=l2aKBREYfqJ7T2ljmr6hUiXPoNvvXF47bG4IHjuSyS4,96
166
+ gllm_docproc_binary-0.7.22.dist-info/top_level.txt,sha256=FzUqfBCCn6DsB0K9QO5mNXrR2VbqKu__KhFzHgHVz90,13
167
+ gllm_docproc_binary-0.7.22.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: Nuitka (2.6.9)
3
+ Root-Is-Purelib: false
4
+ Tag: cp311-cp311-win_amd64
5
+
@@ -0,0 +1 @@
1
+ gllm_docproc