gllm-docproc-binary 0.1.8__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gllm-docproc-binary might be problematic. Click here for more details.

Files changed (123) hide show
  1. gllm_docproc/__init__.pyi +0 -0
  2. gllm_docproc/chunker/__init__.pyi +3 -0
  3. gllm_docproc/chunker/base_chunker.pyi +29 -0
  4. gllm_docproc/chunker/structured_element/__init__.pyi +3 -0
  5. gllm_docproc/chunker/structured_element/chunk_enricher.pyi +43 -0
  6. gllm_docproc/chunker/structured_element/structured_element_chunker.pyi +80 -0
  7. gllm_docproc/chunker/table/__init__.pyi +3 -0
  8. gllm_docproc/chunker/table/table_chunker.pyi +45 -0
  9. gllm_docproc/converter/__init__.pyi +3 -0
  10. gllm_docproc/converter/base_converter.pyi +16 -0
  11. gllm_docproc/data_generator/__init__.pyi +3 -0
  12. gllm_docproc/data_generator/base_data_generator.pyi +19 -0
  13. gllm_docproc/downloader/__init__.pyi +3 -0
  14. gllm_docproc/downloader/base_downloader.pyi +16 -0
  15. gllm_docproc/downloader/html/__init__.pyi +4 -0
  16. gllm_docproc/downloader/html/exception/__init__.pyi +4 -0
  17. gllm_docproc/downloader/html/exception/item_scrape_failed_exception.pyi +16 -0
  18. gllm_docproc/downloader/html/exception/zyte_api_key_not_provided_exception.pyi +15 -0
  19. gllm_docproc/downloader/html/html_downloader.pyi +91 -0
  20. gllm_docproc/downloader/html/scraper/__init__.pyi +0 -0
  21. gllm_docproc/downloader/html/scraper/scraper/__init__.pyi +0 -0
  22. gllm_docproc/downloader/html/scraper/scraper/spiders/__init__.pyi +9 -0
  23. gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_pdf_spider.pyi +27 -0
  24. gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_sitemap_link_spider.pyi +29 -0
  25. gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_sitemap_spider.pyi +61 -0
  26. gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_spider.pyi +50 -0
  27. gllm_docproc/downloader/html/scraper/scraper/spiders/playwright_scrape_spider.pyi +22 -0
  28. gllm_docproc/downloader/html/scraper/scraper/spiders/scrape_spider.pyi +57 -0
  29. gllm_docproc/downloader/html/scraper/scraper/spiders/zyte_scrape_spider.pyi +51 -0
  30. gllm_docproc/downloader/html/scraper/web_scraper_executor.pyi +45 -0
  31. gllm_docproc/downloader/html/utils/__init__.pyi +3 -0
  32. gllm_docproc/downloader/html/utils/web_utils.pyi +21 -0
  33. gllm_docproc/dpo_router/__init__.pyi +3 -0
  34. gllm_docproc/dpo_router/base_dpo_router.pyi +17 -0
  35. gllm_docproc/housekeeping/__init__.pyi +3 -0
  36. gllm_docproc/housekeeping/base_housekeeping.pyi +15 -0
  37. gllm_docproc/indexer/__init__.pyi +3 -0
  38. gllm_docproc/indexer/base_indexer.pyi +31 -0
  39. gllm_docproc/indexer/graph/__init__.pyi +3 -0
  40. gllm_docproc/indexer/knowledge_graph/__init__.pyi +4 -0
  41. gllm_docproc/loader/__init__.pyi +4 -0
  42. gllm_docproc/loader/audio/__init__.pyi +3 -0
  43. gllm_docproc/loader/base_loader.pyi +31 -0
  44. gllm_docproc/loader/docx/__init__.pyi +5 -0
  45. gllm_docproc/loader/docx/docx2python_loader.pyi +46 -0
  46. gllm_docproc/loader/docx/python_docx_loader.pyi +35 -0
  47. gllm_docproc/loader/docx/python_docx_table_loader.pyi +35 -0
  48. gllm_docproc/loader/exception/__init__.pyi +3 -0
  49. gllm_docproc/loader/exception/unsupported_file_extension_error.pyi +7 -0
  50. gllm_docproc/loader/html/__init__.pyi +5 -0
  51. gllm_docproc/loader/html/exception/__init__.pyi +3 -0
  52. gllm_docproc/loader/html/exception/html_load_exception.pyi +7 -0
  53. gllm_docproc/loader/html/flat/__init__.pyi +3 -0
  54. gllm_docproc/loader/html/flat/html_flat_base_handler.pyi +52 -0
  55. gllm_docproc/loader/html/flat/html_flat_loader.pyi +30 -0
  56. gllm_docproc/loader/html/flat/html_flat_merger.pyi +22 -0
  57. gllm_docproc/loader/html/html_base_loader.pyi +25 -0
  58. gllm_docproc/loader/html/nested/__init__.pyi +3 -0
  59. gllm_docproc/loader/html/nested/dictionary_utils.pyi +40 -0
  60. gllm_docproc/loader/html/nested/html_nested_base_handler.pyi +128 -0
  61. gllm_docproc/loader/html/nested/html_nested_element_handler.pyi +24 -0
  62. gllm_docproc/loader/html/nested/html_nested_loader.pyi +15 -0
  63. gllm_docproc/loader/html/utils/__init__.pyi +0 -0
  64. gllm_docproc/loader/html/utils/flat_table_utils.pyi +29 -0
  65. gllm_docproc/loader/html/utils/html_utils.pyi +41 -0
  66. gllm_docproc/loader/html/utils/removed_components.pyi +53 -0
  67. gllm_docproc/loader/html/utils/string_utils.pyi +33 -0
  68. gllm_docproc/loader/html/utils/table_utils.pyi +78 -0
  69. gllm_docproc/loader/json/__init__.pyi +3 -0
  70. gllm_docproc/loader/json/json_elements_loader.pyi +33 -0
  71. gllm_docproc/loader/loader_utils.pyi +42 -0
  72. gllm_docproc/loader/pdf/__init__.pyi +13 -0
  73. gllm_docproc/loader/pdf/adobe_pdf_extract_loader.pyi +40 -0
  74. gllm_docproc/loader/pdf/azure_ai_document_intelligence_loader.pyi +45 -0
  75. gllm_docproc/loader/pdf/azure_ai_document_intelligence_raw_loader.pyi +50 -0
  76. gllm_docproc/loader/pdf/glair_vision_ocr_loader.pyi +38 -0
  77. gllm_docproc/loader/pdf/pdf_loader_utils.pyi +59 -0
  78. gllm_docproc/loader/pdf/pdf_miner_loader.pyi +38 -0
  79. gllm_docproc/loader/pdf/pdf_miner_word_loader.pyi +33 -0
  80. gllm_docproc/loader/pdf/pdf_plumber_loader.pyi +38 -0
  81. gllm_docproc/loader/pdf/pymupdf_loader.pyi +43 -0
  82. gllm_docproc/loader/pdf/pymupdf_span_loader.pyi +44 -0
  83. gllm_docproc/loader/pdf/pymupdf_utils.pyi +34 -0
  84. gllm_docproc/loader/pdf/tabula_loader.pyi +32 -0
  85. gllm_docproc/loader/pdf/text_inject_pdf_plumber_loader.pyi +37 -0
  86. gllm_docproc/loader/pipeline_loader.pyi +48 -0
  87. gllm_docproc/loader/txt/__init__.pyi +3 -0
  88. gllm_docproc/loader/txt/txt_loader.pyi +26 -0
  89. gllm_docproc/loader/xlsx/__init__.pyi +3 -0
  90. gllm_docproc/loader/xlsx/openpyxl_loader.pyi +37 -0
  91. gllm_docproc/model/__init__.pyi +4 -0
  92. gllm_docproc/model/element.pyi +37 -0
  93. gllm_docproc/model/element_metadata.pyi +35 -0
  94. gllm_docproc/parser/__init__.pyi +4 -0
  95. gllm_docproc/parser/base_parser.pyi +29 -0
  96. gllm_docproc/parser/document/__init__.pyi +6 -0
  97. gllm_docproc/parser/document/docx_parser.pyi +27 -0
  98. gllm_docproc/parser/document/pdf_parser.pyi +35 -0
  99. gllm_docproc/parser/document/txt_parser.pyi +22 -0
  100. gllm_docproc/parser/document/xlsx_parser.pyi +26 -0
  101. gllm_docproc/parser/html/__init__.pyi +4 -0
  102. gllm_docproc/parser/html/flat/__init__.pyi +0 -0
  103. gllm_docproc/parser/html/flat/html_flat_parser.pyi +27 -0
  104. gllm_docproc/parser/html/nested/__init__.pyi +0 -0
  105. gllm_docproc/parser/html/nested/html_json_processor.pyi +158 -0
  106. gllm_docproc/parser/html/nested/html_nested_parser.pyi +24 -0
  107. gllm_docproc/parser/html/nested/nested_element.pyi +31 -0
  108. gllm_docproc/parser/pipeline_parser.pyi +33 -0
  109. gllm_docproc/parser/table/__init__.pyi +3 -0
  110. gllm_docproc/parser/table/table_caption_parser.pyi +66 -0
  111. gllm_docproc/request_handler/__init__.pyi +3 -0
  112. gllm_docproc/request_handler/base_request_handler.pyi +17 -0
  113. gllm_docproc/response_handler/__init__.pyi +3 -0
  114. gllm_docproc/response_handler/base_response_handler.pyi +39 -0
  115. gllm_docproc/utils/__init__.pyi +0 -0
  116. gllm_docproc/utils/file_utils.pyi +76 -0
  117. gllm_docproc/utils/html_constants.pyi +121 -0
  118. gllm_docproc.build/.gitignore +1 -0
  119. gllm_docproc.cp311-win_amd64.pyd +0 -0
  120. gllm_docproc.pyi +149 -0
  121. gllm_docproc_binary-0.1.8.dist-info/METADATA +110 -0
  122. gllm_docproc_binary-0.1.8.dist-info/RECORD +123 -0
  123. gllm_docproc_binary-0.1.8.dist-info/WHEEL +4 -0
@@ -0,0 +1,76 @@
1
+ from typing import Any
2
+
3
+ def create_folder(folder_path: str) -> None:
4
+ """Create a folder.
5
+
6
+ This function checks whether the folder path exists. If the folder path does not
7
+ exist, the function creates a folder in the specified folder path.
8
+
9
+ Args:
10
+ folder_path (str): The folder path to create.
11
+ """
12
+ def create_full_path(dir_path: str, filename: str, file_extension: str) -> str:
13
+ """Create a full path for a file.
14
+
15
+ This function creates a full path for a file by combining the directory
16
+ path, the filename, and the file extension.
17
+
18
+ Args:
19
+ dir_path (str): The directory path.
20
+ filename (str): The filename.
21
+ file_extension (str): The file extension.
22
+
23
+ Returns:
24
+ str: The full path for the file.
25
+ """
26
+ def save_to_json(elements: list[dict[str, Any]] | dict[str, Any], folder_path: str, file_name: str) -> None:
27
+ """Save a list of elements to a JSON file.
28
+
29
+ This function saves a list of elements to a JSON file. The function takes
30
+ the list of elements, the folder path, and the file name as input and saves
31
+ the elements to a JSON file in the specified folder.
32
+
33
+ Args:
34
+ elements (list[dict[str, Any]] | dict[str, Any]): The list of elements, or a single dictionary, to save.
35
+ folder_path (str): The folder path to save the JSON file.
36
+ file_name (str): The file name of the JSON file.
37
+
38
+ Returns:
39
+ None
40
+ """
41
+ def save_to_csv(elements: list[dict[str, Any]], folder_path: str, file_name: str) -> None:
42
+ """Save a list of elements to a CSV file.
43
+
44
+ This function saves a list of elements to a CSV file. The function takes
45
+ the list of elements, the folder path, and the file name as input and saves
46
+ the elements to a CSV file in the specified folder.
47
+
48
+ Args:
49
+ elements (list[dict[str, Any]]): The list of elements to save.
50
+ folder_path (str): The folder path to save the CSV file.
51
+ file_name (str): The file name of the CSV file.
52
+
53
+ Returns:
54
+ None
55
+ """
56
+ def save_file(content: str, filename: str):
57
+ """Save the content to a file.
58
+
59
+ Args:
60
+ content (str): The content to save.
61
+ filename (str): The filename to save the content to.
62
+
63
+ Returns:
64
+ None
65
+ """
66
+ def read_json_file(file_path: str) -> list[dict[str, Any]] | dict[str, Any]:
67
+ """Read a JSON file.
68
+
69
+ This function reads a JSON file and returns the content of the JSON file.
70
+
71
+ Args:
72
+ file_path (str): The path of the JSON file to read.
73
+
74
+ Returns:
75
+ list[dict[str, Any]] | dict[str, Any]: The content of the JSON file.
76
+ """
@@ -0,0 +1,121 @@
1
+ from _typeshed import Incomplete
2
+ from gllm_docproc.model.element import AUDIO as AUDIO, FOOTER as FOOTER, HEADER as HEADER, HEADING as HEADING, IMAGE as IMAGE, TABLE as TABLE, TITLE as TITLE, VIDEO as VIDEO
3
+
4
+ FORMATTING_TAGS: Incomplete
5
+ SPACING: str
6
+
7
+ class MetaDataKeys:
8
+ """Represents keys commonly used in metadata for web content."""
9
+ CHARSET: str
10
+ PROPERTY: str
11
+ CONTENT: str
12
+ NAME: str
13
+ HTTP_EQUIV: str
14
+ URL: str
15
+ TITLE: str
16
+ METADATA: str
17
+ SOURCE: str
18
+ SOURCE_TYPE: str
19
+ LOADED_DATETIME: str
20
+
21
+ class ContentDataKeys:
22
+ """Represents keys commonly used in web content data."""
23
+ TAG: str
24
+ CONTENT: str
25
+ SOURCE: str
26
+ TYPE: str
27
+ SRC: str
28
+ PLACEHOLDER: str
29
+ TABLE: str
30
+ HREF: str
31
+ ALT: str
32
+ CLASS: str
33
+ VALUE: str
34
+
35
+ class ItemDataKeys:
36
+ """Represents keys used for handling item data."""
37
+ ELEMENTS: str
38
+ TEXT: str
39
+ STRUCTURE: str
40
+ ELEMENT_ID: str
41
+ INDEX: str
42
+ LINK: str
43
+ FORMATS: str
44
+ COMBINE_PREV: str
45
+ LIST_TYPE: str
46
+ IS_LIST_FIRST_ITEM: str
47
+ METADATA: str
48
+ URL: str
49
+ GROUP_ID: str
50
+ PARENT_ID: str
51
+ LINE_BREAK: str
52
+ HTML_TAGS: str
53
+ ROW_ITEM: str
54
+ COLSPAN: str
55
+ ROWSPAN: str
56
+
57
+ class HTMLTags:
58
+ """Represents commonly used HTML tags as constants."""
59
+ IMG: str
60
+ INPUT: str
61
+ SVG: str
62
+ SOURCE: str
63
+ TABLE: str
64
+ A: str
65
+ VIDEO: str
66
+ AUDIO: str
67
+ IFRAME: str
68
+ EMBED: str
69
+ TEXT: str
70
+ UL: str
71
+ OL: str
72
+ LI: str
73
+ P: str
74
+ BR: str
75
+ H: Incomplete
76
+ HEADER: str
77
+ TITLE: str
78
+ FOOTER: str
79
+ MEDIA_TAGS: Incomplete
80
+ TR: str
81
+ TD: str
82
+ TH: str
83
+ TBODY: str
84
+ TFOOT: str
85
+ THEAD: str
86
+
87
+ class ErrorMessage:
88
+ """Represents predefined error messages used in the application."""
89
+ ERROR_FAILED_SAVE_JSON: str
90
+ ERROR_FAILED_SAVE_CSV: str
91
+ ERROR_FAILED_EXTRACT_DATA: str
92
+ ERROR_MISSING_KEY: str
93
+ ERROR_FAILED_TO_PROCESS_ITEM: str
94
+ ERROR_FAILED_TO_OPEN_SPIDER: str
95
+ ERROR_UNKNOWN_SOURCE: str
96
+
97
+ class Structure:
98
+ """Represents the structure of the content."""
99
+ @classmethod
100
+ def get_structure(cls, tag: str):
101
+ """Get the structure associated with the given HTML tag.
102
+
103
+ This class method maps HTML tags to their corresponding structure types and returns the
104
+ structure associated with the provided HTML tag.
105
+
106
+ Args:
107
+ tag (str): The HTML tag for which to retrieve the structure.
108
+
109
+ Returns:
110
+ str or None: The structure associated with the HTML tag, or None if the tag is not mapped.
111
+ """
112
+
113
+ class TableConstants:
114
+ """Represents constants used for table extraction."""
115
+ TABLE_META_KEY: str
116
+ TABLE_CONTENT_KEY: str
117
+ TABLE_ROW_TYPE_KEY: str
118
+ MAX_CHAR_COUNT_PER_COLUMN: str
119
+ HEADER: str
120
+ BODY: str
121
+ FOOTER: str
@@ -0,0 +1 @@
1
+ *
Binary file
gllm_docproc.pyi ADDED
@@ -0,0 +1,149 @@
1
+ # This file was generated by Nuitka
2
+
3
+ # Stubs included by default
4
+
5
+
6
+ __name__ = ...
7
+
8
+
9
+
10
+ # Modules used internally, to allow implicit dependencies to be seen:
11
+ import os
12
+ import abc
13
+ import typing
14
+ import hashlib
15
+ import inspect
16
+ import re
17
+ import langchain_text_splitters
18
+ import gllm_docproc.chunker.table.TableChunker
19
+ import pandas
20
+ import copy
21
+ import datetime
22
+ import scrapy
23
+ import gllm_docproc.downloader.html.exception.ItemScrapeFailedException
24
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.CrawlBaseSpider
25
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.CrawlSitemapLinkSpider
26
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.CrawlSitemapSpider
27
+ import gllm_docproc.downloader.html.utils.clean_url
28
+ import gllm_docproc.downloader.html.utils.is_valid_url
29
+ import scrapy.http
30
+ import scrapy_playwright
31
+ import scrapy_playwright.page
32
+ import urllib
33
+ import urllib.parse
34
+ import scrapy.crawler
35
+ import scrapy.spiders
36
+ import scrapy.spiders.sitemap
37
+ import scrapy.utils
38
+ import scrapy.utils.sitemap
39
+ import scrapy.linkextractors
40
+ import requests
41
+ import billiard
42
+ import gllm_docproc.downloader.html.exception.ZyteApiKeyNotProvidedException
43
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.ScrapeSpider
44
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.ZyteScrapeSpider
45
+ import gllm_docproc.indexer.BaseIndexer
46
+ import uuid
47
+ import gllm_core
48
+ import gllm_core.utils
49
+ import gllm_core.utils.logger_manager
50
+ import gllm_datastore
51
+ import gllm_datastore.graph_data_store
52
+ import gllm_datastore.graph_data_store.llama_index_graph_rag_data_store
53
+ import gllm_datastore.graph_data_store.llama_index_neo4j_graph_rag_data_store
54
+ import llama_index
55
+ import llama_index.core
56
+ import llama_index.core.base
57
+ import llama_index.core.base.embeddings
58
+ import llama_index.core.base.embeddings.base
59
+ import llama_index.core.base.llms
60
+ import llama_index.core.base.llms.base
61
+ import llama_index.core.indices
62
+ import llama_index.core.indices.property_graph
63
+ import llama_index.core.indices.property_graph.transformations
64
+ import llama_index.core.schema
65
+ import llama_index.core.vector_stores
66
+ import llama_index.core.vector_stores.types
67
+ import gllm_core.utils.imports
68
+ import gllm_misc
69
+ import gllm_misc.knowledge_graph
70
+ import gllm_misc.knowledge_graph.graph_store
71
+ import asyncio
72
+ import gllm_misc.multimodal_manager
73
+ import gllm_misc.multimodal_manager.audio_to_text
74
+ import gllm_misc.multimodal_manager.audio_to_text.audio_to_text
75
+ import gllm_misc.multimodal_manager.schema
76
+ import base64
77
+ import docx2python
78
+ import docx2python.docx_output
79
+ import docx
80
+ import docx.table
81
+ import docx.text
82
+ import docx.text.paragraph
83
+ import gllm_docproc.loader.html.flat.HTMLFlatLoader
84
+ import gllm_docproc.loader.html.nested.HTMLNestedLoader
85
+ import parsel
86
+ import gllm_docproc.loader.html.exception.HtmlLoadException
87
+ import tabulate
88
+ import itertools
89
+ import __future__
90
+ import re.sub
91
+ import w3lib
92
+ import w3lib.html
93
+ import json
94
+ import gllm_docproc.loader.exception.UnsupportedFileExtensionError
95
+ import csv
96
+ import io
97
+ import zipfile
98
+ import adobe
99
+ import adobe.pdfservices
100
+ import adobe.pdfservices.operation
101
+ import adobe.pdfservices.operation.auth
102
+ import adobe.pdfservices.operation.auth.service_principal_credentials
103
+ import adobe.pdfservices.operation.io
104
+ import adobe.pdfservices.operation.io.cloud_asset
105
+ import adobe.pdfservices.operation.io.stream_asset
106
+ import adobe.pdfservices.operation.pdf_services
107
+ import adobe.pdfservices.operation.pdf_services_media_type
108
+ import adobe.pdfservices.operation.pdf_services_response
109
+ import adobe.pdfservices.operation.pdfjobs
110
+ import adobe.pdfservices.operation.pdfjobs.jobs
111
+ import adobe.pdfservices.operation.pdfjobs.jobs.extract_pdf_job
112
+ import adobe.pdfservices.operation.pdfjobs.params
113
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf
114
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf.extract_element_type
115
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf.extract_pdf_params
116
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf.extract_renditions_element_type
117
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf.table_structure_type
118
+ import adobe.pdfservices.operation.pdfjobs.result
119
+ import adobe.pdfservices.operation.pdfjobs.result.extract_pdf_result
120
+ import azure
121
+ import azure.ai
122
+ import azure.ai.documentintelligence
123
+ import azure.ai.documentintelligence.models
124
+ import azure.core
125
+ import azure.core.credentials
126
+ import collections
127
+ import collections.Counter
128
+ import pdfminer
129
+ import pdfminer.high_level
130
+ import pdfminer.layout
131
+ import pdfplumber
132
+ import pdfplumber._typing
133
+ import pdfplumber.page
134
+ import pdfplumber.table
135
+ import fitz
136
+ import tabula
137
+ import tabula.io
138
+ import gllm_datastore.cache_data_store
139
+ import gllm_datastore.cache_data_store.cache_data_store
140
+ import gllm_datastore.cache_data_store.utils
141
+ import openpyxl
142
+ import openpyxl.cell
143
+ import openpyxl.cell.cell
144
+ import openpyxl.worksheet
145
+ import openpyxl.worksheet.worksheet
146
+ import pydantic
147
+ import math
148
+ import gllm_docproc.parser.BaseParser
149
+ import ntpath
@@ -0,0 +1,110 @@
1
+ Metadata-Version: 2.1
2
+ Name: gllm-docproc-binary
3
+ Version: 0.1.8
4
+ Summary: A library for orchestrating the processing of document. Typically in a Gen AI applications (but not limited to just Gen AI).
5
+ Author: GenAI SDK Team
6
+ Author-email: gat-sdk@gdplabs.id
7
+ Requires-Python: >=3.11,<3.13
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.11
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Provides-Extra: audio
12
+ Provides-Extra: docx
13
+ Provides-Extra: html
14
+ Provides-Extra: kg
15
+ Provides-Extra: pdf
16
+ Provides-Extra: xlsx
17
+ Requires-Dist: azure-ai-documentintelligence (>=1.0.0b3,<2.0.0) ; extra == "pdf"
18
+ Requires-Dist: billiard (>=4.2.1,<5.0.0) ; extra == "html"
19
+ Requires-Dist: docx2python (==2.8.0) ; extra == "docx"
20
+ Requires-Dist: gllm-core-binary
21
+ Requires-Dist: gllm-datastore-binary
22
+ Requires-Dist: gllm-misc-binary[audio,kg]
23
+ Requires-Dist: jpype1 (>=1.5.0,<2.0.0) ; extra == "pdf"
24
+ Requires-Dist: langchain-text-splitters (>=0.3.2,<0.4.0)
25
+ Requires-Dist: libmagic (>=1.0,<2.0) ; sys_platform == "win32"
26
+ Requires-Dist: librosa (==0.10.1) ; extra == "audio"
27
+ Requires-Dist: llama-index-embeddings-openai (>=0.3.0,<0.4.0) ; extra == "kg"
28
+ Requires-Dist: llama-index-llms-openai (>=0.3.0,<0.4.0) ; extra == "kg"
29
+ Requires-Dist: openpyxl (>=3.0.10,<4.0.0) ; extra == "xlsx"
30
+ Requires-Dist: pandas (>=2.2.2,<3.0.0)
31
+ Requires-Dist: pdfminer-six (>=20231228,<20231229) ; extra == "pdf"
32
+ Requires-Dist: pdfplumber (>=0.11.4,<0.12.0) ; extra == "pdf"
33
+ Requires-Dist: pdfservices-sdk (>=4.0.0,<5.0.0) ; extra == "pdf"
34
+ Requires-Dist: playwright (>=1.40.0,<2.0.0) ; extra == "html"
35
+ Requires-Dist: pydantic (>=2.9.1,<3.0.0)
36
+ Requires-Dist: pymupdf (>=1.24.10,<2.0.0) ; extra == "pdf"
37
+ Requires-Dist: python-docx (==1.1.0) ; extra == "docx"
38
+ Requires-Dist: python-magic-bin (>=0.4.14,<0.5.0) ; sys_platform == "win32"
39
+ Requires-Dist: scrapy (>=2.11.0,<3.0.0) ; extra == "html"
40
+ Requires-Dist: scrapy-playwright (>=0.0.33,<0.1.0) ; extra == "html"
41
+ Requires-Dist: scrapy_zyte_api (>=0.12.2,<0.13.0) ; extra == "html"
42
+ Requires-Dist: tabula-py (>=2.9.3,<3.0.0) ; extra == "pdf"
43
+ Requires-Dist: tabulate (>=0.9.0,<0.10.0) ; extra == "pdf"
44
+ Requires-Dist: tqdm (==4.66.2) ; extra == "audio"
45
+ Requires-Dist: zyte-api (>=0.4.8,<0.5.0) ; extra == "html"
46
+ Description-Content-Type: text/markdown
47
+
48
+ # GDP Labs Language Model Document Processing Orchestrator
49
+
50
+ ## Description
51
+
52
+ A library for orchestrating document processing, typically in Gen AI applications (but not limited to Gen AI).
53
+
54
+ ## Installation
55
+
56
+ 1. Python v3.11 or v3.12 (the package requires `>=3.11,<3.13`):
57
+
58
+ You can install Python using [Miniconda](https://docs.anaconda.com/free/miniconda/index.html).
59
+
60
+ 2. Make sure you're in the `base` conda environment:
61
+ ```bash
62
+ conda activate
63
+ ```
64
+
65
+ 3. [Poetry](https://python-poetry.org/docs/) v1.8.1 or above:
66
+
67
+ You can install Poetry using cURL (you need Python to install Poetry):
68
+ ```bash
69
+ curl -sSL https://install.python-poetry.org | python3 -
70
+ ```
71
+
72
+ 4. Install the library using Poetry:
73
+ ```bash
74
+ # Latest
75
+ poetry add "git+ssh://git@github.com/GDP-ADMIN/gen-ai-internal.git#subdirectory=libs/gllm-docproc"
76
+
77
+ # Specific version
78
+ poetry add "git+ssh://git@github.com/GDP-ADMIN/gen-ai-internal.git@gllm_docproc-v0.0.1-beta.1#subdirectory=libs/gllm-docproc"
79
+
80
+ # This PR
81
+ poetry add "git+ssh://git@github.com/GDP-ADMIN/gen-ai-internal.git@decision/separate-document-processing#subdirectory=libs/gllm-docproc"
82
+ ```
83
+
84
+ 5. At this point, you can deactivate the Miniconda environment, as Poetry will create and manage its own virtual environment for you.
85
+ ```bash
86
+ conda deactivate
87
+ ```
88
+
89
+ ## Managing Dependencies
90
+ 1. Go to the root folder of the `gllm-docproc` module, e.g. `cd libs/gllm-docproc`.
91
+ 2. Run `poetry shell` to create a virtual environment.
92
+ 3. Run `poetry lock` to create a lock file if you haven't done it yet.
93
+ 4. Run `poetry install` to install the `gllm-docproc` requirements for the first time.
94
+ 5. Run `poetry update` if you change the version of any dependency in `pyproject.toml`.
95
+
96
+
97
+ ## Contributing
98
+ Please refer to this [Python Style Guide](https://docs.google.com/document/d/1uRggCrHnVfDPBnG641FyQBwUwLoFw0kTzNqRm92vUwM/edit?usp=sharing)
99
+ to get information about the code style, documentation standards, and SCA that you need to use when contributing to this project.
100
+
101
+ 1. Activate `pre-commit` hooks using `pre-commit install`
102
+ 2. Run `poetry shell` to create a virtual environment.
103
+ 3. Run `poetry lock` to create a lock file if you haven't done it yet.
104
+ 4. Run `poetry install` to install the `gllm-docproc` requirements for the first time.
105
+ 5. Run `which python` to get the interpreter path to set in Visual Studio Code (open the command palette with `Ctrl`+`Shift`+`P` or `Cmd`+`Shift`+`P`)
106
+ 6. Try running the unit tests to check that the setup works:
107
+ ```bash
108
+ poetry run pytest -s tests/unit_tests/
109
+ ```
110
+
@@ -0,0 +1,123 @@
1
+ gllm_docproc/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ gllm_docproc/chunker/__init__.pyi,sha256=GOOIYg0-Fjd3g9uJDo9q8J0Gabwt_GHD_44axN6Y-qc,83
3
+ gllm_docproc/chunker/base_chunker.pyi,sha256=QPSYXqM9PdVFkQ46iPa3PsQ39N4fjK9luhBXfYHGYLU,1195
4
+ gllm_docproc/chunker/structured_element/__init__.pyi,sha256=0SzEj-OALKTVr4v4WgKJVZUnemuzMkhunZndFhPlR4w,136
5
+ gllm_docproc/chunker/structured_element/chunk_enricher.pyi,sha256=xJXf_rornekHmNiu-AiYmyT0iDw2E1AfDFf8qk-mZTw,1747
6
+ gllm_docproc/chunker/structured_element/structured_element_chunker.pyi,sha256=GGBbuWBhX58m2noi9LAl0OGWiZEG6eyzX5IL6t4cNhk,4143
7
+ gllm_docproc/chunker/table/__init__.pyi,sha256=HP844LD8YE2VZWqiYLFEOYIM3qIHggqHm5BCAn15xX4,162
8
+ gllm_docproc/chunker/table/table_chunker.pyi,sha256=EqqDL_l9olB81p3x_EG-bjNj2-RizHrLmOA5VVwQf0o,1898
9
+ gllm_docproc/converter/__init__.pyi,sha256=jqqxJRyzpYAPcH6HaFjeuVTGAoxgEvMSOYc0SR2iy6c,91
10
+ gllm_docproc/converter/base_converter.pyi,sha256=8tWnLsURnTbcMf6Wstr3skPo7oxS6NxavZVZk08zBeo,477
11
+ gllm_docproc/data_generator/__init__.pyi,sha256=9ApW8nMNwWv-VOOvH2lzWViK19p0_HjMbaW3OpZdpSo,135
12
+ gllm_docproc/data_generator/base_data_generator.pyi,sha256=ZfB_aIQn61kgf0tSnCyjgJ82SbznC8HUdFLEXfMJ7WE,825
13
+ gllm_docproc/downloader/__init__.pyi,sha256=1pilbSFFWxeOdspqPFJnfeLZx9Z-CBuS44uu-xo997I,95
14
+ gllm_docproc/downloader/base_downloader.pyi,sha256=RqzyDCfOPYmlWHEk6gO68WjrWn-wWk_E5sjfONUk8Oc,515
15
+ gllm_docproc/downloader/html/__init__.pyi,sha256=KE2vvVGYIZli01gJd69xFxA63A1Q6F76bJlwRUc6Nds,134
16
+ gllm_docproc/downloader/html/exception/__init__.pyi,sha256=Sx4tSQh97yLWL5dSd6ZtdUiNEpoMpupTVDvJWHVjA9g,290
17
+ gllm_docproc/downloader/html/exception/item_scrape_failed_exception.pyi,sha256=PB0Uurm2v3anDds1eV9IGnw8fzuTjAZNc_E2sl_LrWM,623
18
+ gllm_docproc/downloader/html/exception/zyte_api_key_not_provided_exception.pyi,sha256=UD1WzdOLnm4pWgu3BSVen9Hp41FzZ_9W_8eDF9FfDJQ,604
19
+ gllm_docproc/downloader/html/html_downloader.pyi,sha256=KJ2E6VMWQc6HEjhtMVAji6fzCc7C-JdwuLZOJ8id4fY,4470
20
+ gllm_docproc/downloader/html/scraper/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
+ gllm_docproc/downloader/html/scraper/scraper/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ gllm_docproc/downloader/html/scraper/scraper/spiders/__init__.pyi,sha256=2FI0gNdvv8PDRTjfEr_1xZ1PNhufmw2i_T1PhpZReOM,667
23
+ gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_pdf_spider.pyi,sha256=CW5cPK-E3iT0n3TGrludEd_3WPQZ8Y0_15SYDB8ed_w,1392
24
+ gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_sitemap_link_spider.pyi,sha256=Q_vEX9hx6EgB-0k1CPyj4fFovllpp5JfPYGq_fbE4A4,1205
25
+ gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_sitemap_spider.pyi,sha256=M1r5Q1J7Jf6GTYbDB-w1V3J9auAOBm5U5O1mVETdGB0,2606
26
+ gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_spider.pyi,sha256=7blt3Vqz-cRpSc-25q2y0CmRbfvl43G2baCcbmYeQKY,2267
27
+ gllm_docproc/downloader/html/scraper/scraper/spiders/playwright_scrape_spider.pyi,sha256=3v5wxj_DPuQc0rLJ0rKNXk2o_yDgCAvVek0CtZVC5aw,972
28
+ gllm_docproc/downloader/html/scraper/scraper/spiders/scrape_spider.pyi,sha256=0-_fj_1xghGUOpGotaTS5ufzrYeF2a574QwDB7_ON2w,2162
29
+ gllm_docproc/downloader/html/scraper/scraper/spiders/zyte_scrape_spider.pyi,sha256=8QISd1lr1ICoZ7ZiaKGUMqLTkecE95Ww7c8W4ONTppk,2215
30
+ gllm_docproc/downloader/html/scraper/web_scraper_executor.pyi,sha256=9Q2_vfys5tzBZS8Qot48hkDUVbdnP19wLKiBmDM_hAc,2199
31
+ gllm_docproc/downloader/html/utils/__init__.pyi,sha256=rjlatKvMgkH6oHi6IkvKfHqZ9NiZEWeIBZtoR30gV9I,120
32
+ gllm_docproc/downloader/html/utils/web_utils.pyi,sha256=P3HSnFB4wucKNhBsJqW_wYMvhWoZ_Awivya5quPF_Z8,511
33
+ gllm_docproc/dpo_router/__init__.pyi,sha256=6rG_7XUQe-3a-gjoyWwmJQ5zMCkIqF7eL9kURy_kX6k,92
34
+ gllm_docproc/dpo_router/base_dpo_router.pyi,sha256=zBgD_U6yIuQaWSqI9B93AvFrskud0PX-CkdylSNq0YY,630
35
+ gllm_docproc/housekeeping/__init__.pyi,sha256=oL-C1roDf8io8zh90D4ugD_jAXdB_1CCcggZB1b5sTs,103
36
+ gllm_docproc/housekeeping/base_housekeeping.pyi,sha256=rptulu_Z3ufhKif8X-r3osNzHWJ7VuR1Z--sJQPXCXE,465
37
+ gllm_docproc/indexer/__init__.pyi,sha256=DAX3M09IFTLRzUmyd4XL0a6jhnZmE8UEPxZ-5dTlSt8,103
38
+ gllm_docproc/indexer/base_indexer.pyi,sha256=V5YhXNSbDixOe192JKMAD3lxEC0tyUhJCoPOrVJlRpo,1117
39
+ gllm_docproc/indexer/graph/__init__.pyi,sha256=pfuwDUwwVKP55TfX6jqQQPYETGUoMttKYcaznjH108A,142
40
+ gllm_docproc/indexer/knowledge_graph/__init__.pyi,sha256=7bSKY080wRwYlV59z1-ntIKBMQSVNmbtX3IfJBYu1b0,267
41
+ gllm_docproc/loader/__init__.pyi,sha256=Lzsi_ajlFYzu2teZ2kKiu7HHRHz7-M5ubpJ7XRmZHcg,160
42
+ gllm_docproc/loader/audio/__init__.pyi,sha256=bJMhxunQuKPnJzjXiIkJTtqdaefxb9f33Xw2QB2Wco0,83
43
+ gllm_docproc/loader/base_loader.pyi,sha256=eNorzdAQt-wMddX1S7I9glVSR7yyb56UldsimFuLnJI,1387
44
+ gllm_docproc/loader/docx/__init__.pyi,sha256=5JNIvnZyO9ZOMHJHSemAsFafwAf20iUxAiMDnR1U17I,308
45
+ gllm_docproc/loader/docx/docx2python_loader.pyi,sha256=z9EcFuqnXgvzqhnoSWft6oLgBHf63qyrvoYRLmxx0X8,2520
46
+ gllm_docproc/loader/docx/python_docx_loader.pyi,sha256=3ulT68shiwzK1dOopdgZKWcUTAuMcGhZIrJjwEhQC6M,2051
47
+ gllm_docproc/loader/docx/python_docx_table_loader.pyi,sha256=tQp_Fypis_pz_3VZddrpujV4abzX1xtoXOaDFJSe3R4,1756
48
+ gllm_docproc/loader/exception/__init__.pyi,sha256=G2CZHyjT_L0Y6h9XPjQ_MPVvg_P_mChBTQ-FtoW0cfs,157
49
+ gllm_docproc/loader/exception/unsupported_file_extension_error.pyi,sha256=c5xVjuQPNYZSuvlCHi87KMfrhBK-4R_i7-wr69QdUvQ,268
50
+ gllm_docproc/loader/html/__init__.pyi,sha256=hzLamxCb2AwLI8xO9ty0f-qY0kD97iAXCwUfBJyxkYw,244
51
+ gllm_docproc/loader/html/exception/__init__.pyi,sha256=4pNokkFZd_UWkgce5hvYUS6f5F_b1xZ_DShwmWakUDA,108
52
+ gllm_docproc/loader/html/exception/html_load_exception.pyi,sha256=R9Was3AEqcUXqqbARpgfQ5rnRS0pTL8gPXimqbWXFbo,261
53
+ gllm_docproc/loader/html/flat/__init__.pyi,sha256=GDjScQBkVstxxPnK4DIc3UE81shLXHjmJxgim7q1oj0,96
54
+ gllm_docproc/loader/html/flat/html_flat_base_handler.pyi,sha256=I0zPFG9Od4hvDu-GcN0VETDwgPwLfnT6b7cMsLdqCms,2706
55
+ gllm_docproc/loader/html/flat/html_flat_loader.pyi,sha256=i899Dop4zX2spr8K7VNwOtwax7CNqksjxGGnjFZ94hU,1839
56
+ gllm_docproc/loader/html/flat/html_flat_merger.pyi,sha256=mbgUwo90fZMiW20b-J_K89KgWCtmUL8G4KrUCzQt-5Q,1406
57
+ gllm_docproc/loader/html/html_base_loader.pyi,sha256=zPdRd0mMw90Q6pwJNzXWWEgea-7tYyHq0Pv1eSL3QZw,1185
58
+ gllm_docproc/loader/html/nested/__init__.pyi,sha256=nG7Z3zV4Z4KIG2MVWgaB7V6LA9LyN3JkgMkWdItA0dA,104
59
+ gllm_docproc/loader/html/nested/dictionary_utils.pyi,sha256=mcREqjBuSZ01idMXHu4Kfqa4aITdb4Mvlp8x7SZHX3U,1534
60
+ gllm_docproc/loader/html/nested/html_nested_base_handler.pyi,sha256=CDxr-Tq0qY5sM8fQujdxDr8YTnY2CJJj1PMeB28HzNQ,4862
61
+ gllm_docproc/loader/html/nested/html_nested_element_handler.pyi,sha256=d-vld6eNj_tjcLNRO2nX41Q_vdlZxoiq4kO2zrBuw2k,940
62
+ gllm_docproc/loader/html/nested/html_nested_loader.pyi,sha256=bOw4p5r4H7qYjY6F6ndDdlkGjjqz7Ze9A47oh19yhwo,932
63
+ gllm_docproc/loader/html/utils/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
+ gllm_docproc/loader/html/utils/flat_table_utils.pyi,sha256=0kKk6-iQOa__3-JWH92uaPhLJn9kjgKLAa5bFpoAOgE,1231
65
+ gllm_docproc/loader/html/utils/html_utils.pyi,sha256=wmNdZ7pxy0J1YgtUec0gRu4t0Gi3vc3hv_CIwi9KHig,1685
66
+ gllm_docproc/loader/html/utils/removed_components.pyi,sha256=wwpHUfu1HCMNIf8pvS52ibBKx3iAOXpq0TjkFPllGm8,2143
67
+ gllm_docproc/loader/html/utils/string_utils.pyi,sha256=HiKHATp4DGiORMI5WGadVKuNsqHMLVISIJi4MRXQhUs,1125
68
+ gllm_docproc/loader/html/utils/table_utils.pyi,sha256=Y0fVMcdBSwyTZj4NCVIiUF0z5yP9ssDsOvffS5WSSRA,2839
69
+ gllm_docproc/loader/json/__init__.pyi,sha256=UwKxlnJQCMBevsVLtEdNqbUwTjSUDAiGVQ1RPCiXQYc,112
70
+ gllm_docproc/loader/json/json_elements_loader.pyi,sha256=jYR0bZoVTI678Syx8dviN5EwacQ5gXl3Af0wqtwol64,1488
71
+ gllm_docproc/loader/loader_utils.pyi,sha256=Ek1B89xX5qigd_qI8yqX8dDTKC-4uCrEeJ2AyPc6su8,1727
72
+ gllm_docproc/loader/pdf/__init__.pyi,sha256=TCxrhdrRBUdrN8vsO-gGgQx5I97KEBEwNRk2LgeU634,1212
73
+ gllm_docproc/loader/pdf/adobe_pdf_extract_loader.pyi,sha256=YiKySUyKrtWVe_LmyRKmr9WHT_Z_ZakY8sojpplg5ks,1999
74
+ gllm_docproc/loader/pdf/azure_ai_document_intelligence_loader.pyi,sha256=spI9Xx3Bd9pNlQHUnUb19WivKKQ538RJGqI9ukYA3tM,2508
75
+ gllm_docproc/loader/pdf/azure_ai_document_intelligence_raw_loader.pyi,sha256=Qh9VCxO1VCFllmgqM5lnG8BO_M3NVM9Bfdy3TqnykpQ,2758
76
+ gllm_docproc/loader/pdf/glair_vision_ocr_loader.pyi,sha256=WDY5FX7OcINZKU_IZJY7qHrmzW-YQiOLMjWmyZqS6P8,1698
77
+ gllm_docproc/loader/pdf/pdf_loader_utils.pyi,sha256=ztwgYvjM80ESRZ5obioIidGwfm-TPfcC0Qd5nPCdH1Y,2997
78
+ gllm_docproc/loader/pdf/pdf_miner_loader.pyi,sha256=Ssk6kqgzucDpK_ohASfXvg5bK53dm8ktSmhClxjDsAU,2254
79
+ gllm_docproc/loader/pdf/pdf_miner_word_loader.pyi,sha256=vpikwwzR6j26VO3UdY_UqcUavM-TozgxSwSSYTl50HY,1551
80
+ gllm_docproc/loader/pdf/pdf_plumber_loader.pyi,sha256=EJlRHi2DvGDhdXRvpbH5ACsJTp4ocJhb8zjtxHq8kMU,2186
81
+ gllm_docproc/loader/pdf/pymupdf_loader.pyi,sha256=Hv4yTqTJzhBZIMtCinZfuUzaVjUsHK_3g5W_jQlNu7g,2786
82
+ gllm_docproc/loader/pdf/pymupdf_span_loader.pyi,sha256=CWPL9PhLeqFZ8Vf_r2q7fzUTWu2rD3x4optX7SMppiI,2806
83
+ gllm_docproc/loader/pdf/pymupdf_utils.pyi,sha256=HAfcj0jijuS5DAu0GVOU2HCuP8cbXuELtAnPNJpB3IE,1712
84
+ gllm_docproc/loader/pdf/tabula_loader.pyi,sha256=zNhW3KVr3YQLyMJQamHXd2B_QQ925r1IqprHiXM42Z8,1788
85
+ gllm_docproc/loader/pdf/text_inject_pdf_plumber_loader.pyi,sha256=xRWomeYCvzMca1YeSNQYD8ArDpHDQf9Wz9gn3dTAfX0,2045
86
+ gllm_docproc/loader/pipeline_loader.pyi,sha256=QW7SHv-0hzF1wwXbSD3anrbJFkd601KhTzz7KA8XVVM,1852
87
+ gllm_docproc/loader/txt/__init__.pyi,sha256=fwhz7Y79UKInwtd_4tnq3sXFH4YdsfE-zjykWnZKmLI,75
88
+ gllm_docproc/loader/txt/txt_loader.pyi,sha256=rZCcwF7wUVsS2PnwwBFyTF8juLSoaO83MQRl_cOfvdQ,1198
89
+ gllm_docproc/loader/xlsx/__init__.pyi,sha256=bdF9g2QKvO0E_aTBdfbwXKivHTAohdRHBQYMou8Yr2s,95
90
+ gllm_docproc/loader/xlsx/openpyxl_loader.pyi,sha256=ebxyC1yyNyF4BHnbGScXZS3ORwOEzjXaXL17HvdHoK4,2019
91
+ gllm_docproc/model/__init__.pyi,sha256=gXhu83AiLNwoFpv_XoJdUpPTl3BVo30wV-mWkGPA1mM,151
92
+ gllm_docproc/model/element.pyi,sha256=w9VQdeR0JzRvtDOdIzQkzFEVRyeGXj_prvAnRYFkIEw,1085
93
+ gllm_docproc/model/element_metadata.pyi,sha256=p9WqqfPfRpLZaZIkkAfRVzRvt-8OhaT-gZgCc_tLXZQ,846
94
+ gllm_docproc/parser/__init__.pyi,sha256=7ylnmzWFyW4_XTVkxyj9iaIfRfBQu6_d5c-Jj781nlY,160
95
+ gllm_docproc/parser/base_parser.pyi,sha256=v_k-NpEXM4D7m3nVH7v16oTzGRV368Df8C-Tnbuqfso,1198
96
+ gllm_docproc/parser/document/__init__.pyi,sha256=J-_Vko_oV4tBjDziORx7a70Wxp0Kk29rz2JbkXLr96E,266
97
+ gllm_docproc/parser/document/docx_parser.pyi,sha256=XeMY-pNyEUoUg4xqtc7XMmGGImumWMmzC9C9H4iMTB4,1546
98
+ gllm_docproc/parser/document/pdf_parser.pyi,sha256=ZJvY4TkVsXAZw_NM6vBjqPgYmk__gI3Nkox4-oQCStQ,1546
99
+ gllm_docproc/parser/document/txt_parser.pyi,sha256=sdERCHZ_lR6B7Q0oTDKp0oQm55mPVH1M-04fuZWpzkI,885
100
+ gllm_docproc/parser/document/xlsx_parser.pyi,sha256=yylVDHfUm5i_jD0ECjo2KamVxGjsrV2_t86TTsNDgdU,1094
101
+ gllm_docproc/parser/html/__init__.pyi,sha256=DAG6lL1SfvN7euCP0XSq2Bqmai931jx9_Oj8u4LGN7I,198
102
+ gllm_docproc/parser/html/flat/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
103
+ gllm_docproc/parser/html/flat/html_flat_parser.pyi,sha256=CwQZbYSn4QpeQ_uzGUcbFTl9jDy8BtYiZzJuK5vUygM,1258
104
+ gllm_docproc/parser/html/nested/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
+ gllm_docproc/parser/html/nested/html_json_processor.pyi,sha256=BSOhYl_Sp3eNn5gR62LYpRjNIM52Sp9zWed5Ukyyweo,6498
106
+ gllm_docproc/parser/html/nested/html_nested_parser.pyi,sha256=O3JhTWHIXWKNCl4kfk4lWH1SQRiv3bRbu_OMDSHicFA,1014
107
+ gllm_docproc/parser/html/nested/nested_element.pyi,sha256=-NnyjyGtP6BzQ1XQQNsm66FqL9tt75EYCxNQfh30ZGc,1277
108
+ gllm_docproc/parser/pipeline_parser.pyi,sha256=TBVbZQEdU9T-oNE4r9BE0VW1wu_sjy6VPPNVvwMdG9U,1270
109
+ gllm_docproc/parser/table/__init__.pyi,sha256=a2Zvp3tmJat1Kgp7ZWbikqzAviRVGyVLUcS9PGZ4owo,112
110
+ gllm_docproc/parser/table/table_caption_parser.pyi,sha256=bpp_vKDYRPyrveOBIiURYDGh5yBDU04PoenokLlRzRY,2770
111
+ gllm_docproc/request_handler/__init__.pyi,sha256=nulIcWHCBdrJHMbfJGIwfJmDnlcAI76FnRl-0xt5myw,112
112
+ gllm_docproc/request_handler/base_request_handler.pyi,sha256=uoVfkLrbu0X1nGd1tb-4jQctbnLQvKefidgXQdvOBG4,496
113
+ gllm_docproc/response_handler/__init__.pyi,sha256=JcTvIdJ4heHLgOcB4i6KGKfOSUVhjkGNhUXmxPgelXI,116
114
+ gllm_docproc/response_handler/base_response_handler.pyi,sha256=9dd-hLidcH427r4ArLBU2_Q27OObnp-kaqGud2O8ROY,1322
115
+ gllm_docproc/utils/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
116
+ gllm_docproc/utils/file_utils.pyi,sha256=Jra4dWe3OMRLX3B_zQz7JuCZfA7Tm0PJfuxJTLXHhko,2641
117
+ gllm_docproc/utils/html_constants.pyi,sha256=-4Gaw3aGFz33-8jDqT8pMw0jSaiKOhjjRHzEVVswsx8,2920
118
+ gllm_docproc.build/.gitignore,sha256=aEiIwOuxfzdCmLZe4oB1JsBmCUxwG8x-u-HBCV9JT8E,1
119
+ gllm_docproc.cp311-win_amd64.pyd,sha256=bauTXjN_Lwo3uA0aGqHfkcq40Ny5Y0uG5Cr5dJTZuOA,3102208
120
+ gllm_docproc.pyi,sha256=7HEXCdRwlq3zEY8FpmlpxffNOwehQiODxU7R1cocs2U,4952
121
+ gllm_docproc_binary-0.1.8.dist-info/METADATA,sha256=NdGZBZM1pBiIpw2FqAMRz5NHfTfmlOe1bSegEggsPZU,4725
122
+ gllm_docproc_binary-0.1.8.dist-info/WHEEL,sha256=-FZBVKyKauScY3vLa8vJR6hBCpAJfFykw2MOwlNKr1g,98
123
+ gllm_docproc_binary-0.1.8.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 1.9.0
3
+ Root-Is-Purelib: false
4
+ Tag: cp311-cp311-win_amd64