gllm-docproc-binary 0.1.8__cp311-cp311-macosx_13_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gllm-docproc-binary might be problematic. Click here for more details.

Files changed (123) hide show
  1. gllm_docproc/__init__.pyi +0 -0
  2. gllm_docproc/chunker/__init__.pyi +3 -0
  3. gllm_docproc/chunker/base_chunker.pyi +29 -0
  4. gllm_docproc/chunker/structured_element/__init__.pyi +3 -0
  5. gllm_docproc/chunker/structured_element/chunk_enricher.pyi +43 -0
  6. gllm_docproc/chunker/structured_element/structured_element_chunker.pyi +80 -0
  7. gllm_docproc/chunker/table/__init__.pyi +3 -0
  8. gllm_docproc/chunker/table/table_chunker.pyi +45 -0
  9. gllm_docproc/converter/__init__.pyi +3 -0
  10. gllm_docproc/converter/base_converter.pyi +16 -0
  11. gllm_docproc/data_generator/__init__.pyi +3 -0
  12. gllm_docproc/data_generator/base_data_generator.pyi +19 -0
  13. gllm_docproc/downloader/__init__.pyi +3 -0
  14. gllm_docproc/downloader/base_downloader.pyi +16 -0
  15. gllm_docproc/downloader/html/__init__.pyi +4 -0
  16. gllm_docproc/downloader/html/exception/__init__.pyi +4 -0
  17. gllm_docproc/downloader/html/exception/item_scrape_failed_exception.pyi +16 -0
  18. gllm_docproc/downloader/html/exception/zyte_api_key_not_provided_exception.pyi +15 -0
  19. gllm_docproc/downloader/html/html_downloader.pyi +91 -0
  20. gllm_docproc/downloader/html/scraper/__init__.pyi +0 -0
  21. gllm_docproc/downloader/html/scraper/scraper/__init__.pyi +0 -0
  22. gllm_docproc/downloader/html/scraper/scraper/spiders/__init__.pyi +9 -0
  23. gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_pdf_spider.pyi +27 -0
  24. gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_sitemap_link_spider.pyi +29 -0
  25. gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_sitemap_spider.pyi +61 -0
  26. gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_spider.pyi +50 -0
  27. gllm_docproc/downloader/html/scraper/scraper/spiders/playwright_scrape_spider.pyi +22 -0
  28. gllm_docproc/downloader/html/scraper/scraper/spiders/scrape_spider.pyi +57 -0
  29. gllm_docproc/downloader/html/scraper/scraper/spiders/zyte_scrape_spider.pyi +51 -0
  30. gllm_docproc/downloader/html/scraper/web_scraper_executor.pyi +45 -0
  31. gllm_docproc/downloader/html/utils/__init__.pyi +3 -0
  32. gllm_docproc/downloader/html/utils/web_utils.pyi +21 -0
  33. gllm_docproc/dpo_router/__init__.pyi +3 -0
  34. gllm_docproc/dpo_router/base_dpo_router.pyi +17 -0
  35. gllm_docproc/housekeeping/__init__.pyi +3 -0
  36. gllm_docproc/housekeeping/base_housekeeping.pyi +15 -0
  37. gllm_docproc/indexer/__init__.pyi +3 -0
  38. gllm_docproc/indexer/base_indexer.pyi +31 -0
  39. gllm_docproc/indexer/graph/__init__.pyi +3 -0
  40. gllm_docproc/indexer/knowledge_graph/__init__.pyi +4 -0
  41. gllm_docproc/loader/__init__.pyi +4 -0
  42. gllm_docproc/loader/audio/__init__.pyi +3 -0
  43. gllm_docproc/loader/base_loader.pyi +31 -0
  44. gllm_docproc/loader/docx/__init__.pyi +5 -0
  45. gllm_docproc/loader/docx/docx2python_loader.pyi +46 -0
  46. gllm_docproc/loader/docx/python_docx_loader.pyi +35 -0
  47. gllm_docproc/loader/docx/python_docx_table_loader.pyi +35 -0
  48. gllm_docproc/loader/exception/__init__.pyi +3 -0
  49. gllm_docproc/loader/exception/unsupported_file_extension_error.pyi +7 -0
  50. gllm_docproc/loader/html/__init__.pyi +5 -0
  51. gllm_docproc/loader/html/exception/__init__.pyi +3 -0
  52. gllm_docproc/loader/html/exception/html_load_exception.pyi +7 -0
  53. gllm_docproc/loader/html/flat/__init__.pyi +3 -0
  54. gllm_docproc/loader/html/flat/html_flat_base_handler.pyi +52 -0
  55. gllm_docproc/loader/html/flat/html_flat_loader.pyi +30 -0
  56. gllm_docproc/loader/html/flat/html_flat_merger.pyi +22 -0
  57. gllm_docproc/loader/html/html_base_loader.pyi +25 -0
  58. gllm_docproc/loader/html/nested/__init__.pyi +3 -0
  59. gllm_docproc/loader/html/nested/dictionary_utils.pyi +40 -0
  60. gllm_docproc/loader/html/nested/html_nested_base_handler.pyi +128 -0
  61. gllm_docproc/loader/html/nested/html_nested_element_handler.pyi +24 -0
  62. gllm_docproc/loader/html/nested/html_nested_loader.pyi +15 -0
  63. gllm_docproc/loader/html/utils/__init__.pyi +0 -0
  64. gllm_docproc/loader/html/utils/flat_table_utils.pyi +29 -0
  65. gllm_docproc/loader/html/utils/html_utils.pyi +41 -0
  66. gllm_docproc/loader/html/utils/removed_components.pyi +53 -0
  67. gllm_docproc/loader/html/utils/string_utils.pyi +33 -0
  68. gllm_docproc/loader/html/utils/table_utils.pyi +78 -0
  69. gllm_docproc/loader/json/__init__.pyi +3 -0
  70. gllm_docproc/loader/json/json_elements_loader.pyi +33 -0
  71. gllm_docproc/loader/loader_utils.pyi +42 -0
  72. gllm_docproc/loader/pdf/__init__.pyi +13 -0
  73. gllm_docproc/loader/pdf/adobe_pdf_extract_loader.pyi +40 -0
  74. gllm_docproc/loader/pdf/azure_ai_document_intelligence_loader.pyi +45 -0
  75. gllm_docproc/loader/pdf/azure_ai_document_intelligence_raw_loader.pyi +50 -0
  76. gllm_docproc/loader/pdf/glair_vision_ocr_loader.pyi +38 -0
  77. gllm_docproc/loader/pdf/pdf_loader_utils.pyi +59 -0
  78. gllm_docproc/loader/pdf/pdf_miner_loader.pyi +38 -0
  79. gllm_docproc/loader/pdf/pdf_miner_word_loader.pyi +33 -0
  80. gllm_docproc/loader/pdf/pdf_plumber_loader.pyi +38 -0
  81. gllm_docproc/loader/pdf/pymupdf_loader.pyi +43 -0
  82. gllm_docproc/loader/pdf/pymupdf_span_loader.pyi +44 -0
  83. gllm_docproc/loader/pdf/pymupdf_utils.pyi +34 -0
  84. gllm_docproc/loader/pdf/tabula_loader.pyi +32 -0
  85. gllm_docproc/loader/pdf/text_inject_pdf_plumber_loader.pyi +37 -0
  86. gllm_docproc/loader/pipeline_loader.pyi +48 -0
  87. gllm_docproc/loader/txt/__init__.pyi +3 -0
  88. gllm_docproc/loader/txt/txt_loader.pyi +26 -0
  89. gllm_docproc/loader/xlsx/__init__.pyi +3 -0
  90. gllm_docproc/loader/xlsx/openpyxl_loader.pyi +37 -0
  91. gllm_docproc/model/__init__.pyi +4 -0
  92. gllm_docproc/model/element.pyi +37 -0
  93. gllm_docproc/model/element_metadata.pyi +35 -0
  94. gllm_docproc/parser/__init__.pyi +4 -0
  95. gllm_docproc/parser/base_parser.pyi +29 -0
  96. gllm_docproc/parser/document/__init__.pyi +6 -0
  97. gllm_docproc/parser/document/docx_parser.pyi +27 -0
  98. gllm_docproc/parser/document/pdf_parser.pyi +35 -0
  99. gllm_docproc/parser/document/txt_parser.pyi +22 -0
  100. gllm_docproc/parser/document/xlsx_parser.pyi +26 -0
  101. gllm_docproc/parser/html/__init__.pyi +4 -0
  102. gllm_docproc/parser/html/flat/__init__.pyi +0 -0
  103. gllm_docproc/parser/html/flat/html_flat_parser.pyi +27 -0
  104. gllm_docproc/parser/html/nested/__init__.pyi +0 -0
  105. gllm_docproc/parser/html/nested/html_json_processor.pyi +158 -0
  106. gllm_docproc/parser/html/nested/html_nested_parser.pyi +24 -0
  107. gllm_docproc/parser/html/nested/nested_element.pyi +31 -0
  108. gllm_docproc/parser/pipeline_parser.pyi +33 -0
  109. gllm_docproc/parser/table/__init__.pyi +3 -0
  110. gllm_docproc/parser/table/table_caption_parser.pyi +66 -0
  111. gllm_docproc/request_handler/__init__.pyi +3 -0
  112. gllm_docproc/request_handler/base_request_handler.pyi +17 -0
  113. gllm_docproc/response_handler/__init__.pyi +3 -0
  114. gllm_docproc/response_handler/base_response_handler.pyi +39 -0
  115. gllm_docproc/utils/__init__.pyi +0 -0
  116. gllm_docproc/utils/file_utils.pyi +76 -0
  117. gllm_docproc/utils/html_constants.pyi +121 -0
  118. gllm_docproc.build/.gitignore +1 -0
  119. gllm_docproc.cpython-311-darwin.so +0 -0
  120. gllm_docproc.pyi +149 -0
  121. gllm_docproc_binary-0.1.8.dist-info/METADATA +110 -0
  122. gllm_docproc_binary-0.1.8.dist-info/RECORD +123 -0
  123. gllm_docproc_binary-0.1.8.dist-info/WHEEL +4 -0
@@ -0,0 +1,76 @@
1
+ from typing import Any
2
+
3
+ def create_folder(folder_path: str) -> None:
4
+ """Create a folder.
5
+
6
+ This function checks whether the folder path exists. If the folder path does not
7
+ exist, the function creates a folder in the specified folder path.
8
+
9
+ Args:
10
+ folder_path (str): The folder path to create.
11
+ """
12
+ def create_full_path(dir_path: str, filename: str, file_extension: str) -> str:
13
+ """Create a full path for a file.
14
+
15
+ This function creates a full path for a file by combining the directory
16
+ path, the filename, and the file extension.
17
+
18
+ Args:
19
+ dir_path (str): The directory path.
20
+ filename (str): The filename.
21
+ file_extension (str): The file extension.
22
+
23
+ Returns:
24
+ str: The full path for the file.
25
+ """
26
+ def save_to_json(elements: list[dict[str, Any]] | dict[str, Any], folder_path: str, file_name: str) -> None:
27
+ """Save a list of elements to a JSON file.
28
+
29
+ This function saves a list of elements to a JSON file. The function takes
30
+ the list of elements, the folder path, and the file name as input and saves
31
+ the elements to a JSON file in the specified folder.
32
+
33
+ Args:
34
+ elements (list[dict[str, Any]] | dict[str, Any]): The list of elements (or a single dictionary) to save.
35
+ folder_path (str): The folder path to save the JSON file.
36
+ file_name (str): The file name of the JSON file.
37
+
38
+ Returns:
39
+ None
40
+ """
41
+ def save_to_csv(elements: list[dict[str, Any]], folder_path: str, file_name: str) -> None:
42
+ """Save a list of elements to a CSV file.
43
+
44
+ This function saves a list of elements to a CSV file. The function takes
45
+ the list of elements, the folder path, and the file name as input and saves
46
+ the elements to a CSV file in the specified folder.
47
+
48
+ Args:
49
+ elements (list[dict[str, Any]]): The list of elements to save.
50
+ folder_path (str): The folder path to save the CSV file.
51
+ file_name (str): The file name of the CSV file.
52
+
53
+ Returns:
54
+ None
55
+ """
56
+ def save_file(content: str, filename: str):
57
+ """Save the content to a file.
58
+
59
+ Args:
60
+ content (str): The content to save.
61
+ filename (str): The filename to save the content to.
62
+
63
+ Returns:
64
+ None
65
+ """
66
+ def read_json_file(file_path: str) -> list[dict[str, Any]] | dict[str, Any]:
67
+ """Read a JSON file.
68
+
69
+ This function reads a JSON file and returns the content of the JSON file.
70
+
71
+ Args:
72
+ file_path (str): The path of the JSON file to read.
73
+
74
+ Returns:
75
+ list[dict[str, Any]] | dict[str, Any]: The content of the JSON file.
76
+ """
@@ -0,0 +1,121 @@
1
+ from _typeshed import Incomplete
2
+ from gllm_docproc.model.element import AUDIO as AUDIO, FOOTER as FOOTER, HEADER as HEADER, HEADING as HEADING, IMAGE as IMAGE, TABLE as TABLE, TITLE as TITLE, VIDEO as VIDEO
3
+
4
+ FORMATTING_TAGS: Incomplete
5
+ SPACING: str
6
+
7
+ class MetaDataKeys:
8
+ """Represents keys commonly used in metadata for web content."""
9
+ CHARSET: str
10
+ PROPERTY: str
11
+ CONTENT: str
12
+ NAME: str
13
+ HTTP_EQUIV: str
14
+ URL: str
15
+ TITLE: str
16
+ METADATA: str
17
+ SOURCE: str
18
+ SOURCE_TYPE: str
19
+ LOADED_DATETIME: str
20
+
21
+ class ContentDataKeys:
22
+ """Represents keys commonly used in web content data."""
23
+ TAG: str
24
+ CONTENT: str
25
+ SOURCE: str
26
+ TYPE: str
27
+ SRC: str
28
+ PLACEHOLDER: str
29
+ TABLE: str
30
+ HREF: str
31
+ ALT: str
32
+ CLASS: str
33
+ VALUE: str
34
+
35
+ class ItemDataKeys:
36
+ """Represents keys used for handling item data."""
37
+ ELEMENTS: str
38
+ TEXT: str
39
+ STRUCTURE: str
40
+ ELEMENT_ID: str
41
+ INDEX: str
42
+ LINK: str
43
+ FORMATS: str
44
+ COMBINE_PREV: str
45
+ LIST_TYPE: str
46
+ IS_LIST_FIRST_ITEM: str
47
+ METADATA: str
48
+ URL: str
49
+ GROUP_ID: str
50
+ PARENT_ID: str
51
+ LINE_BREAK: str
52
+ HTML_TAGS: str
53
+ ROW_ITEM: str
54
+ COLSPAN: str
55
+ ROWSPAN: str
56
+
57
+ class HTMLTags:
58
+ """Represents commonly used HTML tags as constants."""
59
+ IMG: str
60
+ INPUT: str
61
+ SVG: str
62
+ SOURCE: str
63
+ TABLE: str
64
+ A: str
65
+ VIDEO: str
66
+ AUDIO: str
67
+ IFRAME: str
68
+ EMBED: str
69
+ TEXT: str
70
+ UL: str
71
+ OL: str
72
+ LI: str
73
+ P: str
74
+ BR: str
75
+ H: Incomplete
76
+ HEADER: str
77
+ TITLE: str
78
+ FOOTER: str
79
+ MEDIA_TAGS: Incomplete
80
+ TR: str
81
+ TD: str
82
+ TH: str
83
+ TBODY: str
84
+ TFOOT: str
85
+ THEAD: str
86
+
87
+ class ErrorMessage:
88
+ """Represents predefined error messages used in the application."""
89
+ ERROR_FAILED_SAVE_JSON: str
90
+ ERROR_FAILED_SAVE_CSV: str
91
+ ERROR_FAILED_EXTRACT_DATA: str
92
+ ERROR_MISSING_KEY: str
93
+ ERROR_FAILED_TO_PROCESS_ITEM: str
94
+ ERROR_FAILED_TO_OPEN_SPIDER: str
95
+ ERROR_UNKNOWN_SOURCE: str
96
+
97
+ class Structure:
98
+ """Represents the structure of the content."""
99
+ @classmethod
100
+ def get_structure(cls, tag: str):
101
+ """Get the structure associated with the given HTML tag.
102
+
103
+ This class method maps HTML tags to their corresponding structure types and returns the
104
+ structure associated with the provided HTML tag.
105
+
106
+ Args:
107
+ tag (str): The HTML tag for which to retrieve the structure.
108
+
109
+ Returns:
110
+ str or None: The structure associated with the HTML tag, or None if the tag is not mapped.
111
+ """
112
+
113
+ class TableConstants:
114
+ """Represents constants used for table extraction."""
115
+ TABLE_META_KEY: str
116
+ TABLE_CONTENT_KEY: str
117
+ TABLE_ROW_TYPE_KEY: str
118
+ MAX_CHAR_COUNT_PER_COLUMN: str
119
+ HEADER: str
120
+ BODY: str
121
+ FOOTER: str
@@ -0,0 +1 @@
1
+ *
Binary file
gllm_docproc.pyi ADDED
@@ -0,0 +1,149 @@
1
+ # This file was generated by Nuitka
2
+
3
+ # Stubs included by default
4
+
5
+
6
+ __name__ = ...
7
+
8
+
9
+
10
+ # Modules used internally, to allow implicit dependencies to be seen:
11
+ import os
12
+ import abc
13
+ import typing
14
+ import hashlib
15
+ import inspect
16
+ import re
17
+ import langchain_text_splitters
18
+ import gllm_docproc.chunker.table.TableChunker
19
+ import pandas
20
+ import copy
21
+ import datetime
22
+ import scrapy
23
+ import gllm_docproc.downloader.html.exception.ItemScrapeFailedException
24
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.CrawlBaseSpider
25
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.CrawlSitemapLinkSpider
26
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.CrawlSitemapSpider
27
+ import gllm_docproc.downloader.html.utils.clean_url
28
+ import gllm_docproc.downloader.html.utils.is_valid_url
29
+ import scrapy.http
30
+ import scrapy_playwright
31
+ import scrapy_playwright.page
32
+ import urllib
33
+ import urllib.parse
34
+ import scrapy.crawler
35
+ import scrapy.spiders
36
+ import scrapy.spiders.sitemap
37
+ import scrapy.utils
38
+ import scrapy.utils.sitemap
39
+ import scrapy.linkextractors
40
+ import requests
41
+ import billiard
42
+ import gllm_docproc.downloader.html.exception.ZyteApiKeyNotProvidedException
43
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.ScrapeSpider
44
+ import gllm_docproc.downloader.html.scraper.scraper.spiders.ZyteScrapeSpider
45
+ import gllm_docproc.indexer.BaseIndexer
46
+ import uuid
47
+ import gllm_core
48
+ import gllm_core.utils
49
+ import gllm_core.utils.logger_manager
50
+ import gllm_datastore
51
+ import gllm_datastore.graph_data_store
52
+ import gllm_datastore.graph_data_store.llama_index_graph_rag_data_store
53
+ import gllm_datastore.graph_data_store.llama_index_neo4j_graph_rag_data_store
54
+ import llama_index
55
+ import llama_index.core
56
+ import llama_index.core.base
57
+ import llama_index.core.base.embeddings
58
+ import llama_index.core.base.embeddings.base
59
+ import llama_index.core.base.llms
60
+ import llama_index.core.base.llms.base
61
+ import llama_index.core.indices
62
+ import llama_index.core.indices.property_graph
63
+ import llama_index.core.indices.property_graph.transformations
64
+ import llama_index.core.schema
65
+ import llama_index.core.vector_stores
66
+ import llama_index.core.vector_stores.types
67
+ import gllm_core.utils.imports
68
+ import gllm_misc
69
+ import gllm_misc.knowledge_graph
70
+ import gllm_misc.knowledge_graph.graph_store
71
+ import asyncio
72
+ import gllm_misc.multimodal_manager
73
+ import gllm_misc.multimodal_manager.audio_to_text
74
+ import gllm_misc.multimodal_manager.audio_to_text.audio_to_text
75
+ import gllm_misc.multimodal_manager.schema
76
+ import base64
77
+ import docx2python
78
+ import docx2python.docx_output
79
+ import docx
80
+ import docx.table
81
+ import docx.text
82
+ import docx.text.paragraph
83
+ import gllm_docproc.loader.html.flat.HTMLFlatLoader
84
+ import gllm_docproc.loader.html.nested.HTMLNestedLoader
85
+ import parsel
86
+ import gllm_docproc.loader.html.exception.HtmlLoadException
87
+ import tabulate
88
+ import itertools
89
+ import __future__
90
+ import re.sub
91
+ import w3lib
92
+ import w3lib.html
93
+ import json
94
+ import gllm_docproc.loader.exception.UnsupportedFileExtensionError
95
+ import csv
96
+ import io
97
+ import zipfile
98
+ import adobe
99
+ import adobe.pdfservices
100
+ import adobe.pdfservices.operation
101
+ import adobe.pdfservices.operation.auth
102
+ import adobe.pdfservices.operation.auth.service_principal_credentials
103
+ import adobe.pdfservices.operation.io
104
+ import adobe.pdfservices.operation.io.cloud_asset
105
+ import adobe.pdfservices.operation.io.stream_asset
106
+ import adobe.pdfservices.operation.pdf_services
107
+ import adobe.pdfservices.operation.pdf_services_media_type
108
+ import adobe.pdfservices.operation.pdf_services_response
109
+ import adobe.pdfservices.operation.pdfjobs
110
+ import adobe.pdfservices.operation.pdfjobs.jobs
111
+ import adobe.pdfservices.operation.pdfjobs.jobs.extract_pdf_job
112
+ import adobe.pdfservices.operation.pdfjobs.params
113
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf
114
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf.extract_element_type
115
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf.extract_pdf_params
116
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf.extract_renditions_element_type
117
+ import adobe.pdfservices.operation.pdfjobs.params.extract_pdf.table_structure_type
118
+ import adobe.pdfservices.operation.pdfjobs.result
119
+ import adobe.pdfservices.operation.pdfjobs.result.extract_pdf_result
120
+ import azure
121
+ import azure.ai
122
+ import azure.ai.documentintelligence
123
+ import azure.ai.documentintelligence.models
124
+ import azure.core
125
+ import azure.core.credentials
126
+ import collections
127
+ import collections.Counter
128
+ import pdfminer
129
+ import pdfminer.high_level
130
+ import pdfminer.layout
131
+ import pdfplumber
132
+ import pdfplumber._typing
133
+ import pdfplumber.page
134
+ import pdfplumber.table
135
+ import fitz
136
+ import tabula
137
+ import tabula.io
138
+ import gllm_datastore.cache_data_store
139
+ import gllm_datastore.cache_data_store.cache_data_store
140
+ import gllm_datastore.cache_data_store.utils
141
+ import openpyxl
142
+ import openpyxl.cell
143
+ import openpyxl.cell.cell
144
+ import openpyxl.worksheet
145
+ import openpyxl.worksheet.worksheet
146
+ import pydantic
147
+ import math
148
+ import gllm_docproc.parser.BaseParser
149
+ import posixpath
@@ -0,0 +1,110 @@
1
+ Metadata-Version: 2.1
2
+ Name: gllm-docproc-binary
3
+ Version: 0.1.8
4
+ Summary: A library for orchestrating the processing of documents, typically in Gen AI applications (but not limited to Gen AI).
5
+ Author: GenAI SDK Team
6
+ Author-email: gat-sdk@gdplabs.id
7
+ Requires-Python: >=3.11,<3.13
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.11
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Provides-Extra: audio
12
+ Provides-Extra: docx
13
+ Provides-Extra: html
14
+ Provides-Extra: kg
15
+ Provides-Extra: pdf
16
+ Provides-Extra: xlsx
17
+ Requires-Dist: azure-ai-documentintelligence (>=1.0.0b3,<2.0.0) ; extra == "pdf"
18
+ Requires-Dist: billiard (>=4.2.1,<5.0.0) ; extra == "html"
19
+ Requires-Dist: docx2python (==2.8.0) ; extra == "docx"
20
+ Requires-Dist: gllm-core-binary
21
+ Requires-Dist: gllm-datastore-binary
22
+ Requires-Dist: gllm-misc-binary[audio,kg]
23
+ Requires-Dist: jpype1 (>=1.5.0,<2.0.0) ; extra == "pdf"
24
+ Requires-Dist: langchain-text-splitters (>=0.3.2,<0.4.0)
25
+ Requires-Dist: libmagic (>=1.0,<2.0) ; sys_platform == "win32"
26
+ Requires-Dist: librosa (==0.10.1) ; extra == "audio"
27
+ Requires-Dist: llama-index-embeddings-openai (>=0.3.0,<0.4.0) ; extra == "kg"
28
+ Requires-Dist: llama-index-llms-openai (>=0.3.0,<0.4.0) ; extra == "kg"
29
+ Requires-Dist: openpyxl (>=3.0.10,<4.0.0) ; extra == "xlsx"
30
+ Requires-Dist: pandas (>=2.2.2,<3.0.0)
31
+ Requires-Dist: pdfminer-six (>=20231228,<20231229) ; extra == "pdf"
32
+ Requires-Dist: pdfplumber (>=0.11.4,<0.12.0) ; extra == "pdf"
33
+ Requires-Dist: pdfservices-sdk (>=4.0.0,<5.0.0) ; extra == "pdf"
34
+ Requires-Dist: playwright (>=1.40.0,<2.0.0) ; extra == "html"
35
+ Requires-Dist: pydantic (>=2.9.1,<3.0.0)
36
+ Requires-Dist: pymupdf (>=1.24.10,<2.0.0) ; extra == "pdf"
37
+ Requires-Dist: python-docx (==1.1.0) ; extra == "docx"
38
+ Requires-Dist: python-magic-bin (>=0.4.14,<0.5.0) ; sys_platform == "win32"
39
+ Requires-Dist: scrapy (>=2.11.0,<3.0.0) ; extra == "html"
40
+ Requires-Dist: scrapy-playwright (>=0.0.33,<0.1.0) ; extra == "html"
41
+ Requires-Dist: scrapy_zyte_api (>=0.12.2,<0.13.0) ; extra == "html"
42
+ Requires-Dist: tabula-py (>=2.9.3,<3.0.0) ; extra == "pdf"
43
+ Requires-Dist: tabulate (>=0.9.0,<0.10.0) ; extra == "pdf"
44
+ Requires-Dist: tqdm (==4.66.2) ; extra == "audio"
45
+ Requires-Dist: zyte-api (>=0.4.8,<0.5.0) ; extra == "html"
46
+ Description-Content-Type: text/markdown
47
+
48
+ # GDP Labs Language Model Document Processing Orchestrator
49
+
50
+ ## Description
51
+
52
+ A library for orchestrating the processing of documents, typically in Gen AI applications (but not limited to Gen AI).
53
+
54
+ ## Installation
55
+
56
+ 1. Python v3.11 or v3.12 (this package declares `Requires-Python: >=3.11,<3.13`):
57
+
58
+ You can install Python using [Miniconda](https://docs.anaconda.com/free/miniconda/index.html).
59
+
60
+ 2. Make sure you're in the `base` conda environment:
61
+ ```bash
62
+ conda activate
63
+ ```
64
+
65
+ 3. [Poetry](https://python-poetry.org/docs/) v1.8.1 or above:
66
+
67
+ You can install Poetry using cURL (you need Python to install Poetry):
68
+ ```bash
69
+ curl -sSL https://install.python-poetry.org | python3 -
70
+ ```
71
+
72
+ 4. Install the library using Poetry:
73
+ ```bash
74
+ # Latest
75
+ poetry add "git+ssh://git@github.com/GDP-ADMIN/gen-ai-internal.git#subdirectory=libs/gllm-docproc"
76
+
77
+ # Specific version
78
+ poetry add "git+ssh://git@github.com/GDP-ADMIN/gen-ai-internal.git@gllm_docproc-v0.0.1-beta.1#subdirectory=libs/gllm-docproc"
79
+
80
+ # This PR
81
+ poetry add "git+ssh://git@github.com/GDP-ADMIN/gen-ai-internal.git@decision/separate-document-processing#subdirectory=libs/gllm-docproc"
82
+ ```
83
+
84
+ 5. At this point, you can deactivate the Miniconda environment, as Poetry will create and manage its own virtual environment for you.
85
+ ```bash
86
+ conda deactivate
87
+ ```
88
+
89
+ ## Managing Dependencies
90
+ 1. Go to the root folder of the `gllm-docproc` module, e.g. `cd libs/gllm-docproc`.
91
+ 2. Run `poetry shell` to create a virtual environment.
92
+ 3. Run `poetry lock` to create a lock file if you haven't done it yet.
93
+ 4. Run `poetry install` to install the `gllm-docproc` requirements for the first time.
94
+ 5. Run `poetry update` if you change any dependency version in `pyproject.toml`.
95
+
96
+
97
+ ## Contributing
98
+ Please refer to this [Python Style Guide](https://docs.google.com/document/d/1uRggCrHnVfDPBnG641FyQBwUwLoFw0kTzNqRm92vUwM/edit?usp=sharing)
99
+ to get information about code style, documentation standard, and SCA that you need to use when contributing to this project.
100
+
101
+ 1. Activate `pre-commit` hooks using `pre-commit install`
102
+ 2. Run `poetry shell` to create a virtual environment.
103
+ 3. Run `poetry lock` to create a lock file if you haven't done it yet.
104
+ 4. Run `poetry install` to install the `gllm-docproc` requirements for the first time.
105
+ 5. Run `which python` to get the path to set as the Visual Studio Code interpreter path (`Ctrl`+`Shift`+`P` or `Cmd`+`Shift`+`P`)
106
+ 6. Try running the unit tests to see if everything is working:
107
+ ```bash
108
+ poetry run pytest -s tests/unit_tests/
109
+ ```
110
+
@@ -0,0 +1,123 @@
1
+ gllm_docproc/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ gllm_docproc/chunker/__init__.pyi,sha256=eA7-yNoiTmdgdk8KUEiFSMNu4nFm7J-D7BV-7J4_QgQ,80
3
+ gllm_docproc/chunker/base_chunker.pyi,sha256=60uhKGbT3SG6Tms3CCUMhmtUCU7O9LvLoO20YBAlI8c,1166
4
+ gllm_docproc/chunker/structured_element/__init__.pyi,sha256=FGOqqpZRBIMbsHGRiS3DehxJ3dG6wOYTBYThVqytiSI,133
5
+ gllm_docproc/chunker/structured_element/chunk_enricher.pyi,sha256=oYNCRlwb_pmEolOvWRSxIbUaTSMcc0z-DT1LqcsQMDw,1704
6
+ gllm_docproc/chunker/structured_element/structured_element_chunker.pyi,sha256=7nTCRRpYbdonXARFjiWXdrSO3RWVhiWCXeu7PJzy5F4,4063
7
+ gllm_docproc/chunker/table/__init__.pyi,sha256=oYBcaQP91xP32ITWWhYezzpYkIytbZ_P9VrCpmbQmjM,159
8
+ gllm_docproc/chunker/table/table_chunker.pyi,sha256=Nb4xDLl5ZR_N8ilRmHZyviNaqHEbeuj0WcNL7owVrP0,1853
9
+ gllm_docproc/converter/__init__.pyi,sha256=t8FWQ6ivPnaIHXKD9tNlaelmtXIguvEPwgNUobwHobE,88
10
+ gllm_docproc/converter/base_converter.pyi,sha256=mfWj9QsTtAApres3cA9xU1YiLv6zyF6NXZG6tyFCV_w,461
11
+ gllm_docproc/data_generator/__init__.pyi,sha256=AGUTikBljaFpj2fNUBVJrtvoiCXkGppGCp-iL1Ay7VU,132
12
+ gllm_docproc/data_generator/base_data_generator.pyi,sha256=KPGRY-DfqDHiJaFQTBU5MCrcaEB5nu6lxzcXmK5Dof8,806
13
+ gllm_docproc/downloader/__init__.pyi,sha256=hHhXMBFcN_SL0wOw7aVPBhB_vxkfIToSiiNtY6T8oBQ,92
14
+ gllm_docproc/downloader/base_downloader.pyi,sha256=4fhfI9m_7Q58ZGS7_o8mWVTF8BusDShgR2iss7WFZAc,499
15
+ gllm_docproc/downloader/html/__init__.pyi,sha256=6Dta8NuG2VHJh7Q2TqCcgwH-q9eI_1uziLQtBxINGmg,130
16
+ gllm_docproc/downloader/html/exception/__init__.pyi,sha256=ZEV6EjWuDZ6Rr-mRm3gad8DuvmNivr2v8fYVXxFB-ks,286
17
+ gllm_docproc/downloader/html/exception/item_scrape_failed_exception.pyi,sha256=MrQnR6wuxec85q-54QZ27kCnw4p4CUj8hawfAMldVCE,607
18
+ gllm_docproc/downloader/html/exception/zyte_api_key_not_provided_exception.pyi,sha256=gjX1ovxTSMQ3dCgYA-434Hp-rURVB5xEtlfMVvpsAmU,589
19
+ gllm_docproc/downloader/html/html_downloader.pyi,sha256=02UKPi16YCO9EC8C5asN_4v65KAubIDHgHgu4ddLwDw,4379
20
+ gllm_docproc/downloader/html/scraper/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
+ gllm_docproc/downloader/html/scraper/scraper/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ gllm_docproc/downloader/html/scraper/scraper/spiders/__init__.pyi,sha256=oPIUQoIMRMTuyUOMvMZgzRjyVybMYNqlrDmD5ewMWVI,658
23
+ gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_pdf_spider.pyi,sha256=0wIdSVXbt9kz6jufaefl_Q3nW_RphBZ20thG14Ma3YU,1365
24
+ gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_sitemap_link_spider.pyi,sha256=7eTTi7ty6nA47r4J_iNHaAuB0ppa6vgP2ySzyZOiuLw,1176
25
+ gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_sitemap_spider.pyi,sha256=UFIV1zViovRrvpXScfQU3KSYT8N5s98mFLha_SAvSu4,2545
26
+ gllm_docproc/downloader/html/scraper/scraper/spiders/crawl_spider.pyi,sha256=ZlkojagDlxeJPOYGQD2cI5anZhMXewZYM2_o_QC0EDA,2217
27
+ gllm_docproc/downloader/html/scraper/scraper/spiders/playwright_scrape_spider.pyi,sha256=ezzD70mUTcOC7k7uZ3Tydo1TXNCcXJ3jTS-Mz0UjoUU,950
28
+ gllm_docproc/downloader/html/scraper/scraper/spiders/scrape_spider.pyi,sha256=zTpe6zxJq77wmz1zOv-35vf62NSvmMuM2AJhUz89-MA,2105
29
+ gllm_docproc/downloader/html/scraper/scraper/spiders/zyte_scrape_spider.pyi,sha256=2fhFXS3I99MjDd56b0SukB04Q_io4-h2aiud9GWSaWQ,2164
30
+ gllm_docproc/downloader/html/scraper/web_scraper_executor.pyi,sha256=DBfbN6pP2N9PdyCatDImGqRVPExTMGfQJCIBrLacSTs,2154
31
+ gllm_docproc/downloader/html/utils/__init__.pyi,sha256=Sa2vBqKvbLroa3cJ58OWVhRW9VBWmj4fsImeImWaol4,117
32
+ gllm_docproc/downloader/html/utils/web_utils.pyi,sha256=xqHtzk5csplpaeesGSNaUN-wUvtUMOp8BjA2UwurCUM,490
33
+ gllm_docproc/dpo_router/__init__.pyi,sha256=jxXnlqGqBadRPwS-rUDYnxWofpMDnhHkwp69PU_baZY,89
34
+ gllm_docproc/dpo_router/base_dpo_router.pyi,sha256=oAXDIXvK4D3I9uBnYM1f8gWxXNIAigtoe6VB5RxtAcY,613
35
+ gllm_docproc/housekeeping/__init__.pyi,sha256=hCHqIldkCC8OetWprbipx1XwenES9iGHlaNRTwzqR4g,100
36
+ gllm_docproc/housekeeping/base_housekeeping.pyi,sha256=Xv6KFaBFqAJoNKvX96OSSrQC8KToaCddGIRWsH0W7Z8,450
37
+ gllm_docproc/indexer/__init__.pyi,sha256=2rF0XCyWpcpYif-We8ZC-ZpCAzLpMoClEmXhNVh2-s8,100
38
+ gllm_docproc/indexer/base_indexer.pyi,sha256=EjQZRcP_6KDAnEGeBWKqYY4ealCD1bnr16Y6_qpex6w,1086
39
+ gllm_docproc/indexer/graph/__init__.pyi,sha256=wMtI-lHQ1oa5azRAfWw0U6urOML9o6lngjg5U70N1_k,139
40
+ gllm_docproc/indexer/knowledge_graph/__init__.pyi,sha256=olJyC7PWty8bpL4qOBxpDxeMfxmcPA8ir7CQ2sookJk,263
41
+ gllm_docproc/loader/__init__.pyi,sha256=STxqJvk7NyqQtJ1pmBycKaKgt2aNYaFfpXoTqMCzY9g,156
42
+ gllm_docproc/loader/audio/__init__.pyi,sha256=dNPtbcP9rdG7Dr_jYcUEZ_rpyK2X0LdFC9S_JG8PcpQ,80
43
+ gllm_docproc/loader/base_loader.pyi,sha256=8JEx4EGJOfdga5QK9Ujb9poFC5tVwUjT7x4IASYNBNY,1356
44
+ gllm_docproc/loader/docx/__init__.pyi,sha256=R69OD2iJ7-M9mn6Nm3_iqIY1BxHYodHemyYuWE2LMyg,303
45
+ gllm_docproc/loader/docx/docx2python_loader.pyi,sha256=0tDTrJLIcmHmdk9kIzX-ba41bW0S1-foL91sHjNaZW8,2474
46
+ gllm_docproc/loader/docx/python_docx_loader.pyi,sha256=SBp4IqD19zG-MJ_doOrW7JIbbtlPBvn3--uCWEv7loI,2016
47
+ gllm_docproc/loader/docx/python_docx_table_loader.pyi,sha256=RokUQ6a6k-pDbGTqUJyoVVG89Wo5G_dfSUjshFYHl2A,1721
48
+ gllm_docproc/loader/exception/__init__.pyi,sha256=5B7ElK7ImkTlCzKGQD1DW4LZH_5SrRjBQALS0fDjR5M,154
49
+ gllm_docproc/loader/exception/unsupported_file_extension_error.pyi,sha256=ux-G38V35LP3Y__I98ccTv5ekoOdOXUKK2UQh0WYd3g,261
50
+ gllm_docproc/loader/html/__init__.pyi,sha256=rBA2i4IXnLnpLPi0SHvGtUMVgJ1pFMV-48pIrS-1cKQ,239
51
+ gllm_docproc/loader/html/exception/__init__.pyi,sha256=itSjlJJFFC9Z9ZYdby5iW1yd77eKws0tEwOkQmnVHm0,105
52
+ gllm_docproc/loader/html/exception/html_load_exception.pyi,sha256=7umMZsJSHs3H-gyIsxRtaGSmvpVskU25MEGS6DLy9-w,254
53
+ gllm_docproc/loader/html/flat/__init__.pyi,sha256=Q7WDbp0ZZ1ATcvn8I4Ix1OBJYGXajHMXxXceAmig8c0,93
54
+ gllm_docproc/loader/html/flat/html_flat_base_handler.pyi,sha256=eQ5tD4AAg4HfTZbT6FH0MSWSbsZaZM_1w7J87QNj_Uc,2654
55
+ gllm_docproc/loader/html/flat/html_flat_loader.pyi,sha256=MLA-8IbZxV03wfQDgpbiSGvd1IesNE3nhVcr_DzyD2A,1809
56
+ gllm_docproc/loader/html/flat/html_flat_merger.pyi,sha256=mnXj-tjis3r3fheY1WYzj5brxdVgZBByuvoKH5jaHtE,1384
57
+ gllm_docproc/loader/html/html_base_loader.pyi,sha256=_cQCV_DGY5XkfU0VlVirh2im9bFO0WK_uE4RZCKe7z8,1160
58
+ gllm_docproc/loader/html/nested/__init__.pyi,sha256=pd14asYg0pcfwnlLkvE4rpfIKFphdg86Kfy_Jun74ho,101
59
+ gllm_docproc/loader/html/nested/dictionary_utils.pyi,sha256=TNcvYtYUv41pZeMaQVX3WW9gIe0elI9l_MmzIWzcj64,1494
60
+ gllm_docproc/loader/html/nested/html_nested_base_handler.pyi,sha256=YBlhfDq_Mq0eYiA-8XDIH1EnlqxnSm6Tnzb6Af3dMwY,4734
61
+ gllm_docproc/loader/html/nested/html_nested_element_handler.pyi,sha256=wHJJDl28NgfbsKCD8UgsucWth4mXVmMMdp0c2RaeEtc,916
62
+ gllm_docproc/loader/html/nested/html_nested_loader.pyi,sha256=YXXo34KJ9WMzYdzSng0a6fq9THf909-CkoTJ81pdLKs,917
63
+ gllm_docproc/loader/html/utils/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
+ gllm_docproc/loader/html/utils/flat_table_utils.pyi,sha256=ho48kICU2FzLnzRi96KRH4gMMDflH5kxLJyZnFsAl88,1202
65
+ gllm_docproc/loader/html/utils/html_utils.pyi,sha256=n1fXCjo1dHMiSuJsPHzQHgaYAxQCujQ96IX8qBV_7K0,1644
66
+ gllm_docproc/loader/html/utils/removed_components.pyi,sha256=UGUFq-rUlufUbs7uAp_FbDNzNVe_f4YVviwx_TLVSOg,2090
67
+ gllm_docproc/loader/html/utils/string_utils.pyi,sha256=OrXkM4n53mS5GIrlZJNCfQQLP8CMULt4yVyogNW38r4,1092
68
+ gllm_docproc/loader/html/utils/table_utils.pyi,sha256=UtW25yjgw5ui3L-WxOjMYQCnb8my8cDfm9qZ8BFaRIo,2761
69
+ gllm_docproc/loader/json/__init__.pyi,sha256=FwvHdSzGzNaPGTw0ge_-YVqBoPj5YQtLJLs3JxdNvEE,109
70
+ gllm_docproc/loader/json/json_elements_loader.pyi,sha256=A38-goFeB0biNPjZBaKu42fB4gCYCH6bVOZEq3hUlE4,1455
71
+ gllm_docproc/loader/loader_utils.pyi,sha256=4U905dncFjOHGMHNG5OtPwnmLXMoRCzKxOIX-wIe1Ro,1685
72
+ gllm_docproc/loader/pdf/__init__.pyi,sha256=N6-PmEK7tRPr4GRrPLBHwGjzYx9nEuBGLM0i95hygzM,1199
73
+ gllm_docproc/loader/pdf/adobe_pdf_extract_loader.pyi,sha256=zukKsW4lFvsiYVWlW-8d0STKj4H2yrv-eVi0YzkWSNM,1959
74
+ gllm_docproc/loader/pdf/azure_ai_document_intelligence_loader.pyi,sha256=JX0XKsFqEDZkNUv6NoBdlA0FH1ys7DII5NO4nqW28nw,2463
75
+ gllm_docproc/loader/pdf/azure_ai_document_intelligence_raw_loader.pyi,sha256=mWHEWZI57punX76q2cYnDXhKOUZs3r-nzSKjk7Ax7z8,2708
76
+ gllm_docproc/loader/pdf/glair_vision_ocr_loader.pyi,sha256=ghQ71Fvjgh707YONVFfnV4hlQAECUfYO2LGmXFvWdfQ,1660
77
+ gllm_docproc/loader/pdf/pdf_loader_utils.pyi,sha256=u7FGjtY0nS7AEGFG2rP_xB8d9vf-7j023MoQ_s3AhfY,2938
78
+ gllm_docproc/loader/pdf/pdf_miner_loader.pyi,sha256=J3ktjrct3_OzXoD8EU5R8N7UgmHBWJwUUxKExTUJxbo,2216
79
+ gllm_docproc/loader/pdf/pdf_miner_word_loader.pyi,sha256=l1JY1M8JvAVlAK73VDhJCLPWp1lAf9QmnUmSPCp6udI,1518
80
+ gllm_docproc/loader/pdf/pdf_plumber_loader.pyi,sha256=5JiNsLngH-98XavxilOY9sPaS7FLSUwuG8douHqiLs4,2148
81
+ gllm_docproc/loader/pdf/pymupdf_loader.pyi,sha256=I0ww80bpl-yljxwCqDzJzVTGl7kerySPfnsIG3qevqU,2743
82
+ gllm_docproc/loader/pdf/pymupdf_span_loader.pyi,sha256=Bl8olylBnzEVkUSaTaE_hKK6V7jWoNGRtXyTNhM0T1A,2762
83
+ gllm_docproc/loader/pdf/pymupdf_utils.pyi,sha256=41aGe1_v1zFyx3__RzcbkY5jZrwuz0b8Z2aXth6PFXI,1678
84
+ gllm_docproc/loader/pdf/tabula_loader.pyi,sha256=8y6gHpttx44u4ryovzsW4O7QfvBAMxHbnmaHOBz23I8,1756
85
+ gllm_docproc/loader/pdf/text_inject_pdf_plumber_loader.pyi,sha256=tjuyRXvE50V4sBpTCGLDPJv0Vo0_noZqoyhoP8CUUdM,2008
86
+ gllm_docproc/loader/pipeline_loader.pyi,sha256=_ba5MkcCuhSFjroJgLnsTndJbO3G4T-LZET1O5TxkQw,1804
87
+ gllm_docproc/loader/txt/__init__.pyi,sha256=0Lz4_zBg0emZiL62MCZRBHTB_ebiGWDE7QlXf3SwwrU,72
88
+ gllm_docproc/loader/txt/txt_loader.pyi,sha256=xV3fCyk98ude7UcBMbP7EK8Ub90ARUu1kXA_kWX6ZRc,1172
89
+ gllm_docproc/loader/xlsx/__init__.pyi,sha256=HbXl6OsK-Ws7NGrz5VOVbBEm-6TqzEF960ekCanbh_k,92
90
+ gllm_docproc/loader/xlsx/openpyxl_loader.pyi,sha256=b3XJHNSKX5weBQfJV6huHLt1vIE67Nb3pTIvQKVBlLM,1982
91
+ gllm_docproc/model/__init__.pyi,sha256=h6qDhCCUyzSUq-X2Tq7Okn7_T8MCN7Nd1bpKPItDkkc,147
92
+ gllm_docproc/model/element.pyi,sha256=b66oFlpUNaqUgwDhWFLy-3NeHdVnIoL0lSCmtj8hUfQ,1048
93
+ gllm_docproc/model/element_metadata.pyi,sha256=YqbUryhgAkqMTdncZEIq3VQq_uGmTgTI__A__O3iD4I,811
94
+ gllm_docproc/parser/__init__.pyi,sha256=jY_LxmoS3ye6-3pZM77x0ml-s3d3cARGBATPHDxoXqU,156
95
+ gllm_docproc/parser/base_parser.pyi,sha256=Ti78N9maq3aSViSyBzkWVFirePadufsvz4KSE-a50EM,1169
96
+ gllm_docproc/parser/document/__init__.pyi,sha256=WUrY1q7Xs52BnUAsfrUWyOCTABEDhXUJBR8yZB0XBTk,260
97
+ gllm_docproc/parser/document/docx_parser.pyi,sha256=YAzEoZWVcL0_MGHj6Tlx0OFII3F4XWD3xomMvvMO2kI,1519
98
+ gllm_docproc/parser/document/pdf_parser.pyi,sha256=4OwnSCrl6-pQehpEjQ-vm8Ho66I8JR1BlvidVvzxYig,1511
99
+ gllm_docproc/parser/document/txt_parser.pyi,sha256=vfLI61LCBTRALu2fG-TEnYEksuFxaiz3A0F3kqbhPN8,863
100
+ gllm_docproc/parser/document/xlsx_parser.pyi,sha256=DJtuJxYJDsWp1DE3k9kSYqObVyYoAb7KOjWow_yqvcE,1068
101
+ gllm_docproc/parser/html/__init__.pyi,sha256=Tq5phnPf8IxLEjAvAS0O1lc6hoCFK8yYtLiHc1q-plM,194
102
+ gllm_docproc/parser/html/flat/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
103
+ gllm_docproc/parser/html/flat/html_flat_parser.pyi,sha256=EMDr7Xheorm6WTF4iuoOUac2M849zP-D22hqsxUoh0Q,1231
104
+ gllm_docproc/parser/html/nested/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
+ gllm_docproc/parser/html/nested/html_json_processor.pyi,sha256=-KxXGuRzeAKIphSHc7juKvIzx_s8Y573YWH4f7aogv4,6340
106
+ gllm_docproc/parser/html/nested/html_nested_parser.pyi,sha256=ZK2dkti7g664LdnlQcIAmtLVCE1rZX66A1VA124PH7A,990
107
+ gllm_docproc/parser/html/nested/nested_element.pyi,sha256=Ym4OEkrT58XFpaMA_gWUeopbxrDPoWanFB0FE5UZLHc,1246
108
+ gllm_docproc/parser/pipeline_parser.pyi,sha256=emQW79pLtb3kU1FUig2SWGaA-e8Syw8IXFH1yOUmcPI,1237
109
+ gllm_docproc/parser/table/__init__.pyi,sha256=MPFJ33qPHic7DgS8QwOI2-0gw9SwhsFjmHDkvGsYHGc,109
110
+ gllm_docproc/parser/table/table_caption_parser.pyi,sha256=VU1y8vg4JfXb-NWvnZP4QiFxDD9mcss_xVFxVyMm10E,2704
111
+ gllm_docproc/request_handler/__init__.pyi,sha256=hJJrWfbAdRUH8jZfjW4Q2PqNhjo6rvK8BTDVCbBoJ28,109
112
+ gllm_docproc/request_handler/base_request_handler.pyi,sha256=UfCLOvIChcq9KxsnZeRvniZkBIXbvPiEPR0dK730U3c,479
113
+ gllm_docproc/response_handler/__init__.pyi,sha256=Ch5ht8cCWZqvePwp7Az9Uq-18ZPpghBwB7P8UnmUINw,113
114
+ gllm_docproc/response_handler/base_response_handler.pyi,sha256=rxLmPH2GPY6cf-J4bzOo3VBPQkMeZgyn5II51wg3zF8,1283
115
+ gllm_docproc/utils/__init__.pyi,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
116
+ gllm_docproc/utils/file_utils.pyi,sha256=ifTQMeaF73t4A9fpwDCsX9_jiO8Wi5ZaUMYFs7gUDiA,2565
117
+ gllm_docproc/utils/html_constants.pyi,sha256=QFUGPtiw705g7Ftl_IY5tAnAtKn-YMEym-kW0pTpBhU,2799
118
+ gllm_docproc.build/.gitignore,sha256=aEiIwOuxfzdCmLZe4oB1JsBmCUxwG8x-u-HBCV9JT8E,1
119
+ gllm_docproc.cpython-311-darwin.so,sha256=rAqCkqxabY6CUFqmua0jPVUnT4wMvheTvuSJ_bnS_h0,4464864
120
+ gllm_docproc.pyi,sha256=Hxh31cepIKdKf1iT5OmU2FAkTDKA3E_NniZ3lDfNQaE,4955
121
+ gllm_docproc_binary-0.1.8.dist-info/METADATA,sha256=NdGZBZM1pBiIpw2FqAMRz5NHfTfmlOe1bSegEggsPZU,4725
122
+ gllm_docproc_binary-0.1.8.dist-info/WHEEL,sha256=r3EiIdyNg8wC0u2K9wWWq7Elb6S4XGGmkyBqljSOtNU,107
123
+ gllm_docproc_binary-0.1.8.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 1.9.0
3
+ Root-Is-Purelib: false
4
+ Tag: cp311-cp311-macosx_13_0_x86_64