pembot 0.1.2__py2.py3-none-any.whl → 0.1.4__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pembot might be problematic. Click here for more details.
- pembot/.git/COMMIT_EDITMSG +1 -1
- pembot/.git/index +0 -0
- pembot/.git/logs/HEAD +3 -0
- pembot/.git/logs/refs/heads/main +3 -0
- pembot/.git/logs/refs/remotes/origin/main +3 -0
- pembot/.git/objects/00/3ba85af0ed7b9f6ab099ca298c3d0c18fb002b +1 -0
- pembot/.git/objects/05/5e82e69847a636258cb994bb920c03a93b5ff4 +1 -0
- pembot/.git/objects/0e/6b7f7409a88aa2595206b53112a666e4dca8a2 +0 -0
- pembot/.git/objects/10/d1fb81ceede7365dbe132a770a49026e86e9a5 +0 -0
- pembot/.git/objects/1f/791d08c432b4244a670517c87ada2181159101 +0 -0
- pembot/.git/objects/20/3b390ad0aeb3bc5a8540840b004e6a42e5ce7a +0 -0
- pembot/.git/objects/27/02d55c4513a6d23e577aa2f104982c8b9436b2 +0 -0
- pembot/.git/objects/3a/54acc088992fa8e890b93e83115ec6dc019835 +0 -0
- pembot/.git/objects/48/b71bba3a3f9887828863521c13901eceb54331 +0 -0
- pembot/.git/objects/5b/efa3b2f18d2b5d332c6de503a7054f4af0569f +0 -0
- pembot/.git/objects/73/5b5f6d515f0816599343f1ae7ccffc1d5a487e +0 -0
- pembot/.git/objects/74/5c54e85b3ea7bfc8a8f35edc907746c29f8663 +0 -0
- pembot/.git/objects/88/0c3d45ac59940344dfb6c45005f7e908173138 +0 -0
- pembot/.git/objects/92/2448ecc557be58195468561e475b904bd1b349 +0 -0
- pembot/.git/objects/b1/ddf2869bc7d213b35dabd6fa5bfae44cd6b7a7 +0 -0
- pembot/.git/objects/bb/a495d8e72b78fefcc534259b8edae9a3172d15 +0 -0
- pembot/.git/objects/c0/f948ab4636a125bc202368e6c9cbe80d76169a +0 -0
- pembot/.git/objects/c2/926f040b089a52edfb8351480f63619ab7e0ab +0 -0
- pembot/.git/objects/c3/cc0da3d955ecec0f865c46c030a0c073697495 +0 -0
- pembot/.git/objects/e6/adbc3c373070269f97ef82d4f63027d7878f67 +1 -0
- pembot/.git/refs/heads/main +1 -1
- pembot/.git/refs/remotes/origin/main +1 -1
- pembot/.gitignore +0 -1
- pembot/AnyToText/convertor.py +62 -225
- pembot/__init__.py +1 -1
- pembot/config/config.yaml +1 -1
- pembot/requirements.txt +5 -1
- {pembot-0.1.2.dist-info → pembot-0.1.4.dist-info}/METADATA +1 -1
- {pembot-0.1.2.dist-info → pembot-0.1.4.dist-info}/RECORD +36 -16
- {pembot-0.1.2.dist-info → pembot-0.1.4.dist-info}/WHEEL +0 -0
- {pembot-0.1.2.dist-info → pembot-0.1.4.dist-info}/licenses/LICENSE +0 -0
pembot/.git/COMMIT_EDITMSG
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
cyto/put the output of the converted file in excel case to write to file if path is given
|
pembot/.git/index
CHANGED
|
Binary file
|
pembot/.git/logs/HEAD
CHANGED
|
@@ -13,3 +13,6 @@ af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db270
|
|
|
13
13
|
a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236415 +0530 commit: added prompt prefixing option to prime the llm with some more data; added the option for no-filter global search in a collection in search_within_document(s)
|
|
14
14
|
784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858244 +0530 commit: added smolagent functionality with tool extension capability to query.py and added a caveat in search_within_documents by which you can just return no context if you set limit= 0, effectively doing a Non-contextual prompt
|
|
15
15
|
f214d4d56726e2928479c5948bd88e038cf70b2e 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a cyto <silverstone965@gmail.com> 1752859643 +0530 commit: minor oopsie
|
|
16
|
+
8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a 203b390ad0aeb3bc5a8540840b004e6a42e5ce7a cyto <silverstone965@gmail.com> 1752865439 +0530 commit: added requirements
|
|
17
|
+
203b390ad0aeb3bc5a8540840b004e6a42e5ce7a c3cc0da3d955ecec0f865c46c030a0c073697495 cyto <silverstone965@gmail.com> 1758877816 +0530 commit: cyto/fixed the excel to markdown conversion
|
|
18
|
+
c3cc0da3d955ecec0f865c46c030a0c073697495 e6adbc3c373070269f97ef82d4f63027d7878f67 cyto <silverstone965@gmail.com> 1758880975 +0530 commit: cyto/put the output of the converted file in excel case to write to file if path is given
|
pembot/.git/logs/refs/heads/main
CHANGED
|
@@ -13,3 +13,6 @@ af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db270
|
|
|
13
13
|
a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236415 +0530 commit: added prompt prefixing option to prime the llm with some more data; added the option for no-filter global search in a collection in search_within_document(s)
|
|
14
14
|
784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858244 +0530 commit: added smolagent functionality with tool extension capability to query.py and added a caveat in search_within_documents by which you can just return no context if you set limit= 0, effectively doing a Non-contextual prompt
|
|
15
15
|
f214d4d56726e2928479c5948bd88e038cf70b2e 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a cyto <silverstone965@gmail.com> 1752859643 +0530 commit: minor oopsie
|
|
16
|
+
8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a 203b390ad0aeb3bc5a8540840b004e6a42e5ce7a cyto <silverstone965@gmail.com> 1752865439 +0530 commit: added requirements
|
|
17
|
+
203b390ad0aeb3bc5a8540840b004e6a42e5ce7a c3cc0da3d955ecec0f865c46c030a0c073697495 cyto <silverstone965@gmail.com> 1758877816 +0530 commit: cyto/fixed the excel to markdown conversion
|
|
18
|
+
c3cc0da3d955ecec0f865c46c030a0c073697495 e6adbc3c373070269f97ef82d4f63027d7878f67 cyto <silverstone965@gmail.com> 1758880975 +0530 commit: cyto/put the output of the converted file in excel case to write to file if path is given
|
|
@@ -12,3 +12,6 @@ af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db270
|
|
|
12
12
|
a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236436 +0530 update by push
|
|
13
13
|
784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858280 +0530 update by push
|
|
14
14
|
f214d4d56726e2928479c5948bd88e038cf70b2e 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a cyto <silverstone965@gmail.com> 1752859659 +0530 update by push
|
|
15
|
+
8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a 203b390ad0aeb3bc5a8540840b004e6a42e5ce7a cyto <silverstone965@gmail.com> 1752865472 +0530 update by push
|
|
16
|
+
203b390ad0aeb3bc5a8540840b004e6a42e5ce7a c3cc0da3d955ecec0f865c46c030a0c073697495 cyto <silverstone965@gmail.com> 1758877832 +0530 update by push
|
|
17
|
+
c3cc0da3d955ecec0f865c46c030a0c073697495 e6adbc3c373070269f97ef82d4f63027d7878f67 cyto <silverstone965@gmail.com> 1758880990 +0530 update by push
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
xe��� E����.Mc�����M�Cͣb��@���.���{�v��ж;)�8C���q�p$���b��ҁv<nA���nN������c�e�WpT8�0���%���*Y*�Rp�jߴͱ&��ƣ
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
x+)JMU0�d040031Q�����,���+�dx6���M�9{wk�+��q�IO�D������Ԣ��"�:��5:f�x1a�]�`ܱ;�3L�M#�
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
x��Kn�@D���GJ��#��\�c��Ш1���a&7Ȯ>*Փ�,��!�XUT
|
pembot/.git/refs/heads/main
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
e6adbc3c373070269f97ef82d4f63027d7878f67
|
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
e6adbc3c373070269f97ef82d4f63027d7878f67
|
pembot/.gitignore
CHANGED
pembot/AnyToText/convertor.py
CHANGED
|
@@ -3,11 +3,11 @@ import mimetypes
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from pembot.pdf2markdown.extract import MarkdownPDFExtractor
|
|
5
5
|
import os
|
|
6
|
-
import json
|
|
7
6
|
import pandas as pd
|
|
8
|
-
from typing import Literal, Union
|
|
7
|
+
from typing import Literal, Union
|
|
9
8
|
import tempfile
|
|
10
9
|
from datetime import datetime, date
|
|
10
|
+
from tabulate import tabulate
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
PandasReadEngineType = Literal['xlrd', 'openpyxl', 'odf', 'pyxlsb', 'calamine', None]
|
|
@@ -53,10 +53,11 @@ class Convertor():
|
|
|
53
53
|
self.output= output_file.read()
|
|
54
54
|
elif file_type == 'excel':
|
|
55
55
|
self.input_filepath= myfile
|
|
56
|
-
self.
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
56
|
+
self.output= self.convert_excel_to_markdown()
|
|
57
|
+
if myfile and output_dir:
|
|
58
|
+
with open(output_dir / (myfile.stem + '.md'), "w") as output_file:
|
|
59
|
+
output_file.write(self.output)
|
|
60
|
+
|
|
60
61
|
|
|
61
62
|
elif output_dir is not None and myfile is not None:
|
|
62
63
|
print("got output path for conversion: ", output_dir)
|
|
@@ -64,8 +65,6 @@ class Convertor():
|
|
|
64
65
|
|
|
65
66
|
self.output_dir= output_dir
|
|
66
67
|
self.input_filepath= myfile
|
|
67
|
-
base_name, _ = os.path.splitext(myfile.name)
|
|
68
|
-
self.json_filepath = output_dir / 'json' / (base_name + ".json")
|
|
69
68
|
|
|
70
69
|
if mt == 'application/json':
|
|
71
70
|
print("the file was json")
|
|
@@ -73,242 +72,80 @@ class Convertor():
|
|
|
73
72
|
print("the file was pdf, outputting in: ", output_dir)
|
|
74
73
|
extractor= MarkdownPDFExtractor(str(myfile), output_path= str(self.output_dir), page_delimiter= "-- NEXT PAGE --", model_name= model_name)
|
|
75
74
|
extractor.extract()
|
|
75
|
+
with open(self.output_dir / (myfile.stem + '.md')) as output_file:
|
|
76
|
+
self.output= output_file.read()
|
|
76
77
|
|
|
77
78
|
elif mt in EXCEL_FILE_TYPES:
|
|
78
|
-
self.
|
|
79
|
+
self.output = self.convert_excel_to_markdown()
|
|
79
80
|
|
|
80
81
|
else:
|
|
81
82
|
print(mt)
|
|
82
83
|
|
|
83
|
-
def
|
|
84
|
-
self,
|
|
85
|
-
sheet_to_convert: Union[str, int, None] = None, # Relevant for Excel/ODS
|
|
86
|
-
orient: Literal['dict', 'list', 'series', 'split', 'records', 'index'] = 'records', # Corrected type hint
|
|
87
|
-
date_format: Union[str, None] = 'iso', # 'iso', 'epoch', or None
|
|
88
|
-
csv_encoding: str = 'utf-8', # For reading CSV files
|
|
89
|
-
excel_ods_engine: PandasReadEngineType = None # For Excel/ODS, e.g., 'openpyxl', 'xlrd', 'odf'
|
|
90
|
-
) -> bool:
|
|
84
|
+
def convert_excel_to_markdown(self, excel_ods_engine: PandasReadEngineType = None) -> str:
|
|
91
85
|
"""
|
|
92
|
-
Converts an Excel
|
|
93
|
-
|
|
86
|
+
Converts all sheets from an Excel or ODS file into a single Markdown string.
|
|
87
|
+
Each sheet is converted to a Markdown table, prefixed with the sheet's name.
|
|
94
88
|
|
|
95
89
|
Args:
|
|
96
|
-
sheet_to_convert (str | int | None, optional):
|
|
97
|
-
- For Excel/ODS:
|
|
98
|
-
- If None (default): Converts all sheets. The JSON output will be a
|
|
99
|
-
dictionary where keys are sheet names and values are the JSON
|
|
100
|
-
representation of each sheet.
|
|
101
|
-
- If str: Name of the specific sheet to convert.
|
|
102
|
-
- If int: Index of the specific sheet to convert (0-based).
|
|
103
|
-
If a specific sheet is requested, the JSON output will directly be
|
|
104
|
-
the representation of that sheet.
|
|
105
|
-
- For CSV: This parameter is ignored. The entire CSV is processed.
|
|
106
|
-
orient (str, optional): Pandas DataFrame.to_dict() orientation for each sheet/CSV.
|
|
107
|
-
Default: 'records'. See pandas.DataFrame.to_dict() documentation.
|
|
108
|
-
date_format (str | None, optional): Format for datetime objects.
|
|
109
|
-
- 'iso' (default): ISO8601 format (e.g., '2023-10-27T10:30:00').
|
|
110
|
-
- 'epoch': Milliseconds since epoch.
|
|
111
|
-
- None: Pandas default (often Timestamps). 'iso' is generally safer for JSON.
|
|
112
|
-
csv_encoding (str, optional): Encoding for reading CSV files. Default is 'utf-8'.
|
|
113
90
|
excel_ods_engine (str | None, optional): Pandas engine for reading Excel or ODS files.
|
|
114
91
|
- For Excel: 'openpyxl' (for .xlsx), 'xlrd' (for .xls).
|
|
115
92
|
- For ODS: 'odf' (requires 'odfpy' library).
|
|
116
93
|
If None, pandas auto-detects based on file extension and installed libraries.
|
|
117
94
|
|
|
118
95
|
Returns:
|
|
119
|
-
|
|
96
|
+
str: A string containing the Markdown tables for all sheets, or an error message.
|
|
120
97
|
"""
|
|
121
|
-
|
|
122
98
|
input_filepath = self.input_filepath
|
|
123
|
-
|
|
99
|
+
markdown_output = []
|
|
124
100
|
|
|
101
|
+
file_suffix= ''
|
|
125
102
|
try:
|
|
126
|
-
|
|
127
103
|
if not input_filepath.exists():
|
|
128
|
-
|
|
129
|
-
return False
|
|
130
|
-
|
|
131
|
-
# Ensure output directory exists
|
|
132
|
-
json_filepath.parent.mkdir(parents=True, exist_ok=True)
|
|
104
|
+
return f"Error: Input file not found at {input_filepath}"
|
|
133
105
|
|
|
134
106
|
file_suffix = input_filepath.suffix.lower()
|
|
135
|
-
output_data_final: Union[Dict[str, Any], List[Dict[str, Any]]] = {}
|
|
136
|
-
|
|
137
|
-
dataframes_to_process: list[tuple[pd.DataFrame, str | None]] = []
|
|
138
|
-
|
|
139
107
|
current_engine: PandasReadEngineType = excel_ods_engine
|
|
140
108
|
|
|
141
|
-
if file_suffix
|
|
142
|
-
if
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
print(f"Error reading CSV file '{input_filepath.name}': {e}")
|
|
149
|
-
return False
|
|
150
|
-
|
|
151
|
-
elif file_suffix in ['.xls', '.xlsx', '.ods']:
|
|
152
|
-
try:
|
|
153
|
-
if file_suffix == '.ods':
|
|
154
|
-
if current_engine is None:
|
|
155
|
-
current_engine = 'odf'
|
|
156
|
-
elif current_engine != 'odf':
|
|
157
|
-
print(f"Warning: Specified engine '{current_engine}' may not be optimal for ODS. Forcing 'odf'.")
|
|
158
|
-
current_engine = 'odf'
|
|
159
|
-
|
|
160
|
-
if sheet_to_convert is not None:
|
|
161
|
-
df = pd.read_excel(input_filepath, sheet_name=sheet_to_convert, engine=current_engine)
|
|
162
|
-
dataframes_to_process.append((df, None))
|
|
163
|
-
|
|
164
|
-
else:
|
|
165
|
-
excel_file = pd.ExcelFile(input_filepath, engine=current_engine)
|
|
166
|
-
if not excel_file.sheet_names:
|
|
167
|
-
print(f"Warning: File '{input_filepath.name}' contains no sheets.")
|
|
168
|
-
for sheet_name in excel_file.sheet_names:
|
|
169
|
-
df = excel_file.parse(sheet_name) # engine is inherited
|
|
170
|
-
dataframes_to_process.append((df, sheet_name))
|
|
171
|
-
except ImportError as ie:
|
|
172
|
-
if 'odfpy' in str(ie).lower() and file_suffix == '.ods':
|
|
173
|
-
print(f"Error reading ODS file '{input_filepath.name}': The 'odfpy' library is required. Please install it using 'pip install odfpy'.")
|
|
174
|
-
elif 'xlrd' in str(ie).lower() and file_suffix == '.xls':
|
|
175
|
-
print(f"Error reading .xls file '{input_filepath.name}': The 'xlrd' library might be required. Please install it using 'pip install xlrd'.")
|
|
176
|
-
elif 'openpyxl' in str(ie).lower() and file_suffix == '.xlsx':
|
|
177
|
-
print(f"Error reading .xlsx file '{input_filepath.name}': The 'openpyxl' library might be required. Please install it using 'pip install openpyxl'.")
|
|
178
|
-
else:
|
|
179
|
-
print(f"ImportError reading file '{input_filepath.name}': {ie}")
|
|
180
|
-
return False
|
|
181
|
-
except Exception as e:
|
|
182
|
-
print(f"Error reading Excel/ODS file '{input_filepath.name}': {e}")
|
|
183
|
-
return False
|
|
184
|
-
else:
|
|
185
|
-
print(f"Error: Unsupported file type: '{file_suffix}'. Please provide a CSV, XLS, XLSX, or ODS file.")
|
|
186
|
-
return False
|
|
187
|
-
|
|
188
|
-
if not dataframes_to_process and file_suffix in ['.xls', '.xlsx', '.ods'] and sheet_to_convert is None:
|
|
189
|
-
print(f"Info: No dataframes were loaded from '{input_filepath.name}'. Output JSON will be empty if processing all sheets from an empty file.")
|
|
190
|
-
elif not dataframes_to_process and not (file_suffix in ['.xls', '.xlsx', '.ods'] and sheet_to_convert is None):
|
|
191
|
-
pass
|
|
192
|
-
|
|
193
|
-
is_direct_output = len(dataframes_to_process) == 1 and dataframes_to_process[0][1] is None
|
|
194
|
-
temp_processed_data: Dict[str, Any] = {}
|
|
195
|
-
|
|
196
|
-
for df_original, name_key in dataframes_to_process:
|
|
197
|
-
df = df_original.copy()
|
|
198
|
-
|
|
199
|
-
# Handle datetime columns with improved detection and conversion
|
|
200
|
-
if date_format:
|
|
201
|
-
# Check for datetime columns using multiple approaches
|
|
202
|
-
datetime_columns = []
|
|
203
|
-
|
|
204
|
-
# Method 1: Use pandas dtype detection
|
|
205
|
-
datetime_columns.extend(df.select_dtypes(include=['datetime64[ns]', 'datetime', 'datetimetz']).columns.tolist())
|
|
206
|
-
|
|
207
|
-
# Method 2: Check for datetime objects in each column
|
|
208
|
-
for col in df.columns:
|
|
209
|
-
if col not in datetime_columns:
|
|
210
|
-
# Sample a few non-null values to check type
|
|
211
|
-
sample_values = df[col].dropna().head(10)
|
|
212
|
-
if len(sample_values) > 0:
|
|
213
|
-
for val in sample_values:
|
|
214
|
-
if isinstance(val, (datetime, date, pd.Timestamp)):
|
|
215
|
-
datetime_columns.append(col)
|
|
216
|
-
break
|
|
217
|
-
|
|
218
|
-
# Convert datetime columns
|
|
219
|
-
for col_name in datetime_columns:
|
|
220
|
-
try:
|
|
221
|
-
if date_format == 'iso':
|
|
222
|
-
df[col_name] = df[col_name].apply(lambda x: self._convert_to_iso(x))
|
|
223
|
-
elif date_format == 'epoch':
|
|
224
|
-
df[col_name] = df[col_name].apply(lambda x: self._convert_to_epoch(x))
|
|
225
|
-
except Exception as e_date:
|
|
226
|
-
print(f"Warning: Could not fully convert date column '{col_name}' in '{name_key or input_filepath.name}' using format '{date_format}'. Error: {e_date}")
|
|
227
|
-
|
|
228
|
-
# Replace NaN values with None for JSON compatibility
|
|
229
|
-
df = df.astype(object).where(pd.notnull(df), None)
|
|
230
|
-
|
|
231
|
-
# Final safety check: convert any remaining datetime objects
|
|
232
|
-
for col in df.columns:
|
|
233
|
-
df[col] = df[col].apply(lambda x: self._safe_datetime_convert(x, date_format))
|
|
234
|
-
|
|
235
|
-
current_json_segment = df.to_dict(orient=orient)
|
|
236
|
-
|
|
237
|
-
if is_direct_output:
|
|
238
|
-
output_data_final = current_json_segment
|
|
239
|
-
break
|
|
240
|
-
else:
|
|
241
|
-
if name_key is not None:
|
|
242
|
-
temp_processed_data[name_key] = current_json_segment
|
|
243
|
-
|
|
244
|
-
if not is_direct_output:
|
|
245
|
-
output_data_final = temp_processed_data
|
|
246
|
-
|
|
247
|
-
with open(json_filepath, 'w', encoding='utf-8') as f:
|
|
248
|
-
json.dump(output_data_final, f, indent=4, ensure_ascii=False)
|
|
249
|
-
|
|
250
|
-
print(f"Successfully converted '{input_filepath.name}' to '{json_filepath.name}'")
|
|
251
|
-
return True
|
|
252
|
-
|
|
253
|
-
except FileNotFoundError:
|
|
254
|
-
print(f"Error: Input file not found at {input_filepath.name}")
|
|
255
|
-
return False
|
|
256
|
-
except ValueError as ve:
|
|
257
|
-
print(f"ValueError during conversion of '{input_filepath.name}': {ve}")
|
|
258
|
-
return False
|
|
259
|
-
except Exception as e:
|
|
260
|
-
print(f"An unexpected error occurred during conversion of '{input_filepath.name}': {e}")
|
|
261
|
-
return False
|
|
109
|
+
if file_suffix in ['.xls', '.xlsx', '.ods']:
|
|
110
|
+
if file_suffix == '.ods':
|
|
111
|
+
if current_engine is None:
|
|
112
|
+
current_engine = 'odf'
|
|
113
|
+
elif current_engine != 'odf':
|
|
114
|
+
print(f"Warning: Specified engine '{current_engine}' may not be optimal for ODS. Forcing 'odf'.")
|
|
115
|
+
current_engine = 'odf'
|
|
262
116
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
return None
|
|
117
|
+
excel_file = pd.ExcelFile(input_filepath, engine=current_engine)
|
|
118
|
+
if not excel_file.sheet_names:
|
|
119
|
+
return f"Warning: File '{input_filepath.name}' contains no sheets."
|
|
267
120
|
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
return value.isoformat()
|
|
275
|
-
else:
|
|
276
|
-
return str(value)
|
|
277
|
-
except:
|
|
278
|
-
return str(value) if value is not None else None
|
|
121
|
+
for sheet_name in excel_file.sheet_names:
|
|
122
|
+
df = excel_file.parse(sheet_name)
|
|
123
|
+
markdown_output.append(f"## {sheet_name}\n")
|
|
124
|
+
markdown_table = tabulate(df, headers='keys', tablefmt='pipe')
|
|
125
|
+
markdown_output.append(markdown_table)
|
|
126
|
+
markdown_output.append("\n")
|
|
279
127
|
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
128
|
+
return "\n".join(markdown_output)
|
|
129
|
+
|
|
130
|
+
elif file_suffix == '.csv':
|
|
131
|
+
df = pd.read_csv(input_filepath)
|
|
132
|
+
markdown_table = tabulate(df, headers='keys', tablefmt='pipe')
|
|
133
|
+
return markdown_table
|
|
284
134
|
|
|
285
|
-
try:
|
|
286
|
-
if isinstance(value, (int, float)):
|
|
287
|
-
return int(value) # Assume already epoch
|
|
288
|
-
elif hasattr(value, 'timestamp'):
|
|
289
|
-
return int(value.timestamp() * 1000)
|
|
290
|
-
elif isinstance(value, pd.Timestamp):
|
|
291
|
-
return int(value.timestamp() * 1000)
|
|
292
135
|
else:
|
|
293
|
-
return
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
# If it's a datetime-like object, convert it
|
|
303
|
-
if isinstance(value, (datetime, date, pd.Timestamp)):
|
|
304
|
-
if date_format == 'iso':
|
|
305
|
-
return self._convert_to_iso(value)
|
|
306
|
-
elif date_format == 'epoch':
|
|
307
|
-
return self._convert_to_epoch(value)
|
|
136
|
+
return f"Error: Unsupported file type: '{file_suffix}'. Please provide a CSV, XLS, XLSX, or ODS file."
|
|
137
|
+
|
|
138
|
+
except ImportError as ie:
|
|
139
|
+
if 'odfpy' in str(ie).lower() and file_suffix == '.ods':
|
|
140
|
+
return f"Error reading ODS file '{input_filepath.name}': The 'odfpy' library is required. Please install it using 'pip install odfpy'."
|
|
141
|
+
elif 'xlrd' in str(ie).lower() and file_suffix == '.xls':
|
|
142
|
+
return f"Error reading .xls file '{input_filepath.name}': The 'xlrd' library might be required. Please install it using 'pip install xlrd'."
|
|
143
|
+
elif 'openpyxl' in str(ie).lower() and file_suffix == '.xlsx':
|
|
144
|
+
return f"Error reading .xlsx file '{input_filepath.name}': The 'openpyxl' library might be required. Please install it using 'pip install openpyxl'."
|
|
308
145
|
else:
|
|
309
|
-
return
|
|
310
|
-
|
|
311
|
-
|
|
146
|
+
return f"ImportError reading file '{input_filepath.name}': {ie}"
|
|
147
|
+
except Exception as e:
|
|
148
|
+
return f"An unexpected error occurred during conversion of '{input_filepath.name}': {e}"
|
|
312
149
|
|
|
313
150
|
|
|
314
151
|
def chunk_text(text, chunk_size=500, overlap_size=50):
|
|
@@ -337,29 +174,29 @@ def chunk_text(text, chunk_size=500, overlap_size=50):
|
|
|
337
174
|
if __name__ == '__main__':
|
|
338
175
|
print("Test Run Start:")
|
|
339
176
|
try:
|
|
340
|
-
print("Test 1: scaned pdf page, bytes")
|
|
341
|
-
with open("/home/cyto/Documents/scanned.pdf", "rb") as imgpdf:
|
|
342
|
-
|
|
343
|
-
|
|
177
|
+
# print("Test 1: scaned pdf page, bytes")
|
|
178
|
+
# with open("/home/cyto/Documents/scanned.pdf", "rb") as imgpdf:
|
|
179
|
+
# conv= Convertor(file_bytes= imgpdf.read(), suffix= ".pdf", file_type= "pdf")
|
|
180
|
+
# print(conv.output)
|
|
344
181
|
|
|
345
182
|
# print("Test 2: JD pdf, bytes")
|
|
346
183
|
# with open("/home/cyto/dev/pembotdir/jds/PM Trainee.pdf", "rb") as imgpdf:
|
|
347
184
|
# conv= Convertor(file_bytes= imgpdf.read(), suffix= ".pdf", file_type= "pdf")
|
|
348
185
|
# print(conv.output)
|
|
349
186
|
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
187
|
+
print("Test 3: excel schedule, bytes")
|
|
188
|
+
with open("/home/cyto/Downloads/Assignment schedule.xlsx", "rb") as imgpdf:
|
|
189
|
+
conv= Convertor(file_bytes= imgpdf.read(), suffix= ".xlsx", file_type= "excel")
|
|
190
|
+
print(conv.output)
|
|
354
191
|
|
|
355
192
|
# without bytes example:
|
|
356
193
|
print("Test 4: scanned pdf, path")
|
|
357
194
|
conv= Convertor(myfile= Path('/home/cyto/Documents/scanned.pdf'), output_dir= Path('/home/cyto/Documents'))
|
|
358
195
|
print(conv.output)
|
|
359
196
|
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
197
|
+
print("Test 5: schedule excel, path")
|
|
198
|
+
conv= Convertor(myfile= Path('/home/cyto/Downloads/Assignment schedule.xlsx'), output_dir= Path('/home/cyto/Downloads'))
|
|
199
|
+
print(conv.output)
|
|
363
200
|
except FileNotFoundError as fe:
|
|
364
201
|
print("file not found, modify the driver code to get sample files to test:\n\n", fe)
|
|
365
202
|
except Exception as e:
|
pembot/__init__.py
CHANGED
pembot/config/config.yaml
CHANGED
pembot/requirements.txt
CHANGED
|
@@ -9,6 +9,7 @@ cffi==1.17.1
|
|
|
9
9
|
charset-normalizer==3.4.2
|
|
10
10
|
click==8.2.1
|
|
11
11
|
cryptography==45.0.5
|
|
12
|
+
defusedxml==0.7.1
|
|
12
13
|
dnspython==2.7.0
|
|
13
14
|
duckduckgo_search==8.1.1
|
|
14
15
|
et_xmlfile==2.0.0
|
|
@@ -36,6 +37,7 @@ MarkupSafe==3.0.2
|
|
|
36
37
|
mdurl==0.1.2
|
|
37
38
|
msgpack==1.1.1
|
|
38
39
|
numpy==2.3.1
|
|
40
|
+
odfpy==1.4.1
|
|
39
41
|
ollama==0.5.1
|
|
40
42
|
openpyxl==3.1.5
|
|
41
43
|
orjson==3.10.18
|
|
@@ -44,7 +46,7 @@ pandas==2.3.0
|
|
|
44
46
|
pathlib==1.0.1
|
|
45
47
|
pdfminer.six==20250506
|
|
46
48
|
pdfplumber==0.11.7
|
|
47
|
-
pembot==0.1.
|
|
49
|
+
pembot==0.1.4
|
|
48
50
|
pillow==11.3.0
|
|
49
51
|
primp==0.15.0
|
|
50
52
|
pyasn1==0.6.1
|
|
@@ -76,6 +78,7 @@ smolagents==1.20.0
|
|
|
76
78
|
sniffio==1.3.1
|
|
77
79
|
soupsieve==2.7
|
|
78
80
|
starlette==0.46.2
|
|
81
|
+
tabulate==0.9.0
|
|
79
82
|
tenacity==8.5.0
|
|
80
83
|
tomlkit==0.13.3
|
|
81
84
|
tqdm==4.67.1
|
|
@@ -86,3 +89,4 @@ tzdata==2025.2
|
|
|
86
89
|
urllib3==2.5.0
|
|
87
90
|
uvicorn==0.35.0
|
|
88
91
|
websockets==15.0.1
|
|
92
|
+
xlrd==2.0.2
|
|
@@ -1,19 +1,19 @@
|
|
|
1
|
-
pembot/.gitignore,sha256=
|
|
1
|
+
pembot/.gitignore,sha256=yyDEUmeqZekG4AOrU9Zvu2ZQhJvEzEg_lQp2CDfBhXM,92
|
|
2
2
|
pembot/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
3
|
-
pembot/__init__.py,sha256=
|
|
3
|
+
pembot/__init__.py,sha256=ALfgnAweIDuCsninSev7KaxWZHPDj3uipgBHvJALnvI,211
|
|
4
4
|
pembot/gartner.py,sha256=3ALknQ5mSXIimmwCa3JFDzB_EW2hHEcQO1T2odyBquk,5408
|
|
5
5
|
pembot/main.py,sha256=lZLIV8XPonvNoY4LVS-5fct1y9URMXWoSGJUKMw3Yg8,9667
|
|
6
6
|
pembot/output_structure_local.py,sha256=YfpHzfTNeLMSsB_CjAamha9D6Iz7E1IC-tW9xPCMWFc,3000
|
|
7
7
|
pembot/pem.py,sha256=mv6iGcN1peSY7z2dtCQ_BKj31EFBNfczBhps_d-0XDo,6377
|
|
8
8
|
pembot/pyrightconfig.json,sha256=j2O2tc8Z-Zu7hEnhN9neoKk6-iLkAlp4qOmAxFyHB7Y,368
|
|
9
9
|
pembot/query.py,sha256=zgfIJsSMDatFPl0Fw3MhK7fO8uBB0Yj4rxEAExqGyGA,18054
|
|
10
|
-
pembot/requirements.txt,sha256=
|
|
10
|
+
pembot/requirements.txt,sha256=oNGOW-nqvzJZvy5qmFk__S5buH3jYS7-13VaLQxBhpI,1567
|
|
11
11
|
pembot/search.py,sha256=IW0F8QjE-HSYP47v5P9EqfnzKgFEf5CGxeICtHDDrkE,9137
|
|
12
|
-
pembot/.git/COMMIT_EDITMSG,sha256=
|
|
12
|
+
pembot/.git/COMMIT_EDITMSG,sha256=TpJZGgNNb5nhYiJQlrJUzYbeFbY6aCXZ8n9V0zQoe0E,90
|
|
13
13
|
pembot/.git/HEAD,sha256=KNJb-Cr0wOK3L1CVmyvrhZ4-YLljCl6MYD2tTdsrboA,21
|
|
14
14
|
pembot/.git/config,sha256=ZFl9d2GyxirgRXRsv8iULIieKxwGC9P6SAjB_AmTkmQ,271
|
|
15
15
|
pembot/.git/description,sha256=hatsFj1DoX6pz3eIMIvKFGbxsKjRzJLibpv2PaQGKu4,73
|
|
16
|
-
pembot/.git/index,sha256=
|
|
16
|
+
pembot/.git/index,sha256=eLDRlMvYMR_BU749-CvqXohIriJGcW291fSOM6SHONw,2054
|
|
17
17
|
pembot/.git/packed-refs,sha256=7DECsr7q7vJ6Gw6a2gS3dE4v-YzbxGiWYoSWM43DgsQ,112
|
|
18
18
|
pembot/.git/hooks/applypatch-msg.sample,sha256=AiNJeguLAzqlijpSG4YphpOGz3qw4vEBlj0yiqYhk_c,478
|
|
19
19
|
pembot/.git/hooks/commit-msg.sample,sha256=H3TV6SkpebVz69WXQdRsuT_zkazdCD00C5Q3B1PZJDc,896
|
|
@@ -30,11 +30,13 @@ pembot/.git/hooks/push-to-checkout.sample,sha256=pT0HQXmLKHxt16-mSu5HPzBeZdP0lGO
|
|
|
30
30
|
pembot/.git/hooks/sendemail-validate.sample,sha256=ROv8kj3FRmvACWAvDs8Ge5xlRZq_6IaN3Em3jmztepI,2308
|
|
31
31
|
pembot/.git/hooks/update.sample,sha256=jV8vqD4QPPCLV-qmdSHfkZT0XL28s32lKtWGCXoU0QY,3650
|
|
32
32
|
pembot/.git/info/exclude,sha256=ZnH-g7egfIky7okWTR8nk7IxgFjri5jcXAbuClo7DsE,240
|
|
33
|
-
pembot/.git/logs/HEAD,sha256
|
|
34
|
-
pembot/.git/logs/refs/heads/main,sha256
|
|
33
|
+
pembot/.git/logs/HEAD,sha256=-2ghm_6RqCop8Zq5lfUj27TWryvf5Eh6jXyROZL29RI,4360
|
|
34
|
+
pembot/.git/logs/refs/heads/main,sha256=-2ghm_6RqCop8Zq5lfUj27TWryvf5Eh6jXyROZL29RI,4360
|
|
35
35
|
pembot/.git/logs/refs/remotes/origin/HEAD,sha256=OrkNquczPPh6fEGtutFKva_-_JhAdwnvXpCCPC4N6jk,194
|
|
36
|
-
pembot/.git/logs/refs/remotes/origin/main,sha256=
|
|
36
|
+
pembot/.git/logs/refs/remotes/origin/main,sha256=PN9Pc--KH5XuvjF8pDgEcJlQJIIumYsi8KRaIpQo_70,2482
|
|
37
|
+
pembot/.git/objects/00/3ba85af0ed7b9f6ab099ca298c3d0c18fb002b,sha256=pnk_IbjhUJWavx5BKSlXX8CEvWEMoSm8Dv1tQrUmzn4,169
|
|
37
38
|
pembot/.git/objects/01/5f71967c525963c827d7fe5415ae2c040c4a64,sha256=-qlT-5utWcwFnO3ADkH2SA2LBsdcph6wE2iePxJxkHs,170
|
|
39
|
+
pembot/.git/objects/05/5e82e69847a636258cb994bb920c03a93b5ff4,sha256=eNZTNvT7qgsLCfJvRfTETWZIkk_vBEEroLNbPC8RRa4,90
|
|
38
40
|
pembot/.git/objects/09/ee34d94dffd4c286df1d6d528b2e98e2a6cce2,sha256=FSXPGn6UBhR7s1Ug-afzCYLfGy8dE3Umn8dBKaahkDM,203
|
|
39
41
|
pembot/.git/objects/0a/fb3a98cdc55b1434b44534ec2bf22c56cfa26c,sha256=Xxw20vI57zuhERWopDAZpQw6rAOhFtUr05lzpGyCTTE,120
|
|
40
42
|
pembot/.git/objects/0b/db4169fc0f312b8698f1df17a258fff163aeaa,sha256=hsOHhX0Yajg27Y7B9lo-WjDXzW1KNMg2CBr93G116EY,387
|
|
@@ -42,15 +44,21 @@ pembot/.git/objects/0c/8d9b2690545bf1906b05cd9f18b783b3eb74f1,sha256=GKt_CAJNOQX
|
|
|
42
44
|
pembot/.git/objects/0c/ab66ffbaf50ef60dd41f3498595ebd2526b33c,sha256=Uk1dStvEBica-t38qHsZZ_4mxvi6b6VA9PaKE4KSunQ,90
|
|
43
45
|
pembot/.git/objects/0d/28f73897db0c9a9351ee9e64d2a0fe27db2705,sha256=hqMFSXWo_05QL0Do-raB4AtK5QjvKLFBNc0RZqNga9o,244
|
|
44
46
|
pembot/.git/objects/0e/120123bfadfd594220963f3bbca54056bab6ee,sha256=fj4c6vIKYMYSj5DEdXd6fcYGcanqaPGRD_9haJy35ns,56
|
|
47
|
+
pembot/.git/objects/0e/6b7f7409a88aa2595206b53112a666e4dca8a2,sha256=5sqQ9f08zuuqxJ-zpJzCmz2iPlgYe5D-BVKl_K_KKUQ,527
|
|
45
48
|
pembot/.git/objects/0f/ccea3d0db4864a854f8b2c13b9f76b3601d200,sha256=Fq6qF_9lqg1bYsF2tWArhzkldnfgLFELLK2CH_2XNcU,203
|
|
46
49
|
pembot/.git/objects/10/9d1ca0463ea42bbbc435bcb43a90711211cf49,sha256=vR33_Raw-LpnaXGQc1MhSk_ZgEROO2Xa9n97YmA3gtQ,56
|
|
50
|
+
pembot/.git/objects/10/d1fb81ceede7365dbe132a770a49026e86e9a5,sha256=OhI6pEx_G6KbujS7idkp5MxJd1Aw92Wn3Sl-JBgU2VU,115
|
|
47
51
|
pembot/.git/objects/14/22c2d0cf79fd928ff7e2d77f96ad5b40cc2a31,sha256=2INSnjkW4KTAcfO2aLYVzjnpT89NXxx8TBJj4iU9e3Y,170
|
|
48
52
|
pembot/.git/objects/18/28e18ab80aa64d334b26428708140e280cbc63,sha256=PTF8WLVhzxBDTZhwU_PBHrkQBbijHbKvttSr0XVTOcU,3936
|
|
49
53
|
pembot/.git/objects/19/f61df7dbd562d04f561288677bbf2f18f5dff7,sha256=zg8IdUSnMYpJ6HsfY2LQbXQTMwlT1IPWRSEiY2uDwyE,392
|
|
54
|
+
pembot/.git/objects/1f/791d08c432b4244a670517c87ada2181159101,sha256=Zpth_iVM6H5W4u5jLVEKdRz0i3ydBLm5XJql4ieuj8U,169
|
|
50
55
|
pembot/.git/objects/1f/83a471c8119f7794d98c049170a5d7d07a4b71,sha256=XnMaYQUA8iT1fiOIvlBav331Ry7pNBOBqI3wB3Y1VM0,90
|
|
56
|
+
pembot/.git/objects/20/3b390ad0aeb3bc5a8540840b004e6a42e5ce7a,sha256=tNzFPYJ0Y6YpaNw4w2hRH_0iTa5fNlC2nzARkKFbIec,162
|
|
57
|
+
pembot/.git/objects/27/02d55c4513a6d23e577aa2f104982c8b9436b2,sha256=SQ84I7DnyPaaxoWCBoh20Iw1VZm8wgSaPaL5uDR-R8I,90
|
|
51
58
|
pembot/.git/objects/28/db0ab48059acccd7d257aa02e52e9b6b83a4a5,sha256=S6PrWSQlkifYxKIgFdU0PZD0uLebS6uAP2LAUwp5yOI,91
|
|
52
59
|
pembot/.git/objects/35/97e518a8658280be9f377f78edf1dfa1f23814,sha256=gfc5bFLVZpwNQb1Ox2VosDYAjw0Lc5ZLjmvNA8gWcmg,2546
|
|
53
60
|
pembot/.git/objects/37/175696b3ca7a5d17379f03fb61a1023d50aeba,sha256=XaF3EsJ1wSIWtgBtgKsZkwiMK0NM8acFy9nnqE9_d0s,3085
|
|
61
|
+
pembot/.git/objects/3a/54acc088992fa8e890b93e83115ec6dc019835,sha256=EZlP672_7dB3SZ_ZwyBsFmVTJpLoblFqDdfoW-2v990,2879
|
|
54
62
|
pembot/.git/objects/3d/07d3b29ff53d95de3898fb786d61732f210515,sha256=A9MNZO3QZ6ghGd1MyfmJ6H3dBTpF4HZcRosVxWytx8E,4077
|
|
55
63
|
pembot/.git/objects/3d/e536f9c1fd05a23c2dec66423ed610afb0cf5f,sha256=omF4gmE9IQFZR8t6ybAKfnW02tdn9ZaVWKRhv_o1V4c,2083
|
|
56
64
|
pembot/.git/objects/3e/23850624fcf5f111d6ea88ddd64adf924cf82f,sha256=ygVUpaLo7cxUdIgjFlaBh2BkllV6BIYYkzLIxsPKjWE,4111
|
|
@@ -62,11 +70,13 @@ pembot/.git/objects/41/cbeb6bcb4c6fa9ef9be571082d95ecb4ea0ee3,sha256=waMrzjG_o5D
|
|
|
62
70
|
pembot/.git/objects/42/f03e1b66aa56bbb36a1c3a8dea9e1e727faffa,sha256=n4W2gcagesjI1rStKNxQ98q5UOHlfwFJGUADFeYldoE,418
|
|
63
71
|
pembot/.git/objects/44/86da0f89c566c3bae8abf435d37aeca87f1632,sha256=S2hY860Ep-0c7gQcbgrH6ioG7-Hw9a3BwYHcCkwy1Hg,3884
|
|
64
72
|
pembot/.git/objects/44/9dea1ca63d6f1e47d119b36576acc94822a37c,sha256=gVL6GHxMRFhlOnyUCO1dSxnsBlMd4Jx90eNZFrv32UQ,6490
|
|
73
|
+
pembot/.git/objects/48/b71bba3a3f9887828863521c13901eceb54331,sha256=Kx2Tcs17_chpF5rbY3AB34Cj1S3DGnr7Y1tZOTxvrdM,80
|
|
65
74
|
pembot/.git/objects/4b/c4370a037feed828cca0915ebb0bb94b24a9d4,sha256=jt9lsSz8c3dw9PyfEEtkReCC_8YLXSKuc6ykSJCKZPM,487
|
|
66
75
|
pembot/.git/objects/4d/a03134f70896f72053fbdc0cd4f4c76d4ac1d8,sha256=GBhAvxM1omIt-PN6mNXYlIJMN5nx2AUE0ZOf68El5pc,117
|
|
67
76
|
pembot/.git/objects/50/39b29fda67743a044993436df6a4a1db7b8888,sha256=NYNmYtOq8IMmH32GaQSOBpTRTTm6jEJfY3vytVpzfKM,115
|
|
68
77
|
pembot/.git/objects/51/9e780574933d7627a083222bd10dd74f430904,sha256=3e3Iu2-waVySghbLYXmwhDPpfhV4PF82suvjcYkSVog,3604
|
|
69
78
|
pembot/.git/objects/59/69ac8b9d6b44a601385c3ed8c710a69d05216e,sha256=3IOcUn5myiozgeId1iWJZX-r7cS65xXnzQCEjrc-1ZA,168
|
|
79
|
+
pembot/.git/objects/5b/efa3b2f18d2b5d332c6de503a7054f4af0569f,sha256=g84QcQu-1NZ4-MfLHRosIUOnlK0VItVBqqFW5ffGDNI,882
|
|
70
80
|
pembot/.git/objects/5c/4f01d3ce9e243bbb8a693f97e5c7d13a857cb7,sha256=BnHoA5JBo5NY2ReemhwmZ-dOdx6CwXWY1TQsc-FSM5o,242
|
|
71
81
|
pembot/.git/objects/61/46a371b9c1bd9f51af273f11f986cfd1bedeba,sha256=KZvfnjxuriY54uWZQOM-GLovAvHs1k8_KwhpjNA5lW4,128
|
|
72
82
|
pembot/.git/objects/63/1700a51c8fa97b543991f5f61bfcd1e7e1327d,sha256=sYkhBkrSPQ8klX2gPrXJUZVt2a0iaF7KC7NFGBuxgeY,4360
|
|
@@ -75,16 +85,20 @@ pembot/.git/objects/6d/7a865a23b1cb4182f67907820104ced48b11c9,sha256=dJRTCmT9rLy
|
|
|
75
85
|
pembot/.git/objects/71/014c7a6c8c98449a26ef966485a6cf30a1974d,sha256=ZamWua6G5BGjBYZYeG8dN3nHhwz_kqFfoYyO2wtuRV0,417
|
|
76
86
|
pembot/.git/objects/72/f047cda92abcd1ddc857f6461de605f8668331,sha256=PFb9LUDMnUCnuJcXUa5W1ea__fdP17kNyWrnqvnOpjs,240
|
|
77
87
|
pembot/.git/objects/73/2e98f08bc806c331b06847fc8c743f545499e5,sha256=kbKUb6fwwhRO73B4EZmol55JBvckqE3GNZ9PqHRB2ag,3995
|
|
88
|
+
pembot/.git/objects/73/5b5f6d515f0816599343f1ae7ccffc1d5a487e,sha256=0aByFDI4DyyfA-TKCFoUXbQAvNabJIV7CXHSIDna5bo,2833
|
|
89
|
+
pembot/.git/objects/74/5c54e85b3ea7bfc8a8f35edc907746c29f8663,sha256=xl8JaTrzL9T8JNKDi3c4xzknxk4XvNwDZIGrcgYfp_g,115
|
|
78
90
|
pembot/.git/objects/75/321fbcd2be44a548400fbacbf5bcb71e3810fd,sha256=7AXaYVgItbw3xQiEqeRyO5qdIedIxDoI9hTDn8CBRxM,56
|
|
79
91
|
pembot/.git/objects/78/4aa28d912b66e07748483efe0326c70d7541a5,sha256=6ut1I6cMnpRs6EK2CZZv50W25yNc0Ha6nC_cj9tSQjI,249
|
|
80
92
|
pembot/.git/objects/7a/7d28b0313a3d9d509823faaae31949af8610ef,sha256=X59k-p9VNLBpmJlL53qIz8mntLeCSpnjw-rq9u9z_6I,90
|
|
81
93
|
pembot/.git/objects/7e/0907822f7d316ebe0be07e1f6918bef412c80b,sha256=lFc55Bu-vEXF8In553gHxlEsB47Vg2qFXHiJqepWEqg,5167
|
|
82
94
|
pembot/.git/objects/86/cdaec229f1fbebf43042266b03878944669f25,sha256=eTvQhUeYXP8E181oTOcBydcgmImr62IizaH_Jbcbg8g,4077
|
|
83
95
|
pembot/.git/objects/87/d6df5217a4a374f8c1211a05f9bd657f72c9a7,sha256=OGq5-x1lFa94vTX7WYO6o4TGvCZwAvZ6LXm6N3dpiKM,3881
|
|
96
|
+
pembot/.git/objects/88/0c3d45ac59940344dfb6c45005f7e908173138,sha256=7VMQzB6baLdC2Uj5f84w-X6XLM3GinXGBQjewhXupAc,914
|
|
84
97
|
pembot/.git/objects/8b/5be2af9b16f290549193859c214cd9072212e8,sha256=DhGeGisCdFZ0TcRKp5angRpaseI87TQDt5FtGZInstk,117
|
|
85
98
|
pembot/.git/objects/8d/adc1d7891c79de24ba2c7c38b4c830bf61870a,sha256=QJaAleJXlBhybaUcSeKB7nC9OJg9gjP_xc071Wyq8BM,115
|
|
86
99
|
pembot/.git/objects/8f/c00bf69f4ad3e50c13acc4a0988b6c0fe72b5a,sha256=uJVaujaQWN_NwzK9P0SM7cYp3I6GQFXdlYBPrnqVhcg,159
|
|
87
100
|
pembot/.git/objects/90/f067b86364ab243a7e3bc75f936319ba9eac88,sha256=FLAmmgvYuEAx1-ZBU30rvDzP0ppXWRSVrzPWVnArIb0,203
|
|
101
|
+
pembot/.git/objects/92/2448ecc557be58195468561e475b904bd1b349,sha256=mT1KGAHx7MalAkkpE7nAu6HlwXIB1Cts3MjZDLItErk,56
|
|
88
102
|
pembot/.git/objects/93/652290aac46c69b1b4dd83062b6cfe648dd643,sha256=WPgmr5bXli5s8rNdiUQM4IB4o_xyJe6nuI3TG4e5aYs,487
|
|
89
103
|
pembot/.git/objects/93/8f29d9b4b1ae86e39dddf9e3d115a82ddfc9b6,sha256=xf8oZ5IBMTxfkH7MFfukV7ZIu0Apd-78eJTdlI7GBv0,90
|
|
90
104
|
pembot/.git/objects/95/28bbccd167e3f4ad583a1ae9fac98a52620e27,sha256=jwJdRviwjGJIyMpE_BM6mr7B9ofGEsI5ZToJo5nmlao,263
|
|
@@ -99,22 +113,28 @@ pembot/.git/objects/ab/f77db148e3fb3b26913af14ae43130396f3269,sha256=rJJenBYvGWd
|
|
|
99
113
|
pembot/.git/objects/ac/9c9018c62fa30dc142665c1b5a375f4e056880,sha256=P_8LPBV0v4D17Akj4f5Cr2dhgNFUsh4o7DLK78CfNPo,349
|
|
100
114
|
pembot/.git/objects/af/80ddb5890f062e364ea8ade2d602df4e12de8c,sha256=QELzH3NdMCFohFEcf5oAAu_e54VFr-LhTyPbXY7GjSk,169
|
|
101
115
|
pembot/.git/objects/b1/1173d9b68db117437ccb9551461152e1e8a77d,sha256=6cl8NMNQ9b5fBh97GPEQNssOVrh-EQLJfhqSBbNb_vU,205
|
|
116
|
+
pembot/.git/objects/b1/ddf2869bc7d213b35dabd6fa5bfae44cd6b7a7,sha256=zC9EjJo4qRxy8d1zuRafZ36QVP9KnxehomIfIC0ZZEo,915
|
|
102
117
|
pembot/.git/objects/b2/4e79ab07fe9e68781961a25ff9f1dbb1546fbb,sha256=zfd9KnP9YtBMwzci1BMWFHAQR4BWJ3XQsyr-rFqdw0Q,135
|
|
103
118
|
pembot/.git/objects/b8/884c6145221ac66f84bf88919754c2cb05c12d,sha256=6EJskrHAkqVAC5ExxIZDQT_2kZWhfLPPAPbX61tmwgw,170
|
|
104
119
|
pembot/.git/objects/b8/eea52176ffa4d88c5a9976bee26092421565d3,sha256=xCom1B6wyws8ZNTJoIL4JtVIXNv1yPCwsXfNsVCAGQA,4410
|
|
120
|
+
pembot/.git/objects/bb/a495d8e72b78fefcc534259b8edae9a3172d15,sha256=Kr92INW6aFVOO0iZm0J2y2Yld9N1Dg-fP6zP1_cqe0g,525
|
|
105
121
|
pembot/.git/objects/bd/8fd1cb166996e74a8631f3a6f764a53af75297,sha256=JOkICUEv6tdVp7mYDUKtXnsWq3IIZSmm8iUP7OqQwc4,56
|
|
106
122
|
pembot/.git/objects/bf/068a0714e2145de83a5c004f4213b091439d0e,sha256=MpiiCqAk6GQ5iGzeThU0rsabrgA5tCAgdIWudAM0IrA,420
|
|
107
123
|
pembot/.git/objects/bf/32a7e6872e5dc4025ee3df3c921ec7ade0855f,sha256=lwL9ickzIFtMJgNKaPp6nTGDlMhPs6fkZTWevQWK_Lc,56
|
|
108
124
|
pembot/.git/objects/bf/518686b06069d2a8abd3689908b7e1a6e16b05,sha256=w-HgdJdX2_ZdiIptJv8BcWdeDEyhl42WEk8P72X8YKU,421
|
|
109
125
|
pembot/.git/objects/c0/793458db6e1bee7f79f1a504fb8ff4963f8ed3,sha256=b8lo_OrMeGgirc9yY_OFjv5xVpG6FBpZnBf7jbtlmyw,421
|
|
126
|
+
pembot/.git/objects/c0/f948ab4636a125bc202368e6c9cbe80d76169a,sha256=GPQso_R_RWWLx_pF3g58MiM4HyeSnpXTeLeKDfhkyPc,526
|
|
110
127
|
pembot/.git/objects/c2/443060c07101948487cfa93cc39e082e9e0f5f,sha256=d9rjB8sgBOUQ-HQ8yu5I-c5Dqr_q2z0OOCXSufjDAak,3998
|
|
128
|
+
pembot/.git/objects/c2/926f040b089a52edfb8351480f63619ab7e0ab,sha256=HAXSsWokz2tuk9Y952ogIEzSBlbUC4lZ1CjvWBc22Cg,56
|
|
111
129
|
pembot/.git/objects/c2/ce19d575a8cccf6886862c4fa6afefba142511,sha256=kxbbFUJ1TpEVIrqgiLzepP5Z1k_kF3FjCHvJ04yCBvs,3370
|
|
130
|
+
pembot/.git/objects/c3/cc0da3d955ecec0f865c46c030a0c073697495,sha256=7ZXWsXqapYhbZZJwaaeAwqGcgX8JwoS5DazqOGaRHeQ,179
|
|
112
131
|
pembot/.git/objects/ce/a4ffc1cf5eab61a2a0abd8f6dc941b580b69fd,sha256=yKUe_ZHD0UynTIrDRhuVqjDjKYDfZkWplqXjeSOD_bk,3894
|
|
113
132
|
pembot/.git/objects/d0/937f7d832266337289d5ec09459f931a46fcf7,sha256=_RZ7Z2EZp1OOF_XZhY6e1tzWwhI8Fa5R9aaF_W8APBA,56
|
|
114
133
|
pembot/.git/objects/e0/9162dbd64d85bb5ed740aa99faefa73f293d78,sha256=I5fpz3BQ2maFPTSu43T1uvYMuLiep1C3K6CsX8UMNPI,196
|
|
115
134
|
pembot/.git/objects/e0/da740b542afc451c45b9b4be6c0c7a3c79b06c,sha256=oAb2b2VwhPXykdK_ZV8MEFwfy-ZPd2Nja2gAv20U7hc,115
|
|
116
135
|
pembot/.git/objects/e3/da98f3722c2d0c937db0872836fc4491e4487a,sha256=DNdNDoMdjDexgwLErwUZDQCpvq4-QkFHtbVRXW_jKTk,168
|
|
117
136
|
pembot/.git/objects/e5/3070f2b07f45d031444b09b1b38658f3caf29e,sha256=irJ-z8kPZmg85B0f4TQz73yJoCMWMWsIR3Pi5wx1Dlk,4034
|
|
137
|
+
pembot/.git/objects/e6/adbc3c373070269f97ef82d4f63027d7878f67,sha256=e2NqH8wvYLSYgpHFoGTpurJ4gKU_PHSULZmjJETD3FQ,204
|
|
118
138
|
pembot/.git/objects/e7/911a702079a6144997ea4e70f59abbe59ec2bc,sha256=r4zY-__F4gSfjE7onRTrcxvv8umXKuPuFzd95AiQ0cs,392
|
|
119
139
|
pembot/.git/objects/e9/1172752e9a421ae463112d2b0506b37498c98d,sha256=qWZpM65kQPSxlVHAtyzH5L-j3rL-b9Jw-A7YBm4NMlI,249
|
|
120
140
|
pembot/.git/objects/ea/0af89e61a882c5afc2a8c281b2d96f174bfe58,sha256=lXbMvL_xl8PhWWfL5WAnvxqE3usiGO3iY83yi3GZwXc,4438
|
|
@@ -140,17 +160,17 @@ pembot/.git/objects/fe/cc5d8154b1e77e4c6beb23ce9cbe8fea55d34d,sha256=0it_Z3Lk5Mj
|
|
|
140
160
|
pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.idx,sha256=CNzx_lz6v4PulPxRW2t9nz-ifvplpSFPhMA2M9WNUrA,3424
|
|
141
161
|
pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.pack,sha256=dk3Sqrd0L-tNVLRy3uJdTYJNkw8v59mE1hV8zrCFNzc,41355
|
|
142
162
|
pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.rev,sha256=7U3tpTWQ3dn5dwQo_KWMWxF31cKaDnCk2AzTO7Cx4Bg,388
|
|
143
|
-
pembot/.git/refs/heads/main,sha256=
|
|
163
|
+
pembot/.git/refs/heads/main,sha256=3SSyWY2LZTJaO5WhuYpKDpZAxDBK77HHPnOtGsRO4nw,41
|
|
144
164
|
pembot/.git/refs/remotes/origin/HEAD,sha256=K7aiSqD8bEhBAPXVGim7rYQc0sdV9dk_qiBOXbtOsrQ,30
|
|
145
|
-
pembot/.git/refs/remotes/origin/main,sha256=
|
|
165
|
+
pembot/.git/refs/remotes/origin/main,sha256=3SSyWY2LZTJaO5WhuYpKDpZAxDBK77HHPnOtGsRO4nw,41
|
|
146
166
|
pembot/AnyToText/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
147
|
-
pembot/AnyToText/convertor.py,sha256=
|
|
167
|
+
pembot/AnyToText/convertor.py,sha256=5oGrgWiznsmTHmq-oxdzHHriOpeXKH_jDzq19_3XCl4,9009
|
|
148
168
|
pembot/TextEmbedder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
149
169
|
pembot/TextEmbedder/gemini_embedder.py,sha256=P679-2mmQESlYKML1vcrwx_-CSgWJgIQk7NL4F7BLQE,677
|
|
150
170
|
pembot/TextEmbedder/mongodb_embedder.py,sha256=-xIr-zrAGzCmgNeojuX6qYj2t019EVO1I6g-Hwq0FL8,10799
|
|
151
171
|
pembot/TextEmbedder/mongodb_index_creator.py,sha256=kopqdVYJii_wExVrXGZjMfqWZ2dD42b3PeNWo71weHI,5354
|
|
152
172
|
pembot/TextEmbedder/vector_query.py,sha256=Kh1uhx9CatB-oQlQtnW-1I2Qz7MGHI20n2h_8peAChM,1986
|
|
153
|
-
pembot/config/config.yaml,sha256=
|
|
173
|
+
pembot/config/config.yaml,sha256=lry9zmzSb6bS0GEyH1pCTDfvNFq8g-AD-zz9eOkKJ4o,156
|
|
154
174
|
pembot/pdf2markdown/LICENSE,sha256=1JTJhQjUYDqJzFJhNtitm7mHyE71PRHgetIqRRWg6Pk,1068
|
|
155
175
|
pembot/pdf2markdown/README.md,sha256=jitM1pwI69oa0N4mXv5-SY1ka9Sz3jsRNCDdpW-50kY,4545
|
|
156
176
|
pembot/pdf2markdown/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -206,7 +226,7 @@ pembot/pdf2markdown/config/config.yaml,sha256=w75W2Eg4-tu8rRk_23PqxWDh0010kRKLmP
|
|
|
206
226
|
pembot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
207
227
|
pembot/utils/inference_client.py,sha256=jeURmY2P5heVlH1dCV0XSgiX3U2qYGEmrnUv0KFpdww,5380
|
|
208
228
|
pembot/utils/string_tools.py,sha256=gtRa5rBR0Q7GspTu2WtCnvhJQLFjPfWLvhmyiPkyStU,1883
|
|
209
|
-
pembot-0.1.
|
|
210
|
-
pembot-0.1.
|
|
211
|
-
pembot-0.1.
|
|
212
|
-
pembot-0.1.
|
|
229
|
+
pembot-0.1.4.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
230
|
+
pembot-0.1.4.dist-info/WHEEL,sha256=Dyt6SBfaasWElUrURkknVFAZDHSTwxg3PaTza7RSbkY,100
|
|
231
|
+
pembot-0.1.4.dist-info/METADATA,sha256=ere6mCBeTMLBoRB5rQOQ88yHJkDOQUVv18DJI57CbLA,313
|
|
232
|
+
pembot-0.1.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|