pembot 0.1.2__py2.py3-none-any.whl → 0.1.3__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pembot might be problematic. Click here for more details.
- pembot/.git/COMMIT_EDITMSG +1 -1
- pembot/.git/index +0 -0
- pembot/.git/logs/HEAD +2 -0
- pembot/.git/logs/refs/heads/main +2 -0
- pembot/.git/logs/refs/remotes/origin/main +2 -0
- pembot/.git/objects/10/d1fb81ceede7365dbe132a770a49026e86e9a5 +0 -0
- pembot/.git/objects/1f/791d08c432b4244a670517c87ada2181159101 +0 -0
- pembot/.git/objects/20/3b390ad0aeb3bc5a8540840b004e6a42e5ce7a +0 -0
- pembot/.git/objects/27/02d55c4513a6d23e577aa2f104982c8b9436b2 +0 -0
- pembot/.git/objects/48/b71bba3a3f9887828863521c13901eceb54331 +0 -0
- pembot/.git/objects/5b/efa3b2f18d2b5d332c6de503a7054f4af0569f +0 -0
- pembot/.git/objects/73/5b5f6d515f0816599343f1ae7ccffc1d5a487e +0 -0
- pembot/.git/objects/88/0c3d45ac59940344dfb6c45005f7e908173138 +0 -0
- pembot/.git/objects/bb/a495d8e72b78fefcc534259b8edae9a3172d15 +0 -0
- pembot/.git/objects/c0/f948ab4636a125bc202368e6c9cbe80d76169a +0 -0
- pembot/.git/objects/c2/926f040b089a52edfb8351480f63619ab7e0ab +0 -0
- pembot/.git/objects/c3/cc0da3d955ecec0f865c46c030a0c073697495 +0 -0
- pembot/.git/refs/heads/main +1 -1
- pembot/.git/refs/remotes/origin/main +1 -1
- pembot/.gitignore +0 -1
- pembot/AnyToText/convertor.py +58 -225
- pembot/__init__.py +1 -1
- pembot/config/config.yaml +1 -1
- pembot/requirements.txt +5 -1
- {pembot-0.1.2.dist-info → pembot-0.1.3.dist-info}/METADATA +1 -1
- {pembot-0.1.2.dist-info → pembot-0.1.3.dist-info}/RECORD +28 -16
- {pembot-0.1.2.dist-info → pembot-0.1.3.dist-info}/WHEEL +0 -0
- {pembot-0.1.2.dist-info → pembot-0.1.3.dist-info}/licenses/LICENSE +0 -0
pembot/.git/COMMIT_EDITMSG
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
cyto/fixed the excel to markdown conversion
|
pembot/.git/index
CHANGED
|
Binary file
|
pembot/.git/logs/HEAD
CHANGED
|
@@ -13,3 +13,5 @@ af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db270
|
|
|
13
13
|
a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236415 +0530 commit: added prompt prefixing option to prime the llm with some more data; added the option for no-filter global search in a collection in search_within_document(s)
|
|
14
14
|
784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858244 +0530 commit: added smolagent functionality with tool extension capability to query.py and added a caveat in search_within_documents by which you can just return no context if you set limit= 0, effectively doing a Non-contextual prompt
|
|
15
15
|
f214d4d56726e2928479c5948bd88e038cf70b2e 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a cyto <silverstone965@gmail.com> 1752859643 +0530 commit: minor oopsie
|
|
16
|
+
8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a 203b390ad0aeb3bc5a8540840b004e6a42e5ce7a cyto <silverstone965@gmail.com> 1752865439 +0530 commit: added requirements
|
|
17
|
+
203b390ad0aeb3bc5a8540840b004e6a42e5ce7a c3cc0da3d955ecec0f865c46c030a0c073697495 cyto <silverstone965@gmail.com> 1758877816 +0530 commit: cyto/fixed the excel to markdown conversion
|
pembot/.git/logs/refs/heads/main
CHANGED
|
@@ -13,3 +13,5 @@ af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db270
|
|
|
13
13
|
a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236415 +0530 commit: added prompt prefixing option to prime the llm with some more data; added the option for no-filter global search in a collection in search_within_document(s)
|
|
14
14
|
784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858244 +0530 commit: added smolagent functionality with tool extension capability to query.py and added a caveat in search_within_documents by which you can just return no context if you set limit= 0, effectively doing a Non-contextual prompt
|
|
15
15
|
f214d4d56726e2928479c5948bd88e038cf70b2e 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a cyto <silverstone965@gmail.com> 1752859643 +0530 commit: minor oopsie
|
|
16
|
+
8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a 203b390ad0aeb3bc5a8540840b004e6a42e5ce7a cyto <silverstone965@gmail.com> 1752865439 +0530 commit: added requirements
|
|
17
|
+
203b390ad0aeb3bc5a8540840b004e6a42e5ce7a c3cc0da3d955ecec0f865c46c030a0c073697495 cyto <silverstone965@gmail.com> 1758877816 +0530 commit: cyto/fixed the excel to markdown conversion
|
|
@@ -12,3 +12,5 @@ af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db270
|
|
|
12
12
|
a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236436 +0530 update by push
|
|
13
13
|
784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858280 +0530 update by push
|
|
14
14
|
f214d4d56726e2928479c5948bd88e038cf70b2e 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a cyto <silverstone965@gmail.com> 1752859659 +0530 update by push
|
|
15
|
+
8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a 203b390ad0aeb3bc5a8540840b004e6a42e5ce7a cyto <silverstone965@gmail.com> 1752865472 +0530 update by push
|
|
16
|
+
203b390ad0aeb3bc5a8540840b004e6a42e5ce7a c3cc0da3d955ecec0f865c46c030a0c073697495 cyto <silverstone965@gmail.com> 1758877832 +0530 update by push
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
pembot/.git/refs/heads/main
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
c3cc0da3d955ecec0f865c46c030a0c073697495
|
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
c3cc0da3d955ecec0f865c46c030a0c073697495
|
pembot/.gitignore
CHANGED
pembot/AnyToText/convertor.py
CHANGED
|
@@ -3,11 +3,11 @@ import mimetypes
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from pembot.pdf2markdown.extract import MarkdownPDFExtractor
|
|
5
5
|
import os
|
|
6
|
-
import json
|
|
7
6
|
import pandas as pd
|
|
8
|
-
from typing import Literal, Union
|
|
7
|
+
from typing import Literal, Union
|
|
9
8
|
import tempfile
|
|
10
9
|
from datetime import datetime, date
|
|
10
|
+
from tabulate import tabulate
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
PandasReadEngineType = Literal['xlrd', 'openpyxl', 'odf', 'pyxlsb', 'calamine', None]
|
|
@@ -53,10 +53,7 @@ class Convertor():
|
|
|
53
53
|
self.output= output_file.read()
|
|
54
54
|
elif file_type == 'excel':
|
|
55
55
|
self.input_filepath= myfile
|
|
56
|
-
self.
|
|
57
|
-
self.convert_file_to_json()
|
|
58
|
-
with open(output_dir / (myfile.stem + '.json')) as output_file:
|
|
59
|
-
self.output= output_file.read()
|
|
56
|
+
self.output= self.convert_excel_to_markdown()
|
|
60
57
|
|
|
61
58
|
elif output_dir is not None and myfile is not None:
|
|
62
59
|
print("got output path for conversion: ", output_dir)
|
|
@@ -64,8 +61,6 @@ class Convertor():
|
|
|
64
61
|
|
|
65
62
|
self.output_dir= output_dir
|
|
66
63
|
self.input_filepath= myfile
|
|
67
|
-
base_name, _ = os.path.splitext(myfile.name)
|
|
68
|
-
self.json_filepath = output_dir / 'json' / (base_name + ".json")
|
|
69
64
|
|
|
70
65
|
if mt == 'application/json':
|
|
71
66
|
print("the file was json")
|
|
@@ -73,242 +68,80 @@ class Convertor():
|
|
|
73
68
|
print("the file was pdf, outputting in: ", output_dir)
|
|
74
69
|
extractor= MarkdownPDFExtractor(str(myfile), output_path= str(self.output_dir), page_delimiter= "-- NEXT PAGE --", model_name= model_name)
|
|
75
70
|
extractor.extract()
|
|
71
|
+
with open(self.output_dir / (myfile.stem + '.md')) as output_file:
|
|
72
|
+
self.output= output_file.read()
|
|
76
73
|
|
|
77
74
|
elif mt in EXCEL_FILE_TYPES:
|
|
78
|
-
self.
|
|
75
|
+
self.output = self.convert_excel_to_markdown()
|
|
79
76
|
|
|
80
77
|
else:
|
|
81
78
|
print(mt)
|
|
82
79
|
|
|
83
|
-
def
|
|
84
|
-
self,
|
|
85
|
-
sheet_to_convert: Union[str, int, None] = None, # Relevant for Excel/ODS
|
|
86
|
-
orient: Literal['dict', 'list', 'series', 'split', 'records', 'index'] = 'records', # Corrected type hint
|
|
87
|
-
date_format: Union[str, None] = 'iso', # 'iso', 'epoch', or None
|
|
88
|
-
csv_encoding: str = 'utf-8', # For reading CSV files
|
|
89
|
-
excel_ods_engine: PandasReadEngineType = None # For Excel/ODS, e.g., 'openpyxl', 'xlrd', 'odf'
|
|
90
|
-
) -> bool:
|
|
80
|
+
def convert_excel_to_markdown(self, excel_ods_engine: PandasReadEngineType = None) -> str:
|
|
91
81
|
"""
|
|
92
|
-
Converts an Excel
|
|
93
|
-
|
|
82
|
+
Converts all sheets from an Excel or ODS file into a single Markdown string.
|
|
83
|
+
Each sheet is converted to a Markdown table, prefixed with the sheet's name.
|
|
94
84
|
|
|
95
85
|
Args:
|
|
96
|
-
sheet_to_convert (str | int | None, optional):
|
|
97
|
-
- For Excel/ODS:
|
|
98
|
-
- If None (default): Converts all sheets. The JSON output will be a
|
|
99
|
-
dictionary where keys are sheet names and values are the JSON
|
|
100
|
-
representation of each sheet.
|
|
101
|
-
- If str: Name of the specific sheet to convert.
|
|
102
|
-
- If int: Index of the specific sheet to convert (0-based).
|
|
103
|
-
If a specific sheet is requested, the JSON output will directly be
|
|
104
|
-
the representation of that sheet.
|
|
105
|
-
- For CSV: This parameter is ignored. The entire CSV is processed.
|
|
106
|
-
orient (str, optional): Pandas DataFrame.to_dict() orientation for each sheet/CSV.
|
|
107
|
-
Default: 'records'. See pandas.DataFrame.to_dict() documentation.
|
|
108
|
-
date_format (str | None, optional): Format for datetime objects.
|
|
109
|
-
- 'iso' (default): ISO8601 format (e.g., '2023-10-27T10:30:00').
|
|
110
|
-
- 'epoch': Milliseconds since epoch.
|
|
111
|
-
- None: Pandas default (often Timestamps). 'iso' is generally safer for JSON.
|
|
112
|
-
csv_encoding (str, optional): Encoding for reading CSV files. Default is 'utf-8'.
|
|
113
86
|
excel_ods_engine (str | None, optional): Pandas engine for reading Excel or ODS files.
|
|
114
87
|
- For Excel: 'openpyxl' (for .xlsx), 'xlrd' (for .xls).
|
|
115
88
|
- For ODS: 'odf' (requires 'odfpy' library).
|
|
116
89
|
If None, pandas auto-detects based on file extension and installed libraries.
|
|
117
90
|
|
|
118
91
|
Returns:
|
|
119
|
-
|
|
92
|
+
str: A string containing the Markdown tables for all sheets, or an error message.
|
|
120
93
|
"""
|
|
121
|
-
|
|
122
94
|
input_filepath = self.input_filepath
|
|
123
|
-
|
|
95
|
+
markdown_output = []
|
|
124
96
|
|
|
97
|
+
file_suffix= ''
|
|
125
98
|
try:
|
|
126
|
-
|
|
127
99
|
if not input_filepath.exists():
|
|
128
|
-
|
|
129
|
-
return False
|
|
130
|
-
|
|
131
|
-
# Ensure output directory exists
|
|
132
|
-
json_filepath.parent.mkdir(parents=True, exist_ok=True)
|
|
100
|
+
return f"Error: Input file not found at {input_filepath}"
|
|
133
101
|
|
|
134
102
|
file_suffix = input_filepath.suffix.lower()
|
|
135
|
-
output_data_final: Union[Dict[str, Any], List[Dict[str, Any]]] = {}
|
|
136
|
-
|
|
137
|
-
dataframes_to_process: list[tuple[pd.DataFrame, str | None]] = []
|
|
138
|
-
|
|
139
103
|
current_engine: PandasReadEngineType = excel_ods_engine
|
|
140
104
|
|
|
141
|
-
if file_suffix
|
|
142
|
-
if
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
print(f"Error reading CSV file '{input_filepath.name}': {e}")
|
|
149
|
-
return False
|
|
150
|
-
|
|
151
|
-
elif file_suffix in ['.xls', '.xlsx', '.ods']:
|
|
152
|
-
try:
|
|
153
|
-
if file_suffix == '.ods':
|
|
154
|
-
if current_engine is None:
|
|
155
|
-
current_engine = 'odf'
|
|
156
|
-
elif current_engine != 'odf':
|
|
157
|
-
print(f"Warning: Specified engine '{current_engine}' may not be optimal for ODS. Forcing 'odf'.")
|
|
158
|
-
current_engine = 'odf'
|
|
159
|
-
|
|
160
|
-
if sheet_to_convert is not None:
|
|
161
|
-
df = pd.read_excel(input_filepath, sheet_name=sheet_to_convert, engine=current_engine)
|
|
162
|
-
dataframes_to_process.append((df, None))
|
|
163
|
-
|
|
164
|
-
else:
|
|
165
|
-
excel_file = pd.ExcelFile(input_filepath, engine=current_engine)
|
|
166
|
-
if not excel_file.sheet_names:
|
|
167
|
-
print(f"Warning: File '{input_filepath.name}' contains no sheets.")
|
|
168
|
-
for sheet_name in excel_file.sheet_names:
|
|
169
|
-
df = excel_file.parse(sheet_name) # engine is inherited
|
|
170
|
-
dataframes_to_process.append((df, sheet_name))
|
|
171
|
-
except ImportError as ie:
|
|
172
|
-
if 'odfpy' in str(ie).lower() and file_suffix == '.ods':
|
|
173
|
-
print(f"Error reading ODS file '{input_filepath.name}': The 'odfpy' library is required. Please install it using 'pip install odfpy'.")
|
|
174
|
-
elif 'xlrd' in str(ie).lower() and file_suffix == '.xls':
|
|
175
|
-
print(f"Error reading .xls file '{input_filepath.name}': The 'xlrd' library might be required. Please install it using 'pip install xlrd'.")
|
|
176
|
-
elif 'openpyxl' in str(ie).lower() and file_suffix == '.xlsx':
|
|
177
|
-
print(f"Error reading .xlsx file '{input_filepath.name}': The 'openpyxl' library might be required. Please install it using 'pip install openpyxl'.")
|
|
178
|
-
else:
|
|
179
|
-
print(f"ImportError reading file '{input_filepath.name}': {ie}")
|
|
180
|
-
return False
|
|
181
|
-
except Exception as e:
|
|
182
|
-
print(f"Error reading Excel/ODS file '{input_filepath.name}': {e}")
|
|
183
|
-
return False
|
|
184
|
-
else:
|
|
185
|
-
print(f"Error: Unsupported file type: '{file_suffix}'. Please provide a CSV, XLS, XLSX, or ODS file.")
|
|
186
|
-
return False
|
|
187
|
-
|
|
188
|
-
if not dataframes_to_process and file_suffix in ['.xls', '.xlsx', '.ods'] and sheet_to_convert is None:
|
|
189
|
-
print(f"Info: No dataframes were loaded from '{input_filepath.name}'. Output JSON will be empty if processing all sheets from an empty file.")
|
|
190
|
-
elif not dataframes_to_process and not (file_suffix in ['.xls', '.xlsx', '.ods'] and sheet_to_convert is None):
|
|
191
|
-
pass
|
|
192
|
-
|
|
193
|
-
is_direct_output = len(dataframes_to_process) == 1 and dataframes_to_process[0][1] is None
|
|
194
|
-
temp_processed_data: Dict[str, Any] = {}
|
|
195
|
-
|
|
196
|
-
for df_original, name_key in dataframes_to_process:
|
|
197
|
-
df = df_original.copy()
|
|
198
|
-
|
|
199
|
-
# Handle datetime columns with improved detection and conversion
|
|
200
|
-
if date_format:
|
|
201
|
-
# Check for datetime columns using multiple approaches
|
|
202
|
-
datetime_columns = []
|
|
203
|
-
|
|
204
|
-
# Method 1: Use pandas dtype detection
|
|
205
|
-
datetime_columns.extend(df.select_dtypes(include=['datetime64[ns]', 'datetime', 'datetimetz']).columns.tolist())
|
|
206
|
-
|
|
207
|
-
# Method 2: Check for datetime objects in each column
|
|
208
|
-
for col in df.columns:
|
|
209
|
-
if col not in datetime_columns:
|
|
210
|
-
# Sample a few non-null values to check type
|
|
211
|
-
sample_values = df[col].dropna().head(10)
|
|
212
|
-
if len(sample_values) > 0:
|
|
213
|
-
for val in sample_values:
|
|
214
|
-
if isinstance(val, (datetime, date, pd.Timestamp)):
|
|
215
|
-
datetime_columns.append(col)
|
|
216
|
-
break
|
|
217
|
-
|
|
218
|
-
# Convert datetime columns
|
|
219
|
-
for col_name in datetime_columns:
|
|
220
|
-
try:
|
|
221
|
-
if date_format == 'iso':
|
|
222
|
-
df[col_name] = df[col_name].apply(lambda x: self._convert_to_iso(x))
|
|
223
|
-
elif date_format == 'epoch':
|
|
224
|
-
df[col_name] = df[col_name].apply(lambda x: self._convert_to_epoch(x))
|
|
225
|
-
except Exception as e_date:
|
|
226
|
-
print(f"Warning: Could not fully convert date column '{col_name}' in '{name_key or input_filepath.name}' using format '{date_format}'. Error: {e_date}")
|
|
227
|
-
|
|
228
|
-
# Replace NaN values with None for JSON compatibility
|
|
229
|
-
df = df.astype(object).where(pd.notnull(df), None)
|
|
230
|
-
|
|
231
|
-
# Final safety check: convert any remaining datetime objects
|
|
232
|
-
for col in df.columns:
|
|
233
|
-
df[col] = df[col].apply(lambda x: self._safe_datetime_convert(x, date_format))
|
|
234
|
-
|
|
235
|
-
current_json_segment = df.to_dict(orient=orient)
|
|
236
|
-
|
|
237
|
-
if is_direct_output:
|
|
238
|
-
output_data_final = current_json_segment
|
|
239
|
-
break
|
|
240
|
-
else:
|
|
241
|
-
if name_key is not None:
|
|
242
|
-
temp_processed_data[name_key] = current_json_segment
|
|
243
|
-
|
|
244
|
-
if not is_direct_output:
|
|
245
|
-
output_data_final = temp_processed_data
|
|
246
|
-
|
|
247
|
-
with open(json_filepath, 'w', encoding='utf-8') as f:
|
|
248
|
-
json.dump(output_data_final, f, indent=4, ensure_ascii=False)
|
|
249
|
-
|
|
250
|
-
print(f"Successfully converted '{input_filepath.name}' to '{json_filepath.name}'")
|
|
251
|
-
return True
|
|
252
|
-
|
|
253
|
-
except FileNotFoundError:
|
|
254
|
-
print(f"Error: Input file not found at {input_filepath.name}")
|
|
255
|
-
return False
|
|
256
|
-
except ValueError as ve:
|
|
257
|
-
print(f"ValueError during conversion of '{input_filepath.name}': {ve}")
|
|
258
|
-
return False
|
|
259
|
-
except Exception as e:
|
|
260
|
-
print(f"An unexpected error occurred during conversion of '{input_filepath.name}': {e}")
|
|
261
|
-
return False
|
|
105
|
+
if file_suffix in ['.xls', '.xlsx', '.ods']:
|
|
106
|
+
if file_suffix == '.ods':
|
|
107
|
+
if current_engine is None:
|
|
108
|
+
current_engine = 'odf'
|
|
109
|
+
elif current_engine != 'odf':
|
|
110
|
+
print(f"Warning: Specified engine '{current_engine}' may not be optimal for ODS. Forcing 'odf'.")
|
|
111
|
+
current_engine = 'odf'
|
|
262
112
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
return None
|
|
113
|
+
excel_file = pd.ExcelFile(input_filepath, engine=current_engine)
|
|
114
|
+
if not excel_file.sheet_names:
|
|
115
|
+
return f"Warning: File '{input_filepath.name}' contains no sheets."
|
|
267
116
|
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
return value.isoformat()
|
|
275
|
-
else:
|
|
276
|
-
return str(value)
|
|
277
|
-
except:
|
|
278
|
-
return str(value) if value is not None else None
|
|
117
|
+
for sheet_name in excel_file.sheet_names:
|
|
118
|
+
df = excel_file.parse(sheet_name)
|
|
119
|
+
markdown_output.append(f"## {sheet_name}\n")
|
|
120
|
+
markdown_table = tabulate(df, headers='keys', tablefmt='pipe')
|
|
121
|
+
markdown_output.append(markdown_table)
|
|
122
|
+
markdown_output.append("\n")
|
|
279
123
|
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
124
|
+
return "\n".join(markdown_output)
|
|
125
|
+
|
|
126
|
+
elif file_suffix == '.csv':
|
|
127
|
+
df = pd.read_csv(input_filepath)
|
|
128
|
+
markdown_table = tabulate(df, headers='keys', tablefmt='pipe')
|
|
129
|
+
return markdown_table
|
|
284
130
|
|
|
285
|
-
try:
|
|
286
|
-
if isinstance(value, (int, float)):
|
|
287
|
-
return int(value) # Assume already epoch
|
|
288
|
-
elif hasattr(value, 'timestamp'):
|
|
289
|
-
return int(value.timestamp() * 1000)
|
|
290
|
-
elif isinstance(value, pd.Timestamp):
|
|
291
|
-
return int(value.timestamp() * 1000)
|
|
292
131
|
else:
|
|
293
|
-
return
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
# If it's a datetime-like object, convert it
|
|
303
|
-
if isinstance(value, (datetime, date, pd.Timestamp)):
|
|
304
|
-
if date_format == 'iso':
|
|
305
|
-
return self._convert_to_iso(value)
|
|
306
|
-
elif date_format == 'epoch':
|
|
307
|
-
return self._convert_to_epoch(value)
|
|
132
|
+
return f"Error: Unsupported file type: '{file_suffix}'. Please provide a CSV, XLS, XLSX, or ODS file."
|
|
133
|
+
|
|
134
|
+
except ImportError as ie:
|
|
135
|
+
if 'odfpy' in str(ie).lower() and file_suffix == '.ods':
|
|
136
|
+
return f"Error reading ODS file '{input_filepath.name}': The 'odfpy' library is required. Please install it using 'pip install odfpy'."
|
|
137
|
+
elif 'xlrd' in str(ie).lower() and file_suffix == '.xls':
|
|
138
|
+
return f"Error reading .xls file '{input_filepath.name}': The 'xlrd' library might be required. Please install it using 'pip install xlrd'."
|
|
139
|
+
elif 'openpyxl' in str(ie).lower() and file_suffix == '.xlsx':
|
|
140
|
+
return f"Error reading .xlsx file '{input_filepath.name}': The 'openpyxl' library might be required. Please install it using 'pip install openpyxl'."
|
|
308
141
|
else:
|
|
309
|
-
return
|
|
310
|
-
|
|
311
|
-
|
|
142
|
+
return f"ImportError reading file '{input_filepath.name}': {ie}"
|
|
143
|
+
except Exception as e:
|
|
144
|
+
return f"An unexpected error occurred during conversion of '{input_filepath.name}': {e}"
|
|
312
145
|
|
|
313
146
|
|
|
314
147
|
def chunk_text(text, chunk_size=500, overlap_size=50):
|
|
@@ -337,29 +170,29 @@ def chunk_text(text, chunk_size=500, overlap_size=50):
|
|
|
337
170
|
if __name__ == '__main__':
|
|
338
171
|
print("Test Run Start:")
|
|
339
172
|
try:
|
|
340
|
-
print("Test 1: scaned pdf page, bytes")
|
|
341
|
-
with open("/home/cyto/Documents/scanned.pdf", "rb") as imgpdf:
|
|
342
|
-
|
|
343
|
-
|
|
173
|
+
# print("Test 1: scaned pdf page, bytes")
|
|
174
|
+
# with open("/home/cyto/Documents/scanned.pdf", "rb") as imgpdf:
|
|
175
|
+
# conv= Convertor(file_bytes= imgpdf.read(), suffix= ".pdf", file_type= "pdf")
|
|
176
|
+
# print(conv.output)
|
|
344
177
|
|
|
345
178
|
# print("Test 2: JD pdf, bytes")
|
|
346
179
|
# with open("/home/cyto/dev/pembotdir/jds/PM Trainee.pdf", "rb") as imgpdf:
|
|
347
180
|
# conv= Convertor(file_bytes= imgpdf.read(), suffix= ".pdf", file_type= "pdf")
|
|
348
181
|
# print(conv.output)
|
|
349
182
|
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
183
|
+
print("Test 3: excel schedule, bytes")
|
|
184
|
+
with open("/home/cyto/Downloads/Assignment schedule.xlsx", "rb") as imgpdf:
|
|
185
|
+
conv= Convertor(file_bytes= imgpdf.read(), suffix= ".xlsx", file_type= "excel")
|
|
186
|
+
print(conv.output)
|
|
354
187
|
|
|
355
188
|
# without bytes example:
|
|
356
189
|
print("Test 4: scanned pdf, path")
|
|
357
190
|
conv= Convertor(myfile= Path('/home/cyto/Documents/scanned.pdf'), output_dir= Path('/home/cyto/Documents'))
|
|
358
191
|
print(conv.output)
|
|
359
192
|
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
193
|
+
print("Test 5: schedule excel, path")
|
|
194
|
+
conv= Convertor(myfile= Path('/home/cyto/Downloads/Assignment schedule.xlsx'), output_dir= Path('/home/cyto/Downloads'))
|
|
195
|
+
print(conv.output)
|
|
363
196
|
except FileNotFoundError as fe:
|
|
364
197
|
print("file not found, modify the driver code to get sample files to test:\n\n", fe)
|
|
365
198
|
except Exception as e:
|
pembot/__init__.py
CHANGED
pembot/config/config.yaml
CHANGED
pembot/requirements.txt
CHANGED
|
@@ -9,6 +9,7 @@ cffi==1.17.1
|
|
|
9
9
|
charset-normalizer==3.4.2
|
|
10
10
|
click==8.2.1
|
|
11
11
|
cryptography==45.0.5
|
|
12
|
+
defusedxml==0.7.1
|
|
12
13
|
dnspython==2.7.0
|
|
13
14
|
duckduckgo_search==8.1.1
|
|
14
15
|
et_xmlfile==2.0.0
|
|
@@ -36,6 +37,7 @@ MarkupSafe==3.0.2
|
|
|
36
37
|
mdurl==0.1.2
|
|
37
38
|
msgpack==1.1.1
|
|
38
39
|
numpy==2.3.1
|
|
40
|
+
odfpy==1.4.1
|
|
39
41
|
ollama==0.5.1
|
|
40
42
|
openpyxl==3.1.5
|
|
41
43
|
orjson==3.10.18
|
|
@@ -44,7 +46,7 @@ pandas==2.3.0
|
|
|
44
46
|
pathlib==1.0.1
|
|
45
47
|
pdfminer.six==20250506
|
|
46
48
|
pdfplumber==0.11.7
|
|
47
|
-
pembot==0.1.
|
|
49
|
+
pembot==0.1.3
|
|
48
50
|
pillow==11.3.0
|
|
49
51
|
primp==0.15.0
|
|
50
52
|
pyasn1==0.6.1
|
|
@@ -76,6 +78,7 @@ smolagents==1.20.0
|
|
|
76
78
|
sniffio==1.3.1
|
|
77
79
|
soupsieve==2.7
|
|
78
80
|
starlette==0.46.2
|
|
81
|
+
tabulate==0.9.0
|
|
79
82
|
tenacity==8.5.0
|
|
80
83
|
tomlkit==0.13.3
|
|
81
84
|
tqdm==4.67.1
|
|
@@ -86,3 +89,4 @@ tzdata==2025.2
|
|
|
86
89
|
urllib3==2.5.0
|
|
87
90
|
uvicorn==0.35.0
|
|
88
91
|
websockets==15.0.1
|
|
92
|
+
xlrd==2.0.2
|
|
@@ -1,19 +1,19 @@
|
|
|
1
|
-
pembot/.gitignore,sha256=
|
|
1
|
+
pembot/.gitignore,sha256=yyDEUmeqZekG4AOrU9Zvu2ZQhJvEzEg_lQp2CDfBhXM,92
|
|
2
2
|
pembot/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
3
|
-
pembot/__init__.py,sha256=
|
|
3
|
+
pembot/__init__.py,sha256=BSH5pBwk4cE-px43hTajMIU1KPohYzz7NSoELkXBd7s,211
|
|
4
4
|
pembot/gartner.py,sha256=3ALknQ5mSXIimmwCa3JFDzB_EW2hHEcQO1T2odyBquk,5408
|
|
5
5
|
pembot/main.py,sha256=lZLIV8XPonvNoY4LVS-5fct1y9URMXWoSGJUKMw3Yg8,9667
|
|
6
6
|
pembot/output_structure_local.py,sha256=YfpHzfTNeLMSsB_CjAamha9D6Iz7E1IC-tW9xPCMWFc,3000
|
|
7
7
|
pembot/pem.py,sha256=mv6iGcN1peSY7z2dtCQ_BKj31EFBNfczBhps_d-0XDo,6377
|
|
8
8
|
pembot/pyrightconfig.json,sha256=j2O2tc8Z-Zu7hEnhN9neoKk6-iLkAlp4qOmAxFyHB7Y,368
|
|
9
9
|
pembot/query.py,sha256=zgfIJsSMDatFPl0Fw3MhK7fO8uBB0Yj4rxEAExqGyGA,18054
|
|
10
|
-
pembot/requirements.txt,sha256=
|
|
10
|
+
pembot/requirements.txt,sha256=bSXSM-tdrwGooRpz9e1VE3Yt9hXwz3inSq2PYpPw3hU,1567
|
|
11
11
|
pembot/search.py,sha256=IW0F8QjE-HSYP47v5P9EqfnzKgFEf5CGxeICtHDDrkE,9137
|
|
12
|
-
pembot/.git/COMMIT_EDITMSG,sha256=
|
|
12
|
+
pembot/.git/COMMIT_EDITMSG,sha256=PRRNgCxdUYryr-DaiG3MSBswsgdYtfleiBnQX_2R64U,44
|
|
13
13
|
pembot/.git/HEAD,sha256=KNJb-Cr0wOK3L1CVmyvrhZ4-YLljCl6MYD2tTdsrboA,21
|
|
14
14
|
pembot/.git/config,sha256=ZFl9d2GyxirgRXRsv8iULIieKxwGC9P6SAjB_AmTkmQ,271
|
|
15
15
|
pembot/.git/description,sha256=hatsFj1DoX6pz3eIMIvKFGbxsKjRzJLibpv2PaQGKu4,73
|
|
16
|
-
pembot/.git/index,sha256=
|
|
16
|
+
pembot/.git/index,sha256=5oJw9H2tF6dW5jUIaChBVJVZAfTwLdB-F-cyM_oLdx0,2054
|
|
17
17
|
pembot/.git/packed-refs,sha256=7DECsr7q7vJ6Gw6a2gS3dE4v-YzbxGiWYoSWM43DgsQ,112
|
|
18
18
|
pembot/.git/hooks/applypatch-msg.sample,sha256=AiNJeguLAzqlijpSG4YphpOGz3qw4vEBlj0yiqYhk_c,478
|
|
19
19
|
pembot/.git/hooks/commit-msg.sample,sha256=H3TV6SkpebVz69WXQdRsuT_zkazdCD00C5Q3B1PZJDc,896
|
|
@@ -30,10 +30,10 @@ pembot/.git/hooks/push-to-checkout.sample,sha256=pT0HQXmLKHxt16-mSu5HPzBeZdP0lGO
|
|
|
30
30
|
pembot/.git/hooks/sendemail-validate.sample,sha256=ROv8kj3FRmvACWAvDs8Ge5xlRZq_6IaN3Em3jmztepI,2308
|
|
31
31
|
pembot/.git/hooks/update.sample,sha256=jV8vqD4QPPCLV-qmdSHfkZT0XL28s32lKtWGCXoU0QY,3650
|
|
32
32
|
pembot/.git/info/exclude,sha256=ZnH-g7egfIky7okWTR8nk7IxgFjri5jcXAbuClo7DsE,240
|
|
33
|
-
pembot/.git/logs/HEAD,sha256=
|
|
34
|
-
pembot/.git/logs/refs/heads/main,sha256=
|
|
33
|
+
pembot/.git/logs/HEAD,sha256=yPZc9m6stXELdpuwEBMSjn0rVegOtxHb8YcCsBL0USA,4131
|
|
34
|
+
pembot/.git/logs/refs/heads/main,sha256=yPZc9m6stXELdpuwEBMSjn0rVegOtxHb8YcCsBL0USA,4131
|
|
35
35
|
pembot/.git/logs/refs/remotes/origin/HEAD,sha256=OrkNquczPPh6fEGtutFKva_-_JhAdwnvXpCCPC4N6jk,194
|
|
36
|
-
pembot/.git/logs/refs/remotes/origin/main,sha256=
|
|
36
|
+
pembot/.git/logs/refs/remotes/origin/main,sha256=i4leZYJW4JljsOW0xXKwBmnDOlygP8qJN378MfpDauM,2336
|
|
37
37
|
pembot/.git/objects/01/5f71967c525963c827d7fe5415ae2c040c4a64,sha256=-qlT-5utWcwFnO3ADkH2SA2LBsdcph6wE2iePxJxkHs,170
|
|
38
38
|
pembot/.git/objects/09/ee34d94dffd4c286df1d6d528b2e98e2a6cce2,sha256=FSXPGn6UBhR7s1Ug-afzCYLfGy8dE3Umn8dBKaahkDM,203
|
|
39
39
|
pembot/.git/objects/0a/fb3a98cdc55b1434b44534ec2bf22c56cfa26c,sha256=Xxw20vI57zuhERWopDAZpQw6rAOhFtUr05lzpGyCTTE,120
|
|
@@ -44,10 +44,14 @@ pembot/.git/objects/0d/28f73897db0c9a9351ee9e64d2a0fe27db2705,sha256=hqMFSXWo_05
|
|
|
44
44
|
pembot/.git/objects/0e/120123bfadfd594220963f3bbca54056bab6ee,sha256=fj4c6vIKYMYSj5DEdXd6fcYGcanqaPGRD_9haJy35ns,56
|
|
45
45
|
pembot/.git/objects/0f/ccea3d0db4864a854f8b2c13b9f76b3601d200,sha256=Fq6qF_9lqg1bYsF2tWArhzkldnfgLFELLK2CH_2XNcU,203
|
|
46
46
|
pembot/.git/objects/10/9d1ca0463ea42bbbc435bcb43a90711211cf49,sha256=vR33_Raw-LpnaXGQc1MhSk_ZgEROO2Xa9n97YmA3gtQ,56
|
|
47
|
+
pembot/.git/objects/10/d1fb81ceede7365dbe132a770a49026e86e9a5,sha256=OhI6pEx_G6KbujS7idkp5MxJd1Aw92Wn3Sl-JBgU2VU,115
|
|
47
48
|
pembot/.git/objects/14/22c2d0cf79fd928ff7e2d77f96ad5b40cc2a31,sha256=2INSnjkW4KTAcfO2aLYVzjnpT89NXxx8TBJj4iU9e3Y,170
|
|
48
49
|
pembot/.git/objects/18/28e18ab80aa64d334b26428708140e280cbc63,sha256=PTF8WLVhzxBDTZhwU_PBHrkQBbijHbKvttSr0XVTOcU,3936
|
|
49
50
|
pembot/.git/objects/19/f61df7dbd562d04f561288677bbf2f18f5dff7,sha256=zg8IdUSnMYpJ6HsfY2LQbXQTMwlT1IPWRSEiY2uDwyE,392
|
|
51
|
+
pembot/.git/objects/1f/791d08c432b4244a670517c87ada2181159101,sha256=Zpth_iVM6H5W4u5jLVEKdRz0i3ydBLm5XJql4ieuj8U,169
|
|
50
52
|
pembot/.git/objects/1f/83a471c8119f7794d98c049170a5d7d07a4b71,sha256=XnMaYQUA8iT1fiOIvlBav331Ry7pNBOBqI3wB3Y1VM0,90
|
|
53
|
+
pembot/.git/objects/20/3b390ad0aeb3bc5a8540840b004e6a42e5ce7a,sha256=tNzFPYJ0Y6YpaNw4w2hRH_0iTa5fNlC2nzARkKFbIec,162
|
|
54
|
+
pembot/.git/objects/27/02d55c4513a6d23e577aa2f104982c8b9436b2,sha256=SQ84I7DnyPaaxoWCBoh20Iw1VZm8wgSaPaL5uDR-R8I,90
|
|
51
55
|
pembot/.git/objects/28/db0ab48059acccd7d257aa02e52e9b6b83a4a5,sha256=S6PrWSQlkifYxKIgFdU0PZD0uLebS6uAP2LAUwp5yOI,91
|
|
52
56
|
pembot/.git/objects/35/97e518a8658280be9f377f78edf1dfa1f23814,sha256=gfc5bFLVZpwNQb1Ox2VosDYAjw0Lc5ZLjmvNA8gWcmg,2546
|
|
53
57
|
pembot/.git/objects/37/175696b3ca7a5d17379f03fb61a1023d50aeba,sha256=XaF3EsJ1wSIWtgBtgKsZkwiMK0NM8acFy9nnqE9_d0s,3085
|
|
@@ -62,11 +66,13 @@ pembot/.git/objects/41/cbeb6bcb4c6fa9ef9be571082d95ecb4ea0ee3,sha256=waMrzjG_o5D
|
|
|
62
66
|
pembot/.git/objects/42/f03e1b66aa56bbb36a1c3a8dea9e1e727faffa,sha256=n4W2gcagesjI1rStKNxQ98q5UOHlfwFJGUADFeYldoE,418
|
|
63
67
|
pembot/.git/objects/44/86da0f89c566c3bae8abf435d37aeca87f1632,sha256=S2hY860Ep-0c7gQcbgrH6ioG7-Hw9a3BwYHcCkwy1Hg,3884
|
|
64
68
|
pembot/.git/objects/44/9dea1ca63d6f1e47d119b36576acc94822a37c,sha256=gVL6GHxMRFhlOnyUCO1dSxnsBlMd4Jx90eNZFrv32UQ,6490
|
|
69
|
+
pembot/.git/objects/48/b71bba3a3f9887828863521c13901eceb54331,sha256=Kx2Tcs17_chpF5rbY3AB34Cj1S3DGnr7Y1tZOTxvrdM,80
|
|
65
70
|
pembot/.git/objects/4b/c4370a037feed828cca0915ebb0bb94b24a9d4,sha256=jt9lsSz8c3dw9PyfEEtkReCC_8YLXSKuc6ykSJCKZPM,487
|
|
66
71
|
pembot/.git/objects/4d/a03134f70896f72053fbdc0cd4f4c76d4ac1d8,sha256=GBhAvxM1omIt-PN6mNXYlIJMN5nx2AUE0ZOf68El5pc,117
|
|
67
72
|
pembot/.git/objects/50/39b29fda67743a044993436df6a4a1db7b8888,sha256=NYNmYtOq8IMmH32GaQSOBpTRTTm6jEJfY3vytVpzfKM,115
|
|
68
73
|
pembot/.git/objects/51/9e780574933d7627a083222bd10dd74f430904,sha256=3e3Iu2-waVySghbLYXmwhDPpfhV4PF82suvjcYkSVog,3604
|
|
69
74
|
pembot/.git/objects/59/69ac8b9d6b44a601385c3ed8c710a69d05216e,sha256=3IOcUn5myiozgeId1iWJZX-r7cS65xXnzQCEjrc-1ZA,168
|
|
75
|
+
pembot/.git/objects/5b/efa3b2f18d2b5d332c6de503a7054f4af0569f,sha256=g84QcQu-1NZ4-MfLHRosIUOnlK0VItVBqqFW5ffGDNI,882
|
|
70
76
|
pembot/.git/objects/5c/4f01d3ce9e243bbb8a693f97e5c7d13a857cb7,sha256=BnHoA5JBo5NY2ReemhwmZ-dOdx6CwXWY1TQsc-FSM5o,242
|
|
71
77
|
pembot/.git/objects/61/46a371b9c1bd9f51af273f11f986cfd1bedeba,sha256=KZvfnjxuriY54uWZQOM-GLovAvHs1k8_KwhpjNA5lW4,128
|
|
72
78
|
pembot/.git/objects/63/1700a51c8fa97b543991f5f61bfcd1e7e1327d,sha256=sYkhBkrSPQ8klX2gPrXJUZVt2a0iaF7KC7NFGBuxgeY,4360
|
|
@@ -75,12 +81,14 @@ pembot/.git/objects/6d/7a865a23b1cb4182f67907820104ced48b11c9,sha256=dJRTCmT9rLy
|
|
|
75
81
|
pembot/.git/objects/71/014c7a6c8c98449a26ef966485a6cf30a1974d,sha256=ZamWua6G5BGjBYZYeG8dN3nHhwz_kqFfoYyO2wtuRV0,417
|
|
76
82
|
pembot/.git/objects/72/f047cda92abcd1ddc857f6461de605f8668331,sha256=PFb9LUDMnUCnuJcXUa5W1ea__fdP17kNyWrnqvnOpjs,240
|
|
77
83
|
pembot/.git/objects/73/2e98f08bc806c331b06847fc8c743f545499e5,sha256=kbKUb6fwwhRO73B4EZmol55JBvckqE3GNZ9PqHRB2ag,3995
|
|
84
|
+
pembot/.git/objects/73/5b5f6d515f0816599343f1ae7ccffc1d5a487e,sha256=0aByFDI4DyyfA-TKCFoUXbQAvNabJIV7CXHSIDna5bo,2833
|
|
78
85
|
pembot/.git/objects/75/321fbcd2be44a548400fbacbf5bcb71e3810fd,sha256=7AXaYVgItbw3xQiEqeRyO5qdIedIxDoI9hTDn8CBRxM,56
|
|
79
86
|
pembot/.git/objects/78/4aa28d912b66e07748483efe0326c70d7541a5,sha256=6ut1I6cMnpRs6EK2CZZv50W25yNc0Ha6nC_cj9tSQjI,249
|
|
80
87
|
pembot/.git/objects/7a/7d28b0313a3d9d509823faaae31949af8610ef,sha256=X59k-p9VNLBpmJlL53qIz8mntLeCSpnjw-rq9u9z_6I,90
|
|
81
88
|
pembot/.git/objects/7e/0907822f7d316ebe0be07e1f6918bef412c80b,sha256=lFc55Bu-vEXF8In553gHxlEsB47Vg2qFXHiJqepWEqg,5167
|
|
82
89
|
pembot/.git/objects/86/cdaec229f1fbebf43042266b03878944669f25,sha256=eTvQhUeYXP8E181oTOcBydcgmImr62IizaH_Jbcbg8g,4077
|
|
83
90
|
pembot/.git/objects/87/d6df5217a4a374f8c1211a05f9bd657f72c9a7,sha256=OGq5-x1lFa94vTX7WYO6o4TGvCZwAvZ6LXm6N3dpiKM,3881
|
|
91
|
+
pembot/.git/objects/88/0c3d45ac59940344dfb6c45005f7e908173138,sha256=7VMQzB6baLdC2Uj5f84w-X6XLM3GinXGBQjewhXupAc,914
|
|
84
92
|
pembot/.git/objects/8b/5be2af9b16f290549193859c214cd9072212e8,sha256=DhGeGisCdFZ0TcRKp5angRpaseI87TQDt5FtGZInstk,117
|
|
85
93
|
pembot/.git/objects/8d/adc1d7891c79de24ba2c7c38b4c830bf61870a,sha256=QJaAleJXlBhybaUcSeKB7nC9OJg9gjP_xc071Wyq8BM,115
|
|
86
94
|
pembot/.git/objects/8f/c00bf69f4ad3e50c13acc4a0988b6c0fe72b5a,sha256=uJVaujaQWN_NwzK9P0SM7cYp3I6GQFXdlYBPrnqVhcg,159
|
|
@@ -102,13 +110,17 @@ pembot/.git/objects/b1/1173d9b68db117437ccb9551461152e1e8a77d,sha256=6cl8NMNQ9b5
|
|
|
102
110
|
pembot/.git/objects/b2/4e79ab07fe9e68781961a25ff9f1dbb1546fbb,sha256=zfd9KnP9YtBMwzci1BMWFHAQR4BWJ3XQsyr-rFqdw0Q,135
|
|
103
111
|
pembot/.git/objects/b8/884c6145221ac66f84bf88919754c2cb05c12d,sha256=6EJskrHAkqVAC5ExxIZDQT_2kZWhfLPPAPbX61tmwgw,170
|
|
104
112
|
pembot/.git/objects/b8/eea52176ffa4d88c5a9976bee26092421565d3,sha256=xCom1B6wyws8ZNTJoIL4JtVIXNv1yPCwsXfNsVCAGQA,4410
|
|
113
|
+
pembot/.git/objects/bb/a495d8e72b78fefcc534259b8edae9a3172d15,sha256=Kr92INW6aFVOO0iZm0J2y2Yld9N1Dg-fP6zP1_cqe0g,525
|
|
105
114
|
pembot/.git/objects/bd/8fd1cb166996e74a8631f3a6f764a53af75297,sha256=JOkICUEv6tdVp7mYDUKtXnsWq3IIZSmm8iUP7OqQwc4,56
|
|
106
115
|
pembot/.git/objects/bf/068a0714e2145de83a5c004f4213b091439d0e,sha256=MpiiCqAk6GQ5iGzeThU0rsabrgA5tCAgdIWudAM0IrA,420
|
|
107
116
|
pembot/.git/objects/bf/32a7e6872e5dc4025ee3df3c921ec7ade0855f,sha256=lwL9ickzIFtMJgNKaPp6nTGDlMhPs6fkZTWevQWK_Lc,56
|
|
108
117
|
pembot/.git/objects/bf/518686b06069d2a8abd3689908b7e1a6e16b05,sha256=w-HgdJdX2_ZdiIptJv8BcWdeDEyhl42WEk8P72X8YKU,421
|
|
109
118
|
pembot/.git/objects/c0/793458db6e1bee7f79f1a504fb8ff4963f8ed3,sha256=b8lo_OrMeGgirc9yY_OFjv5xVpG6FBpZnBf7jbtlmyw,421
|
|
119
|
+
pembot/.git/objects/c0/f948ab4636a125bc202368e6c9cbe80d76169a,sha256=GPQso_R_RWWLx_pF3g58MiM4HyeSnpXTeLeKDfhkyPc,526
|
|
110
120
|
pembot/.git/objects/c2/443060c07101948487cfa93cc39e082e9e0f5f,sha256=d9rjB8sgBOUQ-HQ8yu5I-c5Dqr_q2z0OOCXSufjDAak,3998
|
|
121
|
+
pembot/.git/objects/c2/926f040b089a52edfb8351480f63619ab7e0ab,sha256=HAXSsWokz2tuk9Y952ogIEzSBlbUC4lZ1CjvWBc22Cg,56
|
|
111
122
|
pembot/.git/objects/c2/ce19d575a8cccf6886862c4fa6afefba142511,sha256=kxbbFUJ1TpEVIrqgiLzepP5Z1k_kF3FjCHvJ04yCBvs,3370
|
|
123
|
+
pembot/.git/objects/c3/cc0da3d955ecec0f865c46c030a0c073697495,sha256=7ZXWsXqapYhbZZJwaaeAwqGcgX8JwoS5DazqOGaRHeQ,179
|
|
112
124
|
pembot/.git/objects/ce/a4ffc1cf5eab61a2a0abd8f6dc941b580b69fd,sha256=yKUe_ZHD0UynTIrDRhuVqjDjKYDfZkWplqXjeSOD_bk,3894
|
|
113
125
|
pembot/.git/objects/d0/937f7d832266337289d5ec09459f931a46fcf7,sha256=_RZ7Z2EZp1OOF_XZhY6e1tzWwhI8Fa5R9aaF_W8APBA,56
|
|
114
126
|
pembot/.git/objects/e0/9162dbd64d85bb5ed740aa99faefa73f293d78,sha256=I5fpz3BQ2maFPTSu43T1uvYMuLiep1C3K6CsX8UMNPI,196
|
|
@@ -140,17 +152,17 @@ pembot/.git/objects/fe/cc5d8154b1e77e4c6beb23ce9cbe8fea55d34d,sha256=0it_Z3Lk5Mj
|
|
|
140
152
|
pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.idx,sha256=CNzx_lz6v4PulPxRW2t9nz-ifvplpSFPhMA2M9WNUrA,3424
|
|
141
153
|
pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.pack,sha256=dk3Sqrd0L-tNVLRy3uJdTYJNkw8v59mE1hV8zrCFNzc,41355
|
|
142
154
|
pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.rev,sha256=7U3tpTWQ3dn5dwQo_KWMWxF31cKaDnCk2AzTO7Cx4Bg,388
|
|
143
|
-
pembot/.git/refs/heads/main,sha256=
|
|
155
|
+
pembot/.git/refs/heads/main,sha256=a4NhMFSYteuWFt7KclttMW9wgrlLiv3bnLAY88lZVCU,41
|
|
144
156
|
pembot/.git/refs/remotes/origin/HEAD,sha256=K7aiSqD8bEhBAPXVGim7rYQc0sdV9dk_qiBOXbtOsrQ,30
|
|
145
|
-
pembot/.git/refs/remotes/origin/main,sha256=
|
|
157
|
+
pembot/.git/refs/remotes/origin/main,sha256=a4NhMFSYteuWFt7KclttMW9wgrlLiv3bnLAY88lZVCU,41
|
|
146
158
|
pembot/AnyToText/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
147
|
-
pembot/AnyToText/convertor.py,sha256=
|
|
159
|
+
pembot/AnyToText/convertor.py,sha256=L0d6AevJBtyC-5pP-vJGHR_Uaumf0iWPNYLmwGaiUHI,8772
|
|
148
160
|
pembot/TextEmbedder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
149
161
|
pembot/TextEmbedder/gemini_embedder.py,sha256=P679-2mmQESlYKML1vcrwx_-CSgWJgIQk7NL4F7BLQE,677
|
|
150
162
|
pembot/TextEmbedder/mongodb_embedder.py,sha256=-xIr-zrAGzCmgNeojuX6qYj2t019EVO1I6g-Hwq0FL8,10799
|
|
151
163
|
pembot/TextEmbedder/mongodb_index_creator.py,sha256=kopqdVYJii_wExVrXGZjMfqWZ2dD42b3PeNWo71weHI,5354
|
|
152
164
|
pembot/TextEmbedder/vector_query.py,sha256=Kh1uhx9CatB-oQlQtnW-1I2Qz7MGHI20n2h_8peAChM,1986
|
|
153
|
-
pembot/config/config.yaml,sha256=
|
|
165
|
+
pembot/config/config.yaml,sha256=DMNFp9EdsN22ZGe8Tp4DRCn-wAWSinhN5oI1muXE-_Y,156
|
|
154
166
|
pembot/pdf2markdown/LICENSE,sha256=1JTJhQjUYDqJzFJhNtitm7mHyE71PRHgetIqRRWg6Pk,1068
|
|
155
167
|
pembot/pdf2markdown/README.md,sha256=jitM1pwI69oa0N4mXv5-SY1ka9Sz3jsRNCDdpW-50kY,4545
|
|
156
168
|
pembot/pdf2markdown/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -206,7 +218,7 @@ pembot/pdf2markdown/config/config.yaml,sha256=w75W2Eg4-tu8rRk_23PqxWDh0010kRKLmP
|
|
|
206
218
|
pembot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
207
219
|
pembot/utils/inference_client.py,sha256=jeURmY2P5heVlH1dCV0XSgiX3U2qYGEmrnUv0KFpdww,5380
|
|
208
220
|
pembot/utils/string_tools.py,sha256=gtRa5rBR0Q7GspTu2WtCnvhJQLFjPfWLvhmyiPkyStU,1883
|
|
209
|
-
pembot-0.1.
|
|
210
|
-
pembot-0.1.
|
|
211
|
-
pembot-0.1.
|
|
212
|
-
pembot-0.1.
|
|
221
|
+
pembot-0.1.3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
222
|
+
pembot-0.1.3.dist-info/WHEEL,sha256=Dyt6SBfaasWElUrURkknVFAZDHSTwxg3PaTza7RSbkY,100
|
|
223
|
+
pembot-0.1.3.dist-info/METADATA,sha256=vftTQKyRwrw4BFJ_hdonub57buM5DyRXDWw28sUt884,313
|
|
224
|
+
pembot-0.1.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|