pembot 0.1.2__py2.py3-none-any.whl → 0.1.4__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pembot might be problematic. Click here for more details.

Files changed (36)
  1. pembot/.git/COMMIT_EDITMSG +1 -1
  2. pembot/.git/index +0 -0
  3. pembot/.git/logs/HEAD +3 -0
  4. pembot/.git/logs/refs/heads/main +3 -0
  5. pembot/.git/logs/refs/remotes/origin/main +3 -0
  6. pembot/.git/objects/00/3ba85af0ed7b9f6ab099ca298c3d0c18fb002b +1 -0
  7. pembot/.git/objects/05/5e82e69847a636258cb994bb920c03a93b5ff4 +1 -0
  8. pembot/.git/objects/0e/6b7f7409a88aa2595206b53112a666e4dca8a2 +0 -0
  9. pembot/.git/objects/10/d1fb81ceede7365dbe132a770a49026e86e9a5 +0 -0
  10. pembot/.git/objects/1f/791d08c432b4244a670517c87ada2181159101 +0 -0
  11. pembot/.git/objects/20/3b390ad0aeb3bc5a8540840b004e6a42e5ce7a +0 -0
  12. pembot/.git/objects/27/02d55c4513a6d23e577aa2f104982c8b9436b2 +0 -0
  13. pembot/.git/objects/3a/54acc088992fa8e890b93e83115ec6dc019835 +0 -0
  14. pembot/.git/objects/48/b71bba3a3f9887828863521c13901eceb54331 +0 -0
  15. pembot/.git/objects/5b/efa3b2f18d2b5d332c6de503a7054f4af0569f +0 -0
  16. pembot/.git/objects/73/5b5f6d515f0816599343f1ae7ccffc1d5a487e +0 -0
  17. pembot/.git/objects/74/5c54e85b3ea7bfc8a8f35edc907746c29f8663 +0 -0
  18. pembot/.git/objects/88/0c3d45ac59940344dfb6c45005f7e908173138 +0 -0
  19. pembot/.git/objects/92/2448ecc557be58195468561e475b904bd1b349 +0 -0
  20. pembot/.git/objects/b1/ddf2869bc7d213b35dabd6fa5bfae44cd6b7a7 +0 -0
  21. pembot/.git/objects/bb/a495d8e72b78fefcc534259b8edae9a3172d15 +0 -0
  22. pembot/.git/objects/c0/f948ab4636a125bc202368e6c9cbe80d76169a +0 -0
  23. pembot/.git/objects/c2/926f040b089a52edfb8351480f63619ab7e0ab +0 -0
  24. pembot/.git/objects/c3/cc0da3d955ecec0f865c46c030a0c073697495 +0 -0
  25. pembot/.git/objects/e6/adbc3c373070269f97ef82d4f63027d7878f67 +1 -0
  26. pembot/.git/refs/heads/main +1 -1
  27. pembot/.git/refs/remotes/origin/main +1 -1
  28. pembot/.gitignore +0 -1
  29. pembot/AnyToText/convertor.py +62 -225
  30. pembot/__init__.py +1 -1
  31. pembot/config/config.yaml +1 -1
  32. pembot/requirements.txt +5 -1
  33. {pembot-0.1.2.dist-info → pembot-0.1.4.dist-info}/METADATA +1 -1
  34. {pembot-0.1.2.dist-info → pembot-0.1.4.dist-info}/RECORD +36 -16
  35. {pembot-0.1.2.dist-info → pembot-0.1.4.dist-info}/WHEEL +0 -0
  36. {pembot-0.1.2.dist-info → pembot-0.1.4.dist-info}/licenses/LICENSE +0 -0
pembot/.git/COMMIT_EDITMSG CHANGED
@@ -1 +1 @@
1
- minor oopsie
1
+ cyto/put the output of the converted file in excel case to write to file if path is given
pembot/.git/index CHANGED
Binary file
pembot/.git/logs/HEAD CHANGED
@@ -13,3 +13,6 @@ af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db270
13
13
  a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236415 +0530 commit: added prompt prefixing option to prime the llm with some more data; added the option for no-filter global search in a collection in search_within_document(s)
14
14
  784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858244 +0530 commit: added smolagent functionality with tool extension capability to query.py and added a caveat in search_within_documents by which you can just return no context if you set limit= 0, effectively doing a Non-contextual prompt
15
15
  f214d4d56726e2928479c5948bd88e038cf70b2e 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a cyto <silverstone965@gmail.com> 1752859643 +0530 commit: minor oopsie
16
+ 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a 203b390ad0aeb3bc5a8540840b004e6a42e5ce7a cyto <silverstone965@gmail.com> 1752865439 +0530 commit: added requirements
17
+ 203b390ad0aeb3bc5a8540840b004e6a42e5ce7a c3cc0da3d955ecec0f865c46c030a0c073697495 cyto <silverstone965@gmail.com> 1758877816 +0530 commit: cyto/fixed the excel to markdown conversion
18
+ c3cc0da3d955ecec0f865c46c030a0c073697495 e6adbc3c373070269f97ef82d4f63027d7878f67 cyto <silverstone965@gmail.com> 1758880975 +0530 commit: cyto/put the output of the converted file in excel case to write to file if path is given
pembot/.git/logs/refs/heads/main CHANGED
@@ -13,3 +13,6 @@ af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db270
13
13
  a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236415 +0530 commit: added prompt prefixing option to prime the llm with some more data; added the option for no-filter global search in a collection in search_within_document(s)
14
14
  784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858244 +0530 commit: added smolagent functionality with tool extension capability to query.py and added a caveat in search_within_documents by which you can just return no context if you set limit= 0, effectively doing a Non-contextual prompt
15
15
  f214d4d56726e2928479c5948bd88e038cf70b2e 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a cyto <silverstone965@gmail.com> 1752859643 +0530 commit: minor oopsie
16
+ 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a 203b390ad0aeb3bc5a8540840b004e6a42e5ce7a cyto <silverstone965@gmail.com> 1752865439 +0530 commit: added requirements
17
+ 203b390ad0aeb3bc5a8540840b004e6a42e5ce7a c3cc0da3d955ecec0f865c46c030a0c073697495 cyto <silverstone965@gmail.com> 1758877816 +0530 commit: cyto/fixed the excel to markdown conversion
18
+ c3cc0da3d955ecec0f865c46c030a0c073697495 e6adbc3c373070269f97ef82d4f63027d7878f67 cyto <silverstone965@gmail.com> 1758880975 +0530 commit: cyto/put the output of the converted file in excel case to write to file if path is given
pembot/.git/logs/refs/remotes/origin/main CHANGED
@@ -12,3 +12,6 @@ af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db270
12
12
  a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236436 +0530 update by push
13
13
  784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858280 +0530 update by push
14
14
  f214d4d56726e2928479c5948bd88e038cf70b2e 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a cyto <silverstone965@gmail.com> 1752859659 +0530 update by push
15
+ 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a 203b390ad0aeb3bc5a8540840b004e6a42e5ce7a cyto <silverstone965@gmail.com> 1752865472 +0530 update by push
16
+ 203b390ad0aeb3bc5a8540840b004e6a42e5ce7a c3cc0da3d955ecec0f865c46c030a0c073697495 cyto <silverstone965@gmail.com> 1758877832 +0530 update by push
17
+ c3cc0da3d955ecec0f865c46c030a0c073697495 e6adbc3c373070269f97ef82d4f63027d7878f67 cyto <silverstone965@gmail.com> 1758880990 +0530 update by push
pembot/.git/objects/00/3ba85af0ed7b9f6ab099ca298c3d0c18fb002b ADDED
@@ -0,0 +1 @@
1
+ xe��� E����.Mc�����M�Cͣb��@���.���{�v��ж;)�8C���q�p$� ��b��ҁv<nA���nN������c�e�WpT8�0���%���*Y*�Rp�jߴͱ&��ƣ
pembot/.git/objects/05/5e82e69847a636258cb994bb920c03a93b5ff4 ADDED
@@ -0,0 +1 @@
1
+ x+)JMU0�d040031Q�����,���+�dx6���M�9{wk�+��q�IO�D������Ԣ��"�:��5:f�x1a�]�`ܱ;�3L�M#�
pembot/.git/objects/e6/adbc3c373070269f97ef82d4f63027d7878f67 ADDED
@@ -0,0 +1 @@
1
+ x��Kn�@ D���GJ ��#��\�c��Ш1���a&7Ȯ>*Փ�,��!�XUT
pembot/.git/refs/heads/main CHANGED
@@ -1 +1 @@
1
- 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a
1
+ e6adbc3c373070269f97ef82d4f63027d7878f67
pembot/.git/refs/remotes/origin/main CHANGED
@@ -1 +1 @@
1
- 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a
1
+ e6adbc3c373070269f97ef82d4f63027d7878f67
pembot/.gitignore CHANGED
@@ -1,4 +1,3 @@
1
- *.txt
2
1
  TextEmbedder/__pycache__/
3
2
  pdf2markdown/
4
3
  __pycache__/
pembot/AnyToText/convertor.py CHANGED
@@ -3,11 +3,11 @@ import mimetypes
3
3
  from pathlib import Path
4
4
  from pembot.pdf2markdown.extract import MarkdownPDFExtractor
5
5
  import os
6
- import json
7
6
  import pandas as pd
8
- from typing import Literal, Union, Dict, Any, List
7
+ from typing import Literal, Union
9
8
  import tempfile
10
9
  from datetime import datetime, date
10
+ from tabulate import tabulate
11
11
 
12
12
 
13
13
  PandasReadEngineType = Literal['xlrd', 'openpyxl', 'odf', 'pyxlsb', 'calamine', None]
@@ -53,10 +53,11 @@ class Convertor():
53
53
  self.output= output_file.read()
54
54
  elif file_type == 'excel':
55
55
  self.input_filepath= myfile
56
- self.json_filepath = output_dir / (myfile.stem + ".json")
57
- self.convert_file_to_json()
58
- with open(output_dir / (myfile.stem + '.json')) as output_file:
59
- self.output= output_file.read()
56
+ self.output= self.convert_excel_to_markdown()
57
+ if myfile and output_dir:
58
+ with open(output_dir / (myfile.stem + '.md'), "w") as output_file:
59
+ output_file.write(self.output)
60
+
60
61
 
61
62
  elif output_dir is not None and myfile is not None:
62
63
  print("got output path for conversion: ", output_dir)
@@ -64,8 +65,6 @@ class Convertor():
64
65
 
65
66
  self.output_dir= output_dir
66
67
  self.input_filepath= myfile
67
- base_name, _ = os.path.splitext(myfile.name)
68
- self.json_filepath = output_dir / 'json' / (base_name + ".json")
69
68
 
70
69
  if mt == 'application/json':
71
70
  print("the file was json")
@@ -73,242 +72,80 @@ class Convertor():
73
72
  print("the file was pdf, outputting in: ", output_dir)
74
73
  extractor= MarkdownPDFExtractor(str(myfile), output_path= str(self.output_dir), page_delimiter= "-- NEXT PAGE --", model_name= model_name)
75
74
  extractor.extract()
75
+ with open(self.output_dir / (myfile.stem + '.md')) as output_file:
76
+ self.output= output_file.read()
76
77
 
77
78
  elif mt in EXCEL_FILE_TYPES:
78
- self.convert_file_to_json()
79
+ self.output = self.convert_excel_to_markdown()
79
80
 
80
81
  else:
81
82
  print(mt)
82
83
 
83
- def convert_file_to_json(
84
- self,
85
- sheet_to_convert: Union[str, int, None] = None, # Relevant for Excel/ODS
86
- orient: Literal['dict', 'list', 'series', 'split', 'records', 'index'] = 'records', # Corrected type hint
87
- date_format: Union[str, None] = 'iso', # 'iso', 'epoch', or None
88
- csv_encoding: str = 'utf-8', # For reading CSV files
89
- excel_ods_engine: PandasReadEngineType = None # For Excel/ODS, e.g., 'openpyxl', 'xlrd', 'odf'
90
- ) -> bool:
84
+ def convert_excel_to_markdown(self, excel_ods_engine: PandasReadEngineType = None) -> str:
91
85
  """
92
- Converts an Excel, ODS, or CSV file (or a specific Excel/ODS sheet)
93
- into an equivalent JSON format.
86
+ Converts all sheets from an Excel or ODS file into a single Markdown string.
87
+ Each sheet is converted to a Markdown table, prefixed with the sheet's name.
94
88
 
95
89
  Args:
96
- sheet_to_convert (str | int | None, optional):
97
- - For Excel/ODS:
98
- - If None (default): Converts all sheets. The JSON output will be a
99
- dictionary where keys are sheet names and values are the JSON
100
- representation of each sheet.
101
- - If str: Name of the specific sheet to convert.
102
- - If int: Index of the specific sheet to convert (0-based).
103
- If a specific sheet is requested, the JSON output will directly be
104
- the representation of that sheet.
105
- - For CSV: This parameter is ignored. The entire CSV is processed.
106
- orient (str, optional): Pandas DataFrame.to_dict() orientation for each sheet/CSV.
107
- Default: 'records'. See pandas.DataFrame.to_dict() documentation.
108
- date_format (str | None, optional): Format for datetime objects.
109
- - 'iso' (default): ISO8601 format (e.g., '2023-10-27T10:30:00').
110
- - 'epoch': Milliseconds since epoch.
111
- - None: Pandas default (often Timestamps). 'iso' is generally safer for JSON.
112
- csv_encoding (str, optional): Encoding for reading CSV files. Default is 'utf-8'.
113
90
  excel_ods_engine (str | None, optional): Pandas engine for reading Excel or ODS files.
114
91
  - For Excel: 'openpyxl' (for .xlsx), 'xlrd' (for .xls).
115
92
  - For ODS: 'odf' (requires 'odfpy' library).
116
93
  If None, pandas auto-detects based on file extension and installed libraries.
117
94
 
118
95
  Returns:
119
- bool: True if conversion was successful, False otherwise.
96
+ str: A string containing the Markdown tables for all sheets, or an error message.
120
97
  """
121
-
122
98
  input_filepath = self.input_filepath
123
- json_filepath = self.json_filepath
99
+ markdown_output = []
124
100
 
101
+ file_suffix= ''
125
102
  try:
126
-
127
103
  if not input_filepath.exists():
128
- print(f"Error: Input file not found at {input_filepath}")
129
- return False
130
-
131
- # Ensure output directory exists
132
- json_filepath.parent.mkdir(parents=True, exist_ok=True)
104
+ return f"Error: Input file not found at {input_filepath}"
133
105
 
134
106
  file_suffix = input_filepath.suffix.lower()
135
- output_data_final: Union[Dict[str, Any], List[Dict[str, Any]]] = {}
136
-
137
- dataframes_to_process: list[tuple[pd.DataFrame, str | None]] = []
138
-
139
107
  current_engine: PandasReadEngineType = excel_ods_engine
140
108
 
141
- if file_suffix == '.csv':
142
- if sheet_to_convert is not None:
143
- print(f"Info: 'sheet_to_convert' parameter ('{sheet_to_convert}') is ignored for CSV file '{input_filepath.name}'. Processing entire CSV.")
144
- try:
145
- df = pd.read_csv(input_filepath, encoding=csv_encoding)
146
- dataframes_to_process.append((df, None))
147
- except Exception as e:
148
- print(f"Error reading CSV file '{input_filepath.name}': {e}")
149
- return False
150
-
151
- elif file_suffix in ['.xls', '.xlsx', '.ods']:
152
- try:
153
- if file_suffix == '.ods':
154
- if current_engine is None:
155
- current_engine = 'odf'
156
- elif current_engine != 'odf':
157
- print(f"Warning: Specified engine '{current_engine}' may not be optimal for ODS. Forcing 'odf'.")
158
- current_engine = 'odf'
159
-
160
- if sheet_to_convert is not None:
161
- df = pd.read_excel(input_filepath, sheet_name=sheet_to_convert, engine=current_engine)
162
- dataframes_to_process.append((df, None))
163
-
164
- else:
165
- excel_file = pd.ExcelFile(input_filepath, engine=current_engine)
166
- if not excel_file.sheet_names:
167
- print(f"Warning: File '{input_filepath.name}' contains no sheets.")
168
- for sheet_name in excel_file.sheet_names:
169
- df = excel_file.parse(sheet_name) # engine is inherited
170
- dataframes_to_process.append((df, sheet_name))
171
- except ImportError as ie:
172
- if 'odfpy' in str(ie).lower() and file_suffix == '.ods':
173
- print(f"Error reading ODS file '{input_filepath.name}': The 'odfpy' library is required. Please install it using 'pip install odfpy'.")
174
- elif 'xlrd' in str(ie).lower() and file_suffix == '.xls':
175
- print(f"Error reading .xls file '{input_filepath.name}': The 'xlrd' library might be required. Please install it using 'pip install xlrd'.")
176
- elif 'openpyxl' in str(ie).lower() and file_suffix == '.xlsx':
177
- print(f"Error reading .xlsx file '{input_filepath.name}': The 'openpyxl' library might be required. Please install it using 'pip install openpyxl'.")
178
- else:
179
- print(f"ImportError reading file '{input_filepath.name}': {ie}")
180
- return False
181
- except Exception as e:
182
- print(f"Error reading Excel/ODS file '{input_filepath.name}': {e}")
183
- return False
184
- else:
185
- print(f"Error: Unsupported file type: '{file_suffix}'. Please provide a CSV, XLS, XLSX, or ODS file.")
186
- return False
187
-
188
- if not dataframes_to_process and file_suffix in ['.xls', '.xlsx', '.ods'] and sheet_to_convert is None:
189
- print(f"Info: No dataframes were loaded from '{input_filepath.name}'. Output JSON will be empty if processing all sheets from an empty file.")
190
- elif not dataframes_to_process and not (file_suffix in ['.xls', '.xlsx', '.ods'] and sheet_to_convert is None):
191
- pass
192
-
193
- is_direct_output = len(dataframes_to_process) == 1 and dataframes_to_process[0][1] is None
194
- temp_processed_data: Dict[str, Any] = {}
195
-
196
- for df_original, name_key in dataframes_to_process:
197
- df = df_original.copy()
198
-
199
- # Handle datetime columns with improved detection and conversion
200
- if date_format:
201
- # Check for datetime columns using multiple approaches
202
- datetime_columns = []
203
-
204
- # Method 1: Use pandas dtype detection
205
- datetime_columns.extend(df.select_dtypes(include=['datetime64[ns]', 'datetime', 'datetimetz']).columns.tolist())
206
-
207
- # Method 2: Check for datetime objects in each column
208
- for col in df.columns:
209
- if col not in datetime_columns:
210
- # Sample a few non-null values to check type
211
- sample_values = df[col].dropna().head(10)
212
- if len(sample_values) > 0:
213
- for val in sample_values:
214
- if isinstance(val, (datetime, date, pd.Timestamp)):
215
- datetime_columns.append(col)
216
- break
217
-
218
- # Convert datetime columns
219
- for col_name in datetime_columns:
220
- try:
221
- if date_format == 'iso':
222
- df[col_name] = df[col_name].apply(lambda x: self._convert_to_iso(x))
223
- elif date_format == 'epoch':
224
- df[col_name] = df[col_name].apply(lambda x: self._convert_to_epoch(x))
225
- except Exception as e_date:
226
- print(f"Warning: Could not fully convert date column '{col_name}' in '{name_key or input_filepath.name}' using format '{date_format}'. Error: {e_date}")
227
-
228
- # Replace NaN values with None for JSON compatibility
229
- df = df.astype(object).where(pd.notnull(df), None)
230
-
231
- # Final safety check: convert any remaining datetime objects
232
- for col in df.columns:
233
- df[col] = df[col].apply(lambda x: self._safe_datetime_convert(x, date_format))
234
-
235
- current_json_segment = df.to_dict(orient=orient)
236
-
237
- if is_direct_output:
238
- output_data_final = current_json_segment
239
- break
240
- else:
241
- if name_key is not None:
242
- temp_processed_data[name_key] = current_json_segment
243
-
244
- if not is_direct_output:
245
- output_data_final = temp_processed_data
246
-
247
- with open(json_filepath, 'w', encoding='utf-8') as f:
248
- json.dump(output_data_final, f, indent=4, ensure_ascii=False)
249
-
250
- print(f"Successfully converted '{input_filepath.name}' to '{json_filepath.name}'")
251
- return True
252
-
253
- except FileNotFoundError:
254
- print(f"Error: Input file not found at {input_filepath.name}")
255
- return False
256
- except ValueError as ve:
257
- print(f"ValueError during conversion of '{input_filepath.name}': {ve}")
258
- return False
259
- except Exception as e:
260
- print(f"An unexpected error occurred during conversion of '{input_filepath.name}': {e}")
261
- return False
109
+ if file_suffix in ['.xls', '.xlsx', '.ods']:
110
+ if file_suffix == '.ods':
111
+ if current_engine is None:
112
+ current_engine = 'odf'
113
+ elif current_engine != 'odf':
114
+ print(f"Warning: Specified engine '{current_engine}' may not be optimal for ODS. Forcing 'odf'.")
115
+ current_engine = 'odf'
262
116
 
263
- def _convert_to_iso(self, value):
264
- """Convert datetime-like objects to ISO format string."""
265
- if pd.isnull(value) or value is None:
266
- return None
117
+ excel_file = pd.ExcelFile(input_filepath, engine=current_engine)
118
+ if not excel_file.sheet_names:
119
+ return f"Warning: File '{input_filepath.name}' contains no sheets."
267
120
 
268
- try:
269
- if isinstance(value, str):
270
- return value # Already a string
271
- elif hasattr(value, 'isoformat'):
272
- return value.isoformat()
273
- elif isinstance(value, pd.Timestamp):
274
- return value.isoformat()
275
- else:
276
- return str(value)
277
- except:
278
- return str(value) if value is not None else None
121
+ for sheet_name in excel_file.sheet_names:
122
+ df = excel_file.parse(sheet_name)
123
+ markdown_output.append(f"## {sheet_name}\n")
124
+ markdown_table = tabulate(df, headers='keys', tablefmt='pipe')
125
+ markdown_output.append(markdown_table)
126
+ markdown_output.append("\n")
279
127
 
280
- def _convert_to_epoch(self, value):
281
- """Convert datetime-like objects to epoch milliseconds."""
282
- if pd.isnull(value) or value is None:
283
- return None
128
+ return "\n".join(markdown_output)
129
+
130
+ elif file_suffix == '.csv':
131
+ df = pd.read_csv(input_filepath)
132
+ markdown_table = tabulate(df, headers='keys', tablefmt='pipe')
133
+ return markdown_table
284
134
 
285
- try:
286
- if isinstance(value, (int, float)):
287
- return int(value) # Assume already epoch
288
- elif hasattr(value, 'timestamp'):
289
- return int(value.timestamp() * 1000)
290
- elif isinstance(value, pd.Timestamp):
291
- return int(value.timestamp() * 1000)
292
135
  else:
293
- return str(value)
294
- except:
295
- return str(value) if value is not None else None
296
-
297
- def _safe_datetime_convert(self, value, date_format):
298
- """Final safety conversion for any remaining datetime objects."""
299
- if pd.isnull(value) or value is None:
300
- return None
301
-
302
- # If it's a datetime-like object, convert it
303
- if isinstance(value, (datetime, date, pd.Timestamp)):
304
- if date_format == 'iso':
305
- return self._convert_to_iso(value)
306
- elif date_format == 'epoch':
307
- return self._convert_to_epoch(value)
136
+ return f"Error: Unsupported file type: '{file_suffix}'. Please provide a CSV, XLS, XLSX, or ODS file."
137
+
138
+ except ImportError as ie:
139
+ if 'odfpy' in str(ie).lower() and file_suffix == '.ods':
140
+ return f"Error reading ODS file '{input_filepath.name}': The 'odfpy' library is required. Please install it using 'pip install odfpy'."
141
+ elif 'xlrd' in str(ie).lower() and file_suffix == '.xls':
142
+ return f"Error reading .xls file '{input_filepath.name}': The 'xlrd' library might be required. Please install it using 'pip install xlrd'."
143
+ elif 'openpyxl' in str(ie).lower() and file_suffix == '.xlsx':
144
+ return f"Error reading .xlsx file '{input_filepath.name}': The 'openpyxl' library might be required. Please install it using 'pip install openpyxl'."
308
145
  else:
309
- return str(value)
310
-
311
- return value
146
+ return f"ImportError reading file '{input_filepath.name}': {ie}"
147
+ except Exception as e:
148
+ return f"An unexpected error occurred during conversion of '{input_filepath.name}': {e}"
312
149
 
313
150
 
314
151
  def chunk_text(text, chunk_size=500, overlap_size=50):
@@ -337,29 +174,29 @@ def chunk_text(text, chunk_size=500, overlap_size=50):
337
174
  if __name__ == '__main__':
338
175
  print("Test Run Start:")
339
176
  try:
340
- print("Test 1: scaned pdf page, bytes")
341
- with open("/home/cyto/Documents/scanned.pdf", "rb") as imgpdf:
342
- conv= Convertor(file_bytes= imgpdf.read(), suffix= ".pdf", file_type= "pdf")
343
- print(conv.output)
177
+ # print("Test 1: scaned pdf page, bytes")
178
+ # with open("/home/cyto/Documents/scanned.pdf", "rb") as imgpdf:
179
+ # conv= Convertor(file_bytes= imgpdf.read(), suffix= ".pdf", file_type= "pdf")
180
+ # print(conv.output)
344
181
 
345
182
  # print("Test 2: JD pdf, bytes")
346
183
  # with open("/home/cyto/dev/pembotdir/jds/PM Trainee.pdf", "rb") as imgpdf:
347
184
  # conv= Convertor(file_bytes= imgpdf.read(), suffix= ".pdf", file_type= "pdf")
348
185
  # print(conv.output)
349
186
 
350
- # print("Test 3: excel schedule, bytes")
351
- # with open("/home/cyto/Downloads/Assignment schedule.xlsx", "rb") as imgpdf:
352
- # conv= Convertor(file_bytes= imgpdf.read(), suffix= ".xlsx", file_type= "excel")
353
- # print(conv.output)
187
+ print("Test 3: excel schedule, bytes")
188
+ with open("/home/cyto/Downloads/Assignment schedule.xlsx", "rb") as imgpdf:
189
+ conv= Convertor(file_bytes= imgpdf.read(), suffix= ".xlsx", file_type= "excel")
190
+ print(conv.output)
354
191
 
355
192
  # without bytes example:
356
193
  print("Test 4: scanned pdf, path")
357
194
  conv= Convertor(myfile= Path('/home/cyto/Documents/scanned.pdf'), output_dir= Path('/home/cyto/Documents'))
358
195
  print(conv.output)
359
196
 
360
- # print("Test 5: schedule excel, path")
361
- # conv= Convertor(myfile= Path('/home/cyto/Downloads/Assignment schedule.xlsx'), output_dir= Path('/home/cyto/Downloads'))
362
- # print(conv.output)
197
+ print("Test 5: schedule excel, path")
198
+ conv= Convertor(myfile= Path('/home/cyto/Downloads/Assignment schedule.xlsx'), output_dir= Path('/home/cyto/Downloads'))
199
+ print(conv.output)
363
200
  except FileNotFoundError as fe:
364
201
  print("file not found, modify the driver code to get sample files to test:\n\n", fe)
365
202
  except Exception as e:
pembot/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """
2
2
  A Python Package to convert PEM blog content to usseful information by leveraging LLMs
3
3
  """
4
- __version__ = '0.1.2'
4
+ __version__ = '0.1.4'
5
5
  from .main import save_to_json_file, make_query
6
6
  __all__ = ["save_to_json_file", "make_query"]
pembot/config/config.yaml CHANGED
@@ -2,4 +2,4 @@ OUTPUT_DIR: /home/cyto/dev/pembotdir
2
2
  PAGE_DELIMITER: ___________________________ NEXT PAGE ___________________________
3
3
  app:
4
4
  name: pembot
5
- version: 0.1.2
5
+ version: 0.1.4
pembot/requirements.txt CHANGED
@@ -9,6 +9,7 @@ cffi==1.17.1
9
9
  charset-normalizer==3.4.2
10
10
  click==8.2.1
11
11
  cryptography==45.0.5
12
+ defusedxml==0.7.1
12
13
  dnspython==2.7.0
13
14
  duckduckgo_search==8.1.1
14
15
  et_xmlfile==2.0.0
@@ -36,6 +37,7 @@ MarkupSafe==3.0.2
36
37
  mdurl==0.1.2
37
38
  msgpack==1.1.1
38
39
  numpy==2.3.1
40
+ odfpy==1.4.1
39
41
  ollama==0.5.1
40
42
  openpyxl==3.1.5
41
43
  orjson==3.10.18
@@ -44,7 +46,7 @@ pandas==2.3.0
44
46
  pathlib==1.0.1
45
47
  pdfminer.six==20250506
46
48
  pdfplumber==0.11.7
47
- pembot==0.1.2
49
+ pembot==0.1.4
48
50
  pillow==11.3.0
49
51
  primp==0.15.0
50
52
  pyasn1==0.6.1
@@ -76,6 +78,7 @@ smolagents==1.20.0
76
78
  sniffio==1.3.1
77
79
  soupsieve==2.7
78
80
  starlette==0.46.2
81
+ tabulate==0.9.0
79
82
  tenacity==8.5.0
80
83
  tomlkit==0.13.3
81
84
  tqdm==4.67.1
@@ -86,3 +89,4 @@ tzdata==2025.2
86
89
  urllib3==2.5.0
87
90
  uvicorn==0.35.0
88
91
  websockets==15.0.1
92
+ xlrd==2.0.2
{pembot-0.1.2.dist-info → pembot-0.1.4.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pembot
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: A Python Package to convert PEM blog content to usseful information by leveraging LLMs
5
5
  Author-email: cyto <aryan_sidhwani@protonmail.com>
6
6
  License-Expression: MIT
{pembot-0.1.2.dist-info → pembot-0.1.4.dist-info}/RECORD CHANGED
@@ -1,19 +1,19 @@
1
- pembot/.gitignore,sha256=_7FTsZokJ_pzEyyPjOsGw5x5Xx3gUBFaafs7UlPsv9E,98
1
+ pembot/.gitignore,sha256=yyDEUmeqZekG4AOrU9Zvu2ZQhJvEzEg_lQp2CDfBhXM,92
2
2
  pembot/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
3
- pembot/__init__.py,sha256=JS8ONln7V9MIxi_JrRdEW9Kc9ygOmpCCRKEYC9TnTjA,211
3
+ pembot/__init__.py,sha256=ALfgnAweIDuCsninSev7KaxWZHPDj3uipgBHvJALnvI,211
4
4
  pembot/gartner.py,sha256=3ALknQ5mSXIimmwCa3JFDzB_EW2hHEcQO1T2odyBquk,5408
5
5
  pembot/main.py,sha256=lZLIV8XPonvNoY4LVS-5fct1y9URMXWoSGJUKMw3Yg8,9667
6
6
  pembot/output_structure_local.py,sha256=YfpHzfTNeLMSsB_CjAamha9D6Iz7E1IC-tW9xPCMWFc,3000
7
7
  pembot/pem.py,sha256=mv6iGcN1peSY7z2dtCQ_BKj31EFBNfczBhps_d-0XDo,6377
8
8
  pembot/pyrightconfig.json,sha256=j2O2tc8Z-Zu7hEnhN9neoKk6-iLkAlp4qOmAxFyHB7Y,368
9
9
  pembot/query.py,sha256=zgfIJsSMDatFPl0Fw3MhK7fO8uBB0Yj4rxEAExqGyGA,18054
10
- pembot/requirements.txt,sha256=BGGuhM9oXJGN6oueIu3AYmBMUXeo-BoEQFJHHyO1BCc,1508
10
+ pembot/requirements.txt,sha256=oNGOW-nqvzJZvy5qmFk__S5buH3jYS7-13VaLQxBhpI,1567
11
11
  pembot/search.py,sha256=IW0F8QjE-HSYP47v5P9EqfnzKgFEf5CGxeICtHDDrkE,9137
12
- pembot/.git/COMMIT_EDITMSG,sha256=pTsMiZ9dt9Of1JgR5858BXwxO8jn7P0MpLw0pJE7dqc,13
12
+ pembot/.git/COMMIT_EDITMSG,sha256=TpJZGgNNb5nhYiJQlrJUzYbeFbY6aCXZ8n9V0zQoe0E,90
13
13
  pembot/.git/HEAD,sha256=KNJb-Cr0wOK3L1CVmyvrhZ4-YLljCl6MYD2tTdsrboA,21
14
14
  pembot/.git/config,sha256=ZFl9d2GyxirgRXRsv8iULIieKxwGC9P6SAjB_AmTkmQ,271
15
15
  pembot/.git/description,sha256=hatsFj1DoX6pz3eIMIvKFGbxsKjRzJLibpv2PaQGKu4,73
16
- pembot/.git/index,sha256=70Iy37BW4GBRzaodLY4qm1hjjcBml0r1cdcFyNX_G_I,1974
16
+ pembot/.git/index,sha256=eLDRlMvYMR_BU749-CvqXohIriJGcW291fSOM6SHONw,2054
17
17
  pembot/.git/packed-refs,sha256=7DECsr7q7vJ6Gw6a2gS3dE4v-YzbxGiWYoSWM43DgsQ,112
18
18
  pembot/.git/hooks/applypatch-msg.sample,sha256=AiNJeguLAzqlijpSG4YphpOGz3qw4vEBlj0yiqYhk_c,478
19
19
  pembot/.git/hooks/commit-msg.sample,sha256=H3TV6SkpebVz69WXQdRsuT_zkazdCD00C5Q3B1PZJDc,896
@@ -30,11 +30,13 @@ pembot/.git/hooks/push-to-checkout.sample,sha256=pT0HQXmLKHxt16-mSu5HPzBeZdP0lGO
30
30
  pembot/.git/hooks/sendemail-validate.sample,sha256=ROv8kj3FRmvACWAvDs8Ge5xlRZq_6IaN3Em3jmztepI,2308
31
31
  pembot/.git/hooks/update.sample,sha256=jV8vqD4QPPCLV-qmdSHfkZT0XL28s32lKtWGCXoU0QY,3650
32
32
  pembot/.git/info/exclude,sha256=ZnH-g7egfIky7okWTR8nk7IxgFjri5jcXAbuClo7DsE,240
33
- pembot/.git/logs/HEAD,sha256=OrEUcE6427s9cKnTnrPMYZLrJcs4v-PSww3Zq_Tc060,3790
34
- pembot/.git/logs/refs/heads/main,sha256=OrEUcE6427s9cKnTnrPMYZLrJcs4v-PSww3Zq_Tc060,3790
33
+ pembot/.git/logs/HEAD,sha256=-2ghm_6RqCop8Zq5lfUj27TWryvf5Eh6jXyROZL29RI,4360
34
+ pembot/.git/logs/refs/heads/main,sha256=-2ghm_6RqCop8Zq5lfUj27TWryvf5Eh6jXyROZL29RI,4360
35
35
  pembot/.git/logs/refs/remotes/origin/HEAD,sha256=OrkNquczPPh6fEGtutFKva_-_JhAdwnvXpCCPC4N6jk,194
36
- pembot/.git/logs/refs/remotes/origin/main,sha256=iJ6dB86rQ2-iqzRSdgRdjkze4t1IGz0MTxou3cwLYE4,2044
36
+ pembot/.git/logs/refs/remotes/origin/main,sha256=PN9Pc--KH5XuvjF8pDgEcJlQJIIumYsi8KRaIpQo_70,2482
37
+ pembot/.git/objects/00/3ba85af0ed7b9f6ab099ca298c3d0c18fb002b,sha256=pnk_IbjhUJWavx5BKSlXX8CEvWEMoSm8Dv1tQrUmzn4,169
37
38
  pembot/.git/objects/01/5f71967c525963c827d7fe5415ae2c040c4a64,sha256=-qlT-5utWcwFnO3ADkH2SA2LBsdcph6wE2iePxJxkHs,170
39
+ pembot/.git/objects/05/5e82e69847a636258cb994bb920c03a93b5ff4,sha256=eNZTNvT7qgsLCfJvRfTETWZIkk_vBEEroLNbPC8RRa4,90
38
40
  pembot/.git/objects/09/ee34d94dffd4c286df1d6d528b2e98e2a6cce2,sha256=FSXPGn6UBhR7s1Ug-afzCYLfGy8dE3Umn8dBKaahkDM,203
39
41
  pembot/.git/objects/0a/fb3a98cdc55b1434b44534ec2bf22c56cfa26c,sha256=Xxw20vI57zuhERWopDAZpQw6rAOhFtUr05lzpGyCTTE,120
40
42
  pembot/.git/objects/0b/db4169fc0f312b8698f1df17a258fff163aeaa,sha256=hsOHhX0Yajg27Y7B9lo-WjDXzW1KNMg2CBr93G116EY,387
@@ -42,15 +44,21 @@ pembot/.git/objects/0c/8d9b2690545bf1906b05cd9f18b783b3eb74f1,sha256=GKt_CAJNOQX
42
44
  pembot/.git/objects/0c/ab66ffbaf50ef60dd41f3498595ebd2526b33c,sha256=Uk1dStvEBica-t38qHsZZ_4mxvi6b6VA9PaKE4KSunQ,90
43
45
  pembot/.git/objects/0d/28f73897db0c9a9351ee9e64d2a0fe27db2705,sha256=hqMFSXWo_05QL0Do-raB4AtK5QjvKLFBNc0RZqNga9o,244
44
46
  pembot/.git/objects/0e/120123bfadfd594220963f3bbca54056bab6ee,sha256=fj4c6vIKYMYSj5DEdXd6fcYGcanqaPGRD_9haJy35ns,56
47
+ pembot/.git/objects/0e/6b7f7409a88aa2595206b53112a666e4dca8a2,sha256=5sqQ9f08zuuqxJ-zpJzCmz2iPlgYe5D-BVKl_K_KKUQ,527
45
48
  pembot/.git/objects/0f/ccea3d0db4864a854f8b2c13b9f76b3601d200,sha256=Fq6qF_9lqg1bYsF2tWArhzkldnfgLFELLK2CH_2XNcU,203
46
49
  pembot/.git/objects/10/9d1ca0463ea42bbbc435bcb43a90711211cf49,sha256=vR33_Raw-LpnaXGQc1MhSk_ZgEROO2Xa9n97YmA3gtQ,56
50
+ pembot/.git/objects/10/d1fb81ceede7365dbe132a770a49026e86e9a5,sha256=OhI6pEx_G6KbujS7idkp5MxJd1Aw92Wn3Sl-JBgU2VU,115
47
51
  pembot/.git/objects/14/22c2d0cf79fd928ff7e2d77f96ad5b40cc2a31,sha256=2INSnjkW4KTAcfO2aLYVzjnpT89NXxx8TBJj4iU9e3Y,170
48
52
  pembot/.git/objects/18/28e18ab80aa64d334b26428708140e280cbc63,sha256=PTF8WLVhzxBDTZhwU_PBHrkQBbijHbKvttSr0XVTOcU,3936
49
53
  pembot/.git/objects/19/f61df7dbd562d04f561288677bbf2f18f5dff7,sha256=zg8IdUSnMYpJ6HsfY2LQbXQTMwlT1IPWRSEiY2uDwyE,392
54
+ pembot/.git/objects/1f/791d08c432b4244a670517c87ada2181159101,sha256=Zpth_iVM6H5W4u5jLVEKdRz0i3ydBLm5XJql4ieuj8U,169
50
55
  pembot/.git/objects/1f/83a471c8119f7794d98c049170a5d7d07a4b71,sha256=XnMaYQUA8iT1fiOIvlBav331Ry7pNBOBqI3wB3Y1VM0,90
56
+ pembot/.git/objects/20/3b390ad0aeb3bc5a8540840b004e6a42e5ce7a,sha256=tNzFPYJ0Y6YpaNw4w2hRH_0iTa5fNlC2nzARkKFbIec,162
57
+ pembot/.git/objects/27/02d55c4513a6d23e577aa2f104982c8b9436b2,sha256=SQ84I7DnyPaaxoWCBoh20Iw1VZm8wgSaPaL5uDR-R8I,90
51
58
  pembot/.git/objects/28/db0ab48059acccd7d257aa02e52e9b6b83a4a5,sha256=S6PrWSQlkifYxKIgFdU0PZD0uLebS6uAP2LAUwp5yOI,91
52
59
  pembot/.git/objects/35/97e518a8658280be9f377f78edf1dfa1f23814,sha256=gfc5bFLVZpwNQb1Ox2VosDYAjw0Lc5ZLjmvNA8gWcmg,2546
53
60
  pembot/.git/objects/37/175696b3ca7a5d17379f03fb61a1023d50aeba,sha256=XaF3EsJ1wSIWtgBtgKsZkwiMK0NM8acFy9nnqE9_d0s,3085
61
+ pembot/.git/objects/3a/54acc088992fa8e890b93e83115ec6dc019835,sha256=EZlP672_7dB3SZ_ZwyBsFmVTJpLoblFqDdfoW-2v990,2879
54
62
  pembot/.git/objects/3d/07d3b29ff53d95de3898fb786d61732f210515,sha256=A9MNZO3QZ6ghGd1MyfmJ6H3dBTpF4HZcRosVxWytx8E,4077
55
63
  pembot/.git/objects/3d/e536f9c1fd05a23c2dec66423ed610afb0cf5f,sha256=omF4gmE9IQFZR8t6ybAKfnW02tdn9ZaVWKRhv_o1V4c,2083
56
64
  pembot/.git/objects/3e/23850624fcf5f111d6ea88ddd64adf924cf82f,sha256=ygVUpaLo7cxUdIgjFlaBh2BkllV6BIYYkzLIxsPKjWE,4111
@@ -62,11 +70,13 @@ pembot/.git/objects/41/cbeb6bcb4c6fa9ef9be571082d95ecb4ea0ee3,sha256=waMrzjG_o5D
62
70
  pembot/.git/objects/42/f03e1b66aa56bbb36a1c3a8dea9e1e727faffa,sha256=n4W2gcagesjI1rStKNxQ98q5UOHlfwFJGUADFeYldoE,418
63
71
  pembot/.git/objects/44/86da0f89c566c3bae8abf435d37aeca87f1632,sha256=S2hY860Ep-0c7gQcbgrH6ioG7-Hw9a3BwYHcCkwy1Hg,3884
64
72
  pembot/.git/objects/44/9dea1ca63d6f1e47d119b36576acc94822a37c,sha256=gVL6GHxMRFhlOnyUCO1dSxnsBlMd4Jx90eNZFrv32UQ,6490
73
+ pembot/.git/objects/48/b71bba3a3f9887828863521c13901eceb54331,sha256=Kx2Tcs17_chpF5rbY3AB34Cj1S3DGnr7Y1tZOTxvrdM,80
65
74
  pembot/.git/objects/4b/c4370a037feed828cca0915ebb0bb94b24a9d4,sha256=jt9lsSz8c3dw9PyfEEtkReCC_8YLXSKuc6ykSJCKZPM,487
66
75
  pembot/.git/objects/4d/a03134f70896f72053fbdc0cd4f4c76d4ac1d8,sha256=GBhAvxM1omIt-PN6mNXYlIJMN5nx2AUE0ZOf68El5pc,117
67
76
  pembot/.git/objects/50/39b29fda67743a044993436df6a4a1db7b8888,sha256=NYNmYtOq8IMmH32GaQSOBpTRTTm6jEJfY3vytVpzfKM,115
68
77
  pembot/.git/objects/51/9e780574933d7627a083222bd10dd74f430904,sha256=3e3Iu2-waVySghbLYXmwhDPpfhV4PF82suvjcYkSVog,3604
69
78
  pembot/.git/objects/59/69ac8b9d6b44a601385c3ed8c710a69d05216e,sha256=3IOcUn5myiozgeId1iWJZX-r7cS65xXnzQCEjrc-1ZA,168
79
+ pembot/.git/objects/5b/efa3b2f18d2b5d332c6de503a7054f4af0569f,sha256=g84QcQu-1NZ4-MfLHRosIUOnlK0VItVBqqFW5ffGDNI,882
70
80
  pembot/.git/objects/5c/4f01d3ce9e243bbb8a693f97e5c7d13a857cb7,sha256=BnHoA5JBo5NY2ReemhwmZ-dOdx6CwXWY1TQsc-FSM5o,242
71
81
  pembot/.git/objects/61/46a371b9c1bd9f51af273f11f986cfd1bedeba,sha256=KZvfnjxuriY54uWZQOM-GLovAvHs1k8_KwhpjNA5lW4,128
72
82
  pembot/.git/objects/63/1700a51c8fa97b543991f5f61bfcd1e7e1327d,sha256=sYkhBkrSPQ8klX2gPrXJUZVt2a0iaF7KC7NFGBuxgeY,4360
@@ -75,16 +85,20 @@ pembot/.git/objects/6d/7a865a23b1cb4182f67907820104ced48b11c9,sha256=dJRTCmT9rLy
75
85
  pembot/.git/objects/71/014c7a6c8c98449a26ef966485a6cf30a1974d,sha256=ZamWua6G5BGjBYZYeG8dN3nHhwz_kqFfoYyO2wtuRV0,417
76
86
  pembot/.git/objects/72/f047cda92abcd1ddc857f6461de605f8668331,sha256=PFb9LUDMnUCnuJcXUa5W1ea__fdP17kNyWrnqvnOpjs,240
77
87
  pembot/.git/objects/73/2e98f08bc806c331b06847fc8c743f545499e5,sha256=kbKUb6fwwhRO73B4EZmol55JBvckqE3GNZ9PqHRB2ag,3995
88
+ pembot/.git/objects/73/5b5f6d515f0816599343f1ae7ccffc1d5a487e,sha256=0aByFDI4DyyfA-TKCFoUXbQAvNabJIV7CXHSIDna5bo,2833
89
+ pembot/.git/objects/74/5c54e85b3ea7bfc8a8f35edc907746c29f8663,sha256=xl8JaTrzL9T8JNKDi3c4xzknxk4XvNwDZIGrcgYfp_g,115
78
90
  pembot/.git/objects/75/321fbcd2be44a548400fbacbf5bcb71e3810fd,sha256=7AXaYVgItbw3xQiEqeRyO5qdIedIxDoI9hTDn8CBRxM,56
79
91
  pembot/.git/objects/78/4aa28d912b66e07748483efe0326c70d7541a5,sha256=6ut1I6cMnpRs6EK2CZZv50W25yNc0Ha6nC_cj9tSQjI,249
80
92
  pembot/.git/objects/7a/7d28b0313a3d9d509823faaae31949af8610ef,sha256=X59k-p9VNLBpmJlL53qIz8mntLeCSpnjw-rq9u9z_6I,90
81
93
  pembot/.git/objects/7e/0907822f7d316ebe0be07e1f6918bef412c80b,sha256=lFc55Bu-vEXF8In553gHxlEsB47Vg2qFXHiJqepWEqg,5167
82
94
  pembot/.git/objects/86/cdaec229f1fbebf43042266b03878944669f25,sha256=eTvQhUeYXP8E181oTOcBydcgmImr62IizaH_Jbcbg8g,4077
83
95
  pembot/.git/objects/87/d6df5217a4a374f8c1211a05f9bd657f72c9a7,sha256=OGq5-x1lFa94vTX7WYO6o4TGvCZwAvZ6LXm6N3dpiKM,3881
96
+ pembot/.git/objects/88/0c3d45ac59940344dfb6c45005f7e908173138,sha256=7VMQzB6baLdC2Uj5f84w-X6XLM3GinXGBQjewhXupAc,914
84
97
  pembot/.git/objects/8b/5be2af9b16f290549193859c214cd9072212e8,sha256=DhGeGisCdFZ0TcRKp5angRpaseI87TQDt5FtGZInstk,117
85
98
  pembot/.git/objects/8d/adc1d7891c79de24ba2c7c38b4c830bf61870a,sha256=QJaAleJXlBhybaUcSeKB7nC9OJg9gjP_xc071Wyq8BM,115
86
99
  pembot/.git/objects/8f/c00bf69f4ad3e50c13acc4a0988b6c0fe72b5a,sha256=uJVaujaQWN_NwzK9P0SM7cYp3I6GQFXdlYBPrnqVhcg,159
87
100
  pembot/.git/objects/90/f067b86364ab243a7e3bc75f936319ba9eac88,sha256=FLAmmgvYuEAx1-ZBU30rvDzP0ppXWRSVrzPWVnArIb0,203
101
+ pembot/.git/objects/92/2448ecc557be58195468561e475b904bd1b349,sha256=mT1KGAHx7MalAkkpE7nAu6HlwXIB1Cts3MjZDLItErk,56
88
102
  pembot/.git/objects/93/652290aac46c69b1b4dd83062b6cfe648dd643,sha256=WPgmr5bXli5s8rNdiUQM4IB4o_xyJe6nuI3TG4e5aYs,487
89
103
  pembot/.git/objects/93/8f29d9b4b1ae86e39dddf9e3d115a82ddfc9b6,sha256=xf8oZ5IBMTxfkH7MFfukV7ZIu0Apd-78eJTdlI7GBv0,90
90
104
  pembot/.git/objects/95/28bbccd167e3f4ad583a1ae9fac98a52620e27,sha256=jwJdRviwjGJIyMpE_BM6mr7B9ofGEsI5ZToJo5nmlao,263
@@ -99,22 +113,28 @@ pembot/.git/objects/ab/f77db148e3fb3b26913af14ae43130396f3269,sha256=rJJenBYvGWd
99
113
  pembot/.git/objects/ac/9c9018c62fa30dc142665c1b5a375f4e056880,sha256=P_8LPBV0v4D17Akj4f5Cr2dhgNFUsh4o7DLK78CfNPo,349
100
114
  pembot/.git/objects/af/80ddb5890f062e364ea8ade2d602df4e12de8c,sha256=QELzH3NdMCFohFEcf5oAAu_e54VFr-LhTyPbXY7GjSk,169
101
115
  pembot/.git/objects/b1/1173d9b68db117437ccb9551461152e1e8a77d,sha256=6cl8NMNQ9b5fBh97GPEQNssOVrh-EQLJfhqSBbNb_vU,205
116
+ pembot/.git/objects/b1/ddf2869bc7d213b35dabd6fa5bfae44cd6b7a7,sha256=zC9EjJo4qRxy8d1zuRafZ36QVP9KnxehomIfIC0ZZEo,915
102
117
  pembot/.git/objects/b2/4e79ab07fe9e68781961a25ff9f1dbb1546fbb,sha256=zfd9KnP9YtBMwzci1BMWFHAQR4BWJ3XQsyr-rFqdw0Q,135
103
118
  pembot/.git/objects/b8/884c6145221ac66f84bf88919754c2cb05c12d,sha256=6EJskrHAkqVAC5ExxIZDQT_2kZWhfLPPAPbX61tmwgw,170
104
119
  pembot/.git/objects/b8/eea52176ffa4d88c5a9976bee26092421565d3,sha256=xCom1B6wyws8ZNTJoIL4JtVIXNv1yPCwsXfNsVCAGQA,4410
120
+ pembot/.git/objects/bb/a495d8e72b78fefcc534259b8edae9a3172d15,sha256=Kr92INW6aFVOO0iZm0J2y2Yld9N1Dg-fP6zP1_cqe0g,525
105
121
  pembot/.git/objects/bd/8fd1cb166996e74a8631f3a6f764a53af75297,sha256=JOkICUEv6tdVp7mYDUKtXnsWq3IIZSmm8iUP7OqQwc4,56
106
122
  pembot/.git/objects/bf/068a0714e2145de83a5c004f4213b091439d0e,sha256=MpiiCqAk6GQ5iGzeThU0rsabrgA5tCAgdIWudAM0IrA,420
107
123
  pembot/.git/objects/bf/32a7e6872e5dc4025ee3df3c921ec7ade0855f,sha256=lwL9ickzIFtMJgNKaPp6nTGDlMhPs6fkZTWevQWK_Lc,56
108
124
  pembot/.git/objects/bf/518686b06069d2a8abd3689908b7e1a6e16b05,sha256=w-HgdJdX2_ZdiIptJv8BcWdeDEyhl42WEk8P72X8YKU,421
109
125
  pembot/.git/objects/c0/793458db6e1bee7f79f1a504fb8ff4963f8ed3,sha256=b8lo_OrMeGgirc9yY_OFjv5xVpG6FBpZnBf7jbtlmyw,421
126
+ pembot/.git/objects/c0/f948ab4636a125bc202368e6c9cbe80d76169a,sha256=GPQso_R_RWWLx_pF3g58MiM4HyeSnpXTeLeKDfhkyPc,526
110
127
  pembot/.git/objects/c2/443060c07101948487cfa93cc39e082e9e0f5f,sha256=d9rjB8sgBOUQ-HQ8yu5I-c5Dqr_q2z0OOCXSufjDAak,3998
128
+ pembot/.git/objects/c2/926f040b089a52edfb8351480f63619ab7e0ab,sha256=HAXSsWokz2tuk9Y952ogIEzSBlbUC4lZ1CjvWBc22Cg,56
111
129
  pembot/.git/objects/c2/ce19d575a8cccf6886862c4fa6afefba142511,sha256=kxbbFUJ1TpEVIrqgiLzepP5Z1k_kF3FjCHvJ04yCBvs,3370
130
+ pembot/.git/objects/c3/cc0da3d955ecec0f865c46c030a0c073697495,sha256=7ZXWsXqapYhbZZJwaaeAwqGcgX8JwoS5DazqOGaRHeQ,179
112
131
  pembot/.git/objects/ce/a4ffc1cf5eab61a2a0abd8f6dc941b580b69fd,sha256=yKUe_ZHD0UynTIrDRhuVqjDjKYDfZkWplqXjeSOD_bk,3894
113
132
  pembot/.git/objects/d0/937f7d832266337289d5ec09459f931a46fcf7,sha256=_RZ7Z2EZp1OOF_XZhY6e1tzWwhI8Fa5R9aaF_W8APBA,56
114
133
  pembot/.git/objects/e0/9162dbd64d85bb5ed740aa99faefa73f293d78,sha256=I5fpz3BQ2maFPTSu43T1uvYMuLiep1C3K6CsX8UMNPI,196
115
134
  pembot/.git/objects/e0/da740b542afc451c45b9b4be6c0c7a3c79b06c,sha256=oAb2b2VwhPXykdK_ZV8MEFwfy-ZPd2Nja2gAv20U7hc,115
116
135
  pembot/.git/objects/e3/da98f3722c2d0c937db0872836fc4491e4487a,sha256=DNdNDoMdjDexgwLErwUZDQCpvq4-QkFHtbVRXW_jKTk,168
117
136
  pembot/.git/objects/e5/3070f2b07f45d031444b09b1b38658f3caf29e,sha256=irJ-z8kPZmg85B0f4TQz73yJoCMWMWsIR3Pi5wx1Dlk,4034
137
+ pembot/.git/objects/e6/adbc3c373070269f97ef82d4f63027d7878f67,sha256=e2NqH8wvYLSYgpHFoGTpurJ4gKU_PHSULZmjJETD3FQ,204
118
138
  pembot/.git/objects/e7/911a702079a6144997ea4e70f59abbe59ec2bc,sha256=r4zY-__F4gSfjE7onRTrcxvv8umXKuPuFzd95AiQ0cs,392
119
139
  pembot/.git/objects/e9/1172752e9a421ae463112d2b0506b37498c98d,sha256=qWZpM65kQPSxlVHAtyzH5L-j3rL-b9Jw-A7YBm4NMlI,249
120
140
  pembot/.git/objects/ea/0af89e61a882c5afc2a8c281b2d96f174bfe58,sha256=lXbMvL_xl8PhWWfL5WAnvxqE3usiGO3iY83yi3GZwXc,4438
@@ -140,17 +160,17 @@ pembot/.git/objects/fe/cc5d8154b1e77e4c6beb23ce9cbe8fea55d34d,sha256=0it_Z3Lk5Mj
140
160
  pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.idx,sha256=CNzx_lz6v4PulPxRW2t9nz-ifvplpSFPhMA2M9WNUrA,3424
141
161
  pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.pack,sha256=dk3Sqrd0L-tNVLRy3uJdTYJNkw8v59mE1hV8zrCFNzc,41355
142
162
  pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.rev,sha256=7U3tpTWQ3dn5dwQo_KWMWxF31cKaDnCk2AzTO7Cx4Bg,388
143
- pembot/.git/refs/heads/main,sha256=XQJQDbhwKcxH2eABZeHWgpv9Yedy7ytKBo0tRWo03ZM,41
163
+ pembot/.git/refs/heads/main,sha256=3SSyWY2LZTJaO5WhuYpKDpZAxDBK77HHPnOtGsRO4nw,41
144
164
  pembot/.git/refs/remotes/origin/HEAD,sha256=K7aiSqD8bEhBAPXVGim7rYQc0sdV9dk_qiBOXbtOsrQ,30
145
- pembot/.git/refs/remotes/origin/main,sha256=XQJQDbhwKcxH2eABZeHWgpv9Yedy7ytKBo0tRWo03ZM,41
165
+ pembot/.git/refs/remotes/origin/main,sha256=3SSyWY2LZTJaO5WhuYpKDpZAxDBK77HHPnOtGsRO4nw,41
146
166
  pembot/AnyToText/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
147
- pembot/AnyToText/convertor.py,sha256=gqvhwFssUsAeirfO4n0Ztwga1hn8zHbdG96sMTjYrpE,17188
167
+ pembot/AnyToText/convertor.py,sha256=5oGrgWiznsmTHmq-oxdzHHriOpeXKH_jDzq19_3XCl4,9009
148
168
  pembot/TextEmbedder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
149
169
  pembot/TextEmbedder/gemini_embedder.py,sha256=P679-2mmQESlYKML1vcrwx_-CSgWJgIQk7NL4F7BLQE,677
150
170
  pembot/TextEmbedder/mongodb_embedder.py,sha256=-xIr-zrAGzCmgNeojuX6qYj2t019EVO1I6g-Hwq0FL8,10799
151
171
  pembot/TextEmbedder/mongodb_index_creator.py,sha256=kopqdVYJii_wExVrXGZjMfqWZ2dD42b3PeNWo71weHI,5354
152
172
  pembot/TextEmbedder/vector_query.py,sha256=Kh1uhx9CatB-oQlQtnW-1I2Qz7MGHI20n2h_8peAChM,1986
153
- pembot/config/config.yaml,sha256=uLE_cngQbhDN3pwqRaZC60yhXP5dSFUYIWQKZ0qkhFM,156
173
+ pembot/config/config.yaml,sha256=lry9zmzSb6bS0GEyH1pCTDfvNFq8g-AD-zz9eOkKJ4o,156
154
174
  pembot/pdf2markdown/LICENSE,sha256=1JTJhQjUYDqJzFJhNtitm7mHyE71PRHgetIqRRWg6Pk,1068
155
175
  pembot/pdf2markdown/README.md,sha256=jitM1pwI69oa0N4mXv5-SY1ka9Sz3jsRNCDdpW-50kY,4545
156
176
  pembot/pdf2markdown/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -206,7 +226,7 @@ pembot/pdf2markdown/config/config.yaml,sha256=w75W2Eg4-tu8rRk_23PqxWDh0010kRKLmP
206
226
  pembot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
207
227
  pembot/utils/inference_client.py,sha256=jeURmY2P5heVlH1dCV0XSgiX3U2qYGEmrnUv0KFpdww,5380
208
228
  pembot/utils/string_tools.py,sha256=gtRa5rBR0Q7GspTu2WtCnvhJQLFjPfWLvhmyiPkyStU,1883
209
- pembot-0.1.2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
210
- pembot-0.1.2.dist-info/WHEEL,sha256=Dyt6SBfaasWElUrURkknVFAZDHSTwxg3PaTza7RSbkY,100
211
- pembot-0.1.2.dist-info/METADATA,sha256=2Eaou5uE_IQB3jUmUnvxXuY_ifpKbo9ZhZpdVjj2DMk,313
212
- pembot-0.1.2.dist-info/RECORD,,
229
+ pembot-0.1.4.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
230
+ pembot-0.1.4.dist-info/WHEEL,sha256=Dyt6SBfaasWElUrURkknVFAZDHSTwxg3PaTza7RSbkY,100
231
+ pembot-0.1.4.dist-info/METADATA,sha256=ere6mCBeTMLBoRB5rQOQ88yHJkDOQUVv18DJI57CbLA,313
232
+ pembot-0.1.4.dist-info/RECORD,,
File without changes