pembot 0.1.2__py2.py3-none-any.whl → 0.1.3__py2.py3-none-any.whl

This diff shows the differences between two publicly released versions of a package, as they appear in the public registry they were published to. It is provided for informational purposes only.

Potentially problematic release.


This version of pembot might be problematic. Click here for more details.

Files changed (28)
  1. pembot/.git/COMMIT_EDITMSG +1 -1
  2. pembot/.git/index +0 -0
  3. pembot/.git/logs/HEAD +2 -0
  4. pembot/.git/logs/refs/heads/main +2 -0
  5. pembot/.git/logs/refs/remotes/origin/main +2 -0
  6. pembot/.git/objects/10/d1fb81ceede7365dbe132a770a49026e86e9a5 +0 -0
  7. pembot/.git/objects/1f/791d08c432b4244a670517c87ada2181159101 +0 -0
  8. pembot/.git/objects/20/3b390ad0aeb3bc5a8540840b004e6a42e5ce7a +0 -0
  9. pembot/.git/objects/27/02d55c4513a6d23e577aa2f104982c8b9436b2 +0 -0
  10. pembot/.git/objects/48/b71bba3a3f9887828863521c13901eceb54331 +0 -0
  11. pembot/.git/objects/5b/efa3b2f18d2b5d332c6de503a7054f4af0569f +0 -0
  12. pembot/.git/objects/73/5b5f6d515f0816599343f1ae7ccffc1d5a487e +0 -0
  13. pembot/.git/objects/88/0c3d45ac59940344dfb6c45005f7e908173138 +0 -0
  14. pembot/.git/objects/bb/a495d8e72b78fefcc534259b8edae9a3172d15 +0 -0
  15. pembot/.git/objects/c0/f948ab4636a125bc202368e6c9cbe80d76169a +0 -0
  16. pembot/.git/objects/c2/926f040b089a52edfb8351480f63619ab7e0ab +0 -0
  17. pembot/.git/objects/c3/cc0da3d955ecec0f865c46c030a0c073697495 +0 -0
  18. pembot/.git/refs/heads/main +1 -1
  19. pembot/.git/refs/remotes/origin/main +1 -1
  20. pembot/.gitignore +0 -1
  21. pembot/AnyToText/convertor.py +58 -225
  22. pembot/__init__.py +1 -1
  23. pembot/config/config.yaml +1 -1
  24. pembot/requirements.txt +5 -1
  25. {pembot-0.1.2.dist-info → pembot-0.1.3.dist-info}/METADATA +1 -1
  26. {pembot-0.1.2.dist-info → pembot-0.1.3.dist-info}/RECORD +28 -16
  27. {pembot-0.1.2.dist-info → pembot-0.1.3.dist-info}/WHEEL +0 -0
  28. {pembot-0.1.2.dist-info → pembot-0.1.3.dist-info}/licenses/LICENSE +0 -0
pembot/.git/COMMIT_EDITMSG CHANGED
@@ -1 +1 @@
1
- minor oopsie
1
+ cyto/fixed the excel to markdown conversion
pembot/.git/index CHANGED
Binary file
pembot/.git/logs/HEAD CHANGED
@@ -13,3 +13,5 @@ af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db270
13
13
  a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236415 +0530 commit: added prompt prefixing option to prime the llm with some more data; added the option for no-filter global search in a collection in search_within_document(s)
14
14
  784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858244 +0530 commit: added smolagent functionality with tool extension capability to query.py and added a caveat in search_within_documents by which you can just return no context if you set limit= 0, effectively doing a Non-contextual prompt
15
15
  f214d4d56726e2928479c5948bd88e038cf70b2e 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a cyto <silverstone965@gmail.com> 1752859643 +0530 commit: minor oopsie
16
+ 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a 203b390ad0aeb3bc5a8540840b004e6a42e5ce7a cyto <silverstone965@gmail.com> 1752865439 +0530 commit: added requirements
17
+ 203b390ad0aeb3bc5a8540840b004e6a42e5ce7a c3cc0da3d955ecec0f865c46c030a0c073697495 cyto <silverstone965@gmail.com> 1758877816 +0530 commit: cyto/fixed the excel to markdown conversion
pembot/.git/logs/refs/heads/main CHANGED
@@ -13,3 +13,5 @@ af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db270
13
13
  a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236415 +0530 commit: added prompt prefixing option to prime the llm with some more data; added the option for no-filter global search in a collection in search_within_document(s)
14
14
  784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858244 +0530 commit: added smolagent functionality with tool extension capability to query.py and added a caveat in search_within_documents by which you can just return no context if you set limit= 0, effectively doing a Non-contextual prompt
15
15
  f214d4d56726e2928479c5948bd88e038cf70b2e 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a cyto <silverstone965@gmail.com> 1752859643 +0530 commit: minor oopsie
16
+ 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a 203b390ad0aeb3bc5a8540840b004e6a42e5ce7a cyto <silverstone965@gmail.com> 1752865439 +0530 commit: added requirements
17
+ 203b390ad0aeb3bc5a8540840b004e6a42e5ce7a c3cc0da3d955ecec0f865c46c030a0c073697495 cyto <silverstone965@gmail.com> 1758877816 +0530 commit: cyto/fixed the excel to markdown conversion
pembot/.git/logs/refs/remotes/origin/main CHANGED
@@ -12,3 +12,5 @@ af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db270
12
12
  a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236436 +0530 update by push
13
13
  784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858280 +0530 update by push
14
14
  f214d4d56726e2928479c5948bd88e038cf70b2e 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a cyto <silverstone965@gmail.com> 1752859659 +0530 update by push
15
+ 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a 203b390ad0aeb3bc5a8540840b004e6a42e5ce7a cyto <silverstone965@gmail.com> 1752865472 +0530 update by push
16
+ 203b390ad0aeb3bc5a8540840b004e6a42e5ce7a c3cc0da3d955ecec0f865c46c030a0c073697495 cyto <silverstone965@gmail.com> 1758877832 +0530 update by push
pembot/.git/refs/heads/main CHANGED
@@ -1 +1 @@
1
- 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a
1
+ c3cc0da3d955ecec0f865c46c030a0c073697495
pembot/.git/refs/remotes/origin/main CHANGED
@@ -1 +1 @@
1
- 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a
1
+ c3cc0da3d955ecec0f865c46c030a0c073697495
pembot/.gitignore CHANGED
@@ -1,4 +1,3 @@
1
- *.txt
2
1
  TextEmbedder/__pycache__/
3
2
  pdf2markdown/
4
3
  __pycache__/
pembot/AnyToText/convertor.py CHANGED
@@ -3,11 +3,11 @@ import mimetypes
3
3
  from pathlib import Path
4
4
  from pembot.pdf2markdown.extract import MarkdownPDFExtractor
5
5
  import os
6
- import json
7
6
  import pandas as pd
8
- from typing import Literal, Union, Dict, Any, List
7
+ from typing import Literal, Union
9
8
  import tempfile
10
9
  from datetime import datetime, date
10
+ from tabulate import tabulate
11
11
 
12
12
 
13
13
  PandasReadEngineType = Literal['xlrd', 'openpyxl', 'odf', 'pyxlsb', 'calamine', None]
@@ -53,10 +53,7 @@ class Convertor():
53
53
  self.output= output_file.read()
54
54
  elif file_type == 'excel':
55
55
  self.input_filepath= myfile
56
- self.json_filepath = output_dir / (myfile.stem + ".json")
57
- self.convert_file_to_json()
58
- with open(output_dir / (myfile.stem + '.json')) as output_file:
59
- self.output= output_file.read()
56
+ self.output= self.convert_excel_to_markdown()
60
57
 
61
58
  elif output_dir is not None and myfile is not None:
62
59
  print("got output path for conversion: ", output_dir)
@@ -64,8 +61,6 @@ class Convertor():
64
61
 
65
62
  self.output_dir= output_dir
66
63
  self.input_filepath= myfile
67
- base_name, _ = os.path.splitext(myfile.name)
68
- self.json_filepath = output_dir / 'json' / (base_name + ".json")
69
64
 
70
65
  if mt == 'application/json':
71
66
  print("the file was json")
@@ -73,242 +68,80 @@ class Convertor():
73
68
  print("the file was pdf, outputting in: ", output_dir)
74
69
  extractor= MarkdownPDFExtractor(str(myfile), output_path= str(self.output_dir), page_delimiter= "-- NEXT PAGE --", model_name= model_name)
75
70
  extractor.extract()
71
+ with open(self.output_dir / (myfile.stem + '.md')) as output_file:
72
+ self.output= output_file.read()
76
73
 
77
74
  elif mt in EXCEL_FILE_TYPES:
78
- self.convert_file_to_json()
75
+ self.output = self.convert_excel_to_markdown()
79
76
 
80
77
  else:
81
78
  print(mt)
82
79
 
83
- def convert_file_to_json(
84
- self,
85
- sheet_to_convert: Union[str, int, None] = None, # Relevant for Excel/ODS
86
- orient: Literal['dict', 'list', 'series', 'split', 'records', 'index'] = 'records', # Corrected type hint
87
- date_format: Union[str, None] = 'iso', # 'iso', 'epoch', or None
88
- csv_encoding: str = 'utf-8', # For reading CSV files
89
- excel_ods_engine: PandasReadEngineType = None # For Excel/ODS, e.g., 'openpyxl', 'xlrd', 'odf'
90
- ) -> bool:
80
+ def convert_excel_to_markdown(self, excel_ods_engine: PandasReadEngineType = None) -> str:
91
81
  """
92
- Converts an Excel, ODS, or CSV file (or a specific Excel/ODS sheet)
93
- into an equivalent JSON format.
82
+ Converts all sheets from an Excel or ODS file into a single Markdown string.
83
+ Each sheet is converted to a Markdown table, prefixed with the sheet's name.
94
84
 
95
85
  Args:
96
- sheet_to_convert (str | int | None, optional):
97
- - For Excel/ODS:
98
- - If None (default): Converts all sheets. The JSON output will be a
99
- dictionary where keys are sheet names and values are the JSON
100
- representation of each sheet.
101
- - If str: Name of the specific sheet to convert.
102
- - If int: Index of the specific sheet to convert (0-based).
103
- If a specific sheet is requested, the JSON output will directly be
104
- the representation of that sheet.
105
- - For CSV: This parameter is ignored. The entire CSV is processed.
106
- orient (str, optional): Pandas DataFrame.to_dict() orientation for each sheet/CSV.
107
- Default: 'records'. See pandas.DataFrame.to_dict() documentation.
108
- date_format (str | None, optional): Format for datetime objects.
109
- - 'iso' (default): ISO8601 format (e.g., '2023-10-27T10:30:00').
110
- - 'epoch': Milliseconds since epoch.
111
- - None: Pandas default (often Timestamps). 'iso' is generally safer for JSON.
112
- csv_encoding (str, optional): Encoding for reading CSV files. Default is 'utf-8'.
113
86
  excel_ods_engine (str | None, optional): Pandas engine for reading Excel or ODS files.
114
87
  - For Excel: 'openpyxl' (for .xlsx), 'xlrd' (for .xls).
115
88
  - For ODS: 'odf' (requires 'odfpy' library).
116
89
  If None, pandas auto-detects based on file extension and installed libraries.
117
90
 
118
91
  Returns:
119
- bool: True if conversion was successful, False otherwise.
92
+ str: A string containing the Markdown tables for all sheets, or an error message.
120
93
  """
121
-
122
94
  input_filepath = self.input_filepath
123
- json_filepath = self.json_filepath
95
+ markdown_output = []
124
96
 
97
+ file_suffix= ''
125
98
  try:
126
-
127
99
  if not input_filepath.exists():
128
- print(f"Error: Input file not found at {input_filepath}")
129
- return False
130
-
131
- # Ensure output directory exists
132
- json_filepath.parent.mkdir(parents=True, exist_ok=True)
100
+ return f"Error: Input file not found at {input_filepath}"
133
101
 
134
102
  file_suffix = input_filepath.suffix.lower()
135
- output_data_final: Union[Dict[str, Any], List[Dict[str, Any]]] = {}
136
-
137
- dataframes_to_process: list[tuple[pd.DataFrame, str | None]] = []
138
-
139
103
  current_engine: PandasReadEngineType = excel_ods_engine
140
104
 
141
- if file_suffix == '.csv':
142
- if sheet_to_convert is not None:
143
- print(f"Info: 'sheet_to_convert' parameter ('{sheet_to_convert}') is ignored for CSV file '{input_filepath.name}'. Processing entire CSV.")
144
- try:
145
- df = pd.read_csv(input_filepath, encoding=csv_encoding)
146
- dataframes_to_process.append((df, None))
147
- except Exception as e:
148
- print(f"Error reading CSV file '{input_filepath.name}': {e}")
149
- return False
150
-
151
- elif file_suffix in ['.xls', '.xlsx', '.ods']:
152
- try:
153
- if file_suffix == '.ods':
154
- if current_engine is None:
155
- current_engine = 'odf'
156
- elif current_engine != 'odf':
157
- print(f"Warning: Specified engine '{current_engine}' may not be optimal for ODS. Forcing 'odf'.")
158
- current_engine = 'odf'
159
-
160
- if sheet_to_convert is not None:
161
- df = pd.read_excel(input_filepath, sheet_name=sheet_to_convert, engine=current_engine)
162
- dataframes_to_process.append((df, None))
163
-
164
- else:
165
- excel_file = pd.ExcelFile(input_filepath, engine=current_engine)
166
- if not excel_file.sheet_names:
167
- print(f"Warning: File '{input_filepath.name}' contains no sheets.")
168
- for sheet_name in excel_file.sheet_names:
169
- df = excel_file.parse(sheet_name) # engine is inherited
170
- dataframes_to_process.append((df, sheet_name))
171
- except ImportError as ie:
172
- if 'odfpy' in str(ie).lower() and file_suffix == '.ods':
173
- print(f"Error reading ODS file '{input_filepath.name}': The 'odfpy' library is required. Please install it using 'pip install odfpy'.")
174
- elif 'xlrd' in str(ie).lower() and file_suffix == '.xls':
175
- print(f"Error reading .xls file '{input_filepath.name}': The 'xlrd' library might be required. Please install it using 'pip install xlrd'.")
176
- elif 'openpyxl' in str(ie).lower() and file_suffix == '.xlsx':
177
- print(f"Error reading .xlsx file '{input_filepath.name}': The 'openpyxl' library might be required. Please install it using 'pip install openpyxl'.")
178
- else:
179
- print(f"ImportError reading file '{input_filepath.name}': {ie}")
180
- return False
181
- except Exception as e:
182
- print(f"Error reading Excel/ODS file '{input_filepath.name}': {e}")
183
- return False
184
- else:
185
- print(f"Error: Unsupported file type: '{file_suffix}'. Please provide a CSV, XLS, XLSX, or ODS file.")
186
- return False
187
-
188
- if not dataframes_to_process and file_suffix in ['.xls', '.xlsx', '.ods'] and sheet_to_convert is None:
189
- print(f"Info: No dataframes were loaded from '{input_filepath.name}'. Output JSON will be empty if processing all sheets from an empty file.")
190
- elif not dataframes_to_process and not (file_suffix in ['.xls', '.xlsx', '.ods'] and sheet_to_convert is None):
191
- pass
192
-
193
- is_direct_output = len(dataframes_to_process) == 1 and dataframes_to_process[0][1] is None
194
- temp_processed_data: Dict[str, Any] = {}
195
-
196
- for df_original, name_key in dataframes_to_process:
197
- df = df_original.copy()
198
-
199
- # Handle datetime columns with improved detection and conversion
200
- if date_format:
201
- # Check for datetime columns using multiple approaches
202
- datetime_columns = []
203
-
204
- # Method 1: Use pandas dtype detection
205
- datetime_columns.extend(df.select_dtypes(include=['datetime64[ns]', 'datetime', 'datetimetz']).columns.tolist())
206
-
207
- # Method 2: Check for datetime objects in each column
208
- for col in df.columns:
209
- if col not in datetime_columns:
210
- # Sample a few non-null values to check type
211
- sample_values = df[col].dropna().head(10)
212
- if len(sample_values) > 0:
213
- for val in sample_values:
214
- if isinstance(val, (datetime, date, pd.Timestamp)):
215
- datetime_columns.append(col)
216
- break
217
-
218
- # Convert datetime columns
219
- for col_name in datetime_columns:
220
- try:
221
- if date_format == 'iso':
222
- df[col_name] = df[col_name].apply(lambda x: self._convert_to_iso(x))
223
- elif date_format == 'epoch':
224
- df[col_name] = df[col_name].apply(lambda x: self._convert_to_epoch(x))
225
- except Exception as e_date:
226
- print(f"Warning: Could not fully convert date column '{col_name}' in '{name_key or input_filepath.name}' using format '{date_format}'. Error: {e_date}")
227
-
228
- # Replace NaN values with None for JSON compatibility
229
- df = df.astype(object).where(pd.notnull(df), None)
230
-
231
- # Final safety check: convert any remaining datetime objects
232
- for col in df.columns:
233
- df[col] = df[col].apply(lambda x: self._safe_datetime_convert(x, date_format))
234
-
235
- current_json_segment = df.to_dict(orient=orient)
236
-
237
- if is_direct_output:
238
- output_data_final = current_json_segment
239
- break
240
- else:
241
- if name_key is not None:
242
- temp_processed_data[name_key] = current_json_segment
243
-
244
- if not is_direct_output:
245
- output_data_final = temp_processed_data
246
-
247
- with open(json_filepath, 'w', encoding='utf-8') as f:
248
- json.dump(output_data_final, f, indent=4, ensure_ascii=False)
249
-
250
- print(f"Successfully converted '{input_filepath.name}' to '{json_filepath.name}'")
251
- return True
252
-
253
- except FileNotFoundError:
254
- print(f"Error: Input file not found at {input_filepath.name}")
255
- return False
256
- except ValueError as ve:
257
- print(f"ValueError during conversion of '{input_filepath.name}': {ve}")
258
- return False
259
- except Exception as e:
260
- print(f"An unexpected error occurred during conversion of '{input_filepath.name}': {e}")
261
- return False
105
+ if file_suffix in ['.xls', '.xlsx', '.ods']:
106
+ if file_suffix == '.ods':
107
+ if current_engine is None:
108
+ current_engine = 'odf'
109
+ elif current_engine != 'odf':
110
+ print(f"Warning: Specified engine '{current_engine}' may not be optimal for ODS. Forcing 'odf'.")
111
+ current_engine = 'odf'
262
112
 
263
- def _convert_to_iso(self, value):
264
- """Convert datetime-like objects to ISO format string."""
265
- if pd.isnull(value) or value is None:
266
- return None
113
+ excel_file = pd.ExcelFile(input_filepath, engine=current_engine)
114
+ if not excel_file.sheet_names:
115
+ return f"Warning: File '{input_filepath.name}' contains no sheets."
267
116
 
268
- try:
269
- if isinstance(value, str):
270
- return value # Already a string
271
- elif hasattr(value, 'isoformat'):
272
- return value.isoformat()
273
- elif isinstance(value, pd.Timestamp):
274
- return value.isoformat()
275
- else:
276
- return str(value)
277
- except:
278
- return str(value) if value is not None else None
117
+ for sheet_name in excel_file.sheet_names:
118
+ df = excel_file.parse(sheet_name)
119
+ markdown_output.append(f"## {sheet_name}\n")
120
+ markdown_table = tabulate(df, headers='keys', tablefmt='pipe')
121
+ markdown_output.append(markdown_table)
122
+ markdown_output.append("\n")
279
123
 
280
- def _convert_to_epoch(self, value):
281
- """Convert datetime-like objects to epoch milliseconds."""
282
- if pd.isnull(value) or value is None:
283
- return None
124
+ return "\n".join(markdown_output)
125
+
126
+ elif file_suffix == '.csv':
127
+ df = pd.read_csv(input_filepath)
128
+ markdown_table = tabulate(df, headers='keys', tablefmt='pipe')
129
+ return markdown_table
284
130
 
285
- try:
286
- if isinstance(value, (int, float)):
287
- return int(value) # Assume already epoch
288
- elif hasattr(value, 'timestamp'):
289
- return int(value.timestamp() * 1000)
290
- elif isinstance(value, pd.Timestamp):
291
- return int(value.timestamp() * 1000)
292
131
  else:
293
- return str(value)
294
- except:
295
- return str(value) if value is not None else None
296
-
297
- def _safe_datetime_convert(self, value, date_format):
298
- """Final safety conversion for any remaining datetime objects."""
299
- if pd.isnull(value) or value is None:
300
- return None
301
-
302
- # If it's a datetime-like object, convert it
303
- if isinstance(value, (datetime, date, pd.Timestamp)):
304
- if date_format == 'iso':
305
- return self._convert_to_iso(value)
306
- elif date_format == 'epoch':
307
- return self._convert_to_epoch(value)
132
+ return f"Error: Unsupported file type: '{file_suffix}'. Please provide a CSV, XLS, XLSX, or ODS file."
133
+
134
+ except ImportError as ie:
135
+ if 'odfpy' in str(ie).lower() and file_suffix == '.ods':
136
+ return f"Error reading ODS file '{input_filepath.name}': The 'odfpy' library is required. Please install it using 'pip install odfpy'."
137
+ elif 'xlrd' in str(ie).lower() and file_suffix == '.xls':
138
+ return f"Error reading .xls file '{input_filepath.name}': The 'xlrd' library might be required. Please install it using 'pip install xlrd'."
139
+ elif 'openpyxl' in str(ie).lower() and file_suffix == '.xlsx':
140
+ return f"Error reading .xlsx file '{input_filepath.name}': The 'openpyxl' library might be required. Please install it using 'pip install openpyxl'."
308
141
  else:
309
- return str(value)
310
-
311
- return value
142
+ return f"ImportError reading file '{input_filepath.name}': {ie}"
143
+ except Exception as e:
144
+ return f"An unexpected error occurred during conversion of '{input_filepath.name}': {e}"
312
145
 
313
146
 
314
147
  def chunk_text(text, chunk_size=500, overlap_size=50):
@@ -337,29 +170,29 @@ def chunk_text(text, chunk_size=500, overlap_size=50):
337
170
  if __name__ == '__main__':
338
171
  print("Test Run Start:")
339
172
  try:
340
- print("Test 1: scaned pdf page, bytes")
341
- with open("/home/cyto/Documents/scanned.pdf", "rb") as imgpdf:
342
- conv= Convertor(file_bytes= imgpdf.read(), suffix= ".pdf", file_type= "pdf")
343
- print(conv.output)
173
+ # print("Test 1: scaned pdf page, bytes")
174
+ # with open("/home/cyto/Documents/scanned.pdf", "rb") as imgpdf:
175
+ # conv= Convertor(file_bytes= imgpdf.read(), suffix= ".pdf", file_type= "pdf")
176
+ # print(conv.output)
344
177
 
345
178
  # print("Test 2: JD pdf, bytes")
346
179
  # with open("/home/cyto/dev/pembotdir/jds/PM Trainee.pdf", "rb") as imgpdf:
347
180
  # conv= Convertor(file_bytes= imgpdf.read(), suffix= ".pdf", file_type= "pdf")
348
181
  # print(conv.output)
349
182
 
350
- # print("Test 3: excel schedule, bytes")
351
- # with open("/home/cyto/Downloads/Assignment schedule.xlsx", "rb") as imgpdf:
352
- # conv= Convertor(file_bytes= imgpdf.read(), suffix= ".xlsx", file_type= "excel")
353
- # print(conv.output)
183
+ print("Test 3: excel schedule, bytes")
184
+ with open("/home/cyto/Downloads/Assignment schedule.xlsx", "rb") as imgpdf:
185
+ conv= Convertor(file_bytes= imgpdf.read(), suffix= ".xlsx", file_type= "excel")
186
+ print(conv.output)
354
187
 
355
188
  # without bytes example:
356
189
  print("Test 4: scanned pdf, path")
357
190
  conv= Convertor(myfile= Path('/home/cyto/Documents/scanned.pdf'), output_dir= Path('/home/cyto/Documents'))
358
191
  print(conv.output)
359
192
 
360
- # print("Test 5: schedule excel, path")
361
- # conv= Convertor(myfile= Path('/home/cyto/Downloads/Assignment schedule.xlsx'), output_dir= Path('/home/cyto/Downloads'))
362
- # print(conv.output)
193
+ print("Test 5: schedule excel, path")
194
+ conv= Convertor(myfile= Path('/home/cyto/Downloads/Assignment schedule.xlsx'), output_dir= Path('/home/cyto/Downloads'))
195
+ print(conv.output)
363
196
  except FileNotFoundError as fe:
364
197
  print("file not found, modify the driver code to get sample files to test:\n\n", fe)
365
198
  except Exception as e:
pembot/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """
2
2
  A Python Package to convert PEM blog content to usseful information by leveraging LLMs
3
3
  """
4
- __version__ = '0.1.2'
4
+ __version__ = '0.1.3'
5
5
  from .main import save_to_json_file, make_query
6
6
  __all__ = ["save_to_json_file", "make_query"]
pembot/config/config.yaml CHANGED
@@ -2,4 +2,4 @@ OUTPUT_DIR: /home/cyto/dev/pembotdir
2
2
  PAGE_DELIMITER: ___________________________ NEXT PAGE ___________________________
3
3
  app:
4
4
  name: pembot
5
- version: 0.1.2
5
+ version: 0.1.3
pembot/requirements.txt CHANGED
@@ -9,6 +9,7 @@ cffi==1.17.1
9
9
  charset-normalizer==3.4.2
10
10
  click==8.2.1
11
11
  cryptography==45.0.5
12
+ defusedxml==0.7.1
12
13
  dnspython==2.7.0
13
14
  duckduckgo_search==8.1.1
14
15
  et_xmlfile==2.0.0
@@ -36,6 +37,7 @@ MarkupSafe==3.0.2
36
37
  mdurl==0.1.2
37
38
  msgpack==1.1.1
38
39
  numpy==2.3.1
40
+ odfpy==1.4.1
39
41
  ollama==0.5.1
40
42
  openpyxl==3.1.5
41
43
  orjson==3.10.18
@@ -44,7 +46,7 @@ pandas==2.3.0
44
46
  pathlib==1.0.1
45
47
  pdfminer.six==20250506
46
48
  pdfplumber==0.11.7
47
- pembot==0.1.2
49
+ pembot==0.1.3
48
50
  pillow==11.3.0
49
51
  primp==0.15.0
50
52
  pyasn1==0.6.1
@@ -76,6 +78,7 @@ smolagents==1.20.0
76
78
  sniffio==1.3.1
77
79
  soupsieve==2.7
78
80
  starlette==0.46.2
81
+ tabulate==0.9.0
79
82
  tenacity==8.5.0
80
83
  tomlkit==0.13.3
81
84
  tqdm==4.67.1
@@ -86,3 +89,4 @@ tzdata==2025.2
86
89
  urllib3==2.5.0
87
90
  uvicorn==0.35.0
88
91
  websockets==15.0.1
92
+ xlrd==2.0.2
{pembot-0.1.2.dist-info → pembot-0.1.3.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pembot
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: A Python Package to convert PEM blog content to usseful information by leveraging LLMs
5
5
  Author-email: cyto <aryan_sidhwani@protonmail.com>
6
6
  License-Expression: MIT
{pembot-0.1.2.dist-info → pembot-0.1.3.dist-info}/RECORD CHANGED
@@ -1,19 +1,19 @@
1
- pembot/.gitignore,sha256=_7FTsZokJ_pzEyyPjOsGw5x5Xx3gUBFaafs7UlPsv9E,98
1
+ pembot/.gitignore,sha256=yyDEUmeqZekG4AOrU9Zvu2ZQhJvEzEg_lQp2CDfBhXM,92
2
2
  pembot/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
3
- pembot/__init__.py,sha256=JS8ONln7V9MIxi_JrRdEW9Kc9ygOmpCCRKEYC9TnTjA,211
3
+ pembot/__init__.py,sha256=BSH5pBwk4cE-px43hTajMIU1KPohYzz7NSoELkXBd7s,211
4
4
  pembot/gartner.py,sha256=3ALknQ5mSXIimmwCa3JFDzB_EW2hHEcQO1T2odyBquk,5408
5
5
  pembot/main.py,sha256=lZLIV8XPonvNoY4LVS-5fct1y9URMXWoSGJUKMw3Yg8,9667
6
6
  pembot/output_structure_local.py,sha256=YfpHzfTNeLMSsB_CjAamha9D6Iz7E1IC-tW9xPCMWFc,3000
7
7
  pembot/pem.py,sha256=mv6iGcN1peSY7z2dtCQ_BKj31EFBNfczBhps_d-0XDo,6377
8
8
  pembot/pyrightconfig.json,sha256=j2O2tc8Z-Zu7hEnhN9neoKk6-iLkAlp4qOmAxFyHB7Y,368
9
9
  pembot/query.py,sha256=zgfIJsSMDatFPl0Fw3MhK7fO8uBB0Yj4rxEAExqGyGA,18054
10
- pembot/requirements.txt,sha256=BGGuhM9oXJGN6oueIu3AYmBMUXeo-BoEQFJHHyO1BCc,1508
10
+ pembot/requirements.txt,sha256=bSXSM-tdrwGooRpz9e1VE3Yt9hXwz3inSq2PYpPw3hU,1567
11
11
  pembot/search.py,sha256=IW0F8QjE-HSYP47v5P9EqfnzKgFEf5CGxeICtHDDrkE,9137
12
- pembot/.git/COMMIT_EDITMSG,sha256=pTsMiZ9dt9Of1JgR5858BXwxO8jn7P0MpLw0pJE7dqc,13
12
+ pembot/.git/COMMIT_EDITMSG,sha256=PRRNgCxdUYryr-DaiG3MSBswsgdYtfleiBnQX_2R64U,44
13
13
  pembot/.git/HEAD,sha256=KNJb-Cr0wOK3L1CVmyvrhZ4-YLljCl6MYD2tTdsrboA,21
14
14
  pembot/.git/config,sha256=ZFl9d2GyxirgRXRsv8iULIieKxwGC9P6SAjB_AmTkmQ,271
15
15
  pembot/.git/description,sha256=hatsFj1DoX6pz3eIMIvKFGbxsKjRzJLibpv2PaQGKu4,73
16
- pembot/.git/index,sha256=70Iy37BW4GBRzaodLY4qm1hjjcBml0r1cdcFyNX_G_I,1974
16
+ pembot/.git/index,sha256=5oJw9H2tF6dW5jUIaChBVJVZAfTwLdB-F-cyM_oLdx0,2054
17
17
  pembot/.git/packed-refs,sha256=7DECsr7q7vJ6Gw6a2gS3dE4v-YzbxGiWYoSWM43DgsQ,112
18
18
  pembot/.git/hooks/applypatch-msg.sample,sha256=AiNJeguLAzqlijpSG4YphpOGz3qw4vEBlj0yiqYhk_c,478
19
19
  pembot/.git/hooks/commit-msg.sample,sha256=H3TV6SkpebVz69WXQdRsuT_zkazdCD00C5Q3B1PZJDc,896
@@ -30,10 +30,10 @@ pembot/.git/hooks/push-to-checkout.sample,sha256=pT0HQXmLKHxt16-mSu5HPzBeZdP0lGO
30
30
  pembot/.git/hooks/sendemail-validate.sample,sha256=ROv8kj3FRmvACWAvDs8Ge5xlRZq_6IaN3Em3jmztepI,2308
31
31
  pembot/.git/hooks/update.sample,sha256=jV8vqD4QPPCLV-qmdSHfkZT0XL28s32lKtWGCXoU0QY,3650
32
32
  pembot/.git/info/exclude,sha256=ZnH-g7egfIky7okWTR8nk7IxgFjri5jcXAbuClo7DsE,240
33
- pembot/.git/logs/HEAD,sha256=OrEUcE6427s9cKnTnrPMYZLrJcs4v-PSww3Zq_Tc060,3790
34
- pembot/.git/logs/refs/heads/main,sha256=OrEUcE6427s9cKnTnrPMYZLrJcs4v-PSww3Zq_Tc060,3790
33
+ pembot/.git/logs/HEAD,sha256=yPZc9m6stXELdpuwEBMSjn0rVegOtxHb8YcCsBL0USA,4131
34
+ pembot/.git/logs/refs/heads/main,sha256=yPZc9m6stXELdpuwEBMSjn0rVegOtxHb8YcCsBL0USA,4131
35
35
  pembot/.git/logs/refs/remotes/origin/HEAD,sha256=OrkNquczPPh6fEGtutFKva_-_JhAdwnvXpCCPC4N6jk,194
36
- pembot/.git/logs/refs/remotes/origin/main,sha256=iJ6dB86rQ2-iqzRSdgRdjkze4t1IGz0MTxou3cwLYE4,2044
36
+ pembot/.git/logs/refs/remotes/origin/main,sha256=i4leZYJW4JljsOW0xXKwBmnDOlygP8qJN378MfpDauM,2336
37
37
  pembot/.git/objects/01/5f71967c525963c827d7fe5415ae2c040c4a64,sha256=-qlT-5utWcwFnO3ADkH2SA2LBsdcph6wE2iePxJxkHs,170
38
38
  pembot/.git/objects/09/ee34d94dffd4c286df1d6d528b2e98e2a6cce2,sha256=FSXPGn6UBhR7s1Ug-afzCYLfGy8dE3Umn8dBKaahkDM,203
39
39
  pembot/.git/objects/0a/fb3a98cdc55b1434b44534ec2bf22c56cfa26c,sha256=Xxw20vI57zuhERWopDAZpQw6rAOhFtUr05lzpGyCTTE,120
@@ -44,10 +44,14 @@ pembot/.git/objects/0d/28f73897db0c9a9351ee9e64d2a0fe27db2705,sha256=hqMFSXWo_05
44
44
  pembot/.git/objects/0e/120123bfadfd594220963f3bbca54056bab6ee,sha256=fj4c6vIKYMYSj5DEdXd6fcYGcanqaPGRD_9haJy35ns,56
45
45
  pembot/.git/objects/0f/ccea3d0db4864a854f8b2c13b9f76b3601d200,sha256=Fq6qF_9lqg1bYsF2tWArhzkldnfgLFELLK2CH_2XNcU,203
46
46
  pembot/.git/objects/10/9d1ca0463ea42bbbc435bcb43a90711211cf49,sha256=vR33_Raw-LpnaXGQc1MhSk_ZgEROO2Xa9n97YmA3gtQ,56
47
+ pembot/.git/objects/10/d1fb81ceede7365dbe132a770a49026e86e9a5,sha256=OhI6pEx_G6KbujS7idkp5MxJd1Aw92Wn3Sl-JBgU2VU,115
47
48
  pembot/.git/objects/14/22c2d0cf79fd928ff7e2d77f96ad5b40cc2a31,sha256=2INSnjkW4KTAcfO2aLYVzjnpT89NXxx8TBJj4iU9e3Y,170
48
49
  pembot/.git/objects/18/28e18ab80aa64d334b26428708140e280cbc63,sha256=PTF8WLVhzxBDTZhwU_PBHrkQBbijHbKvttSr0XVTOcU,3936
49
50
  pembot/.git/objects/19/f61df7dbd562d04f561288677bbf2f18f5dff7,sha256=zg8IdUSnMYpJ6HsfY2LQbXQTMwlT1IPWRSEiY2uDwyE,392
51
+ pembot/.git/objects/1f/791d08c432b4244a670517c87ada2181159101,sha256=Zpth_iVM6H5W4u5jLVEKdRz0i3ydBLm5XJql4ieuj8U,169
50
52
  pembot/.git/objects/1f/83a471c8119f7794d98c049170a5d7d07a4b71,sha256=XnMaYQUA8iT1fiOIvlBav331Ry7pNBOBqI3wB3Y1VM0,90
53
+ pembot/.git/objects/20/3b390ad0aeb3bc5a8540840b004e6a42e5ce7a,sha256=tNzFPYJ0Y6YpaNw4w2hRH_0iTa5fNlC2nzARkKFbIec,162
54
+ pembot/.git/objects/27/02d55c4513a6d23e577aa2f104982c8b9436b2,sha256=SQ84I7DnyPaaxoWCBoh20Iw1VZm8wgSaPaL5uDR-R8I,90
51
55
  pembot/.git/objects/28/db0ab48059acccd7d257aa02e52e9b6b83a4a5,sha256=S6PrWSQlkifYxKIgFdU0PZD0uLebS6uAP2LAUwp5yOI,91
52
56
  pembot/.git/objects/35/97e518a8658280be9f377f78edf1dfa1f23814,sha256=gfc5bFLVZpwNQb1Ox2VosDYAjw0Lc5ZLjmvNA8gWcmg,2546
53
57
  pembot/.git/objects/37/175696b3ca7a5d17379f03fb61a1023d50aeba,sha256=XaF3EsJ1wSIWtgBtgKsZkwiMK0NM8acFy9nnqE9_d0s,3085
@@ -62,11 +66,13 @@ pembot/.git/objects/41/cbeb6bcb4c6fa9ef9be571082d95ecb4ea0ee3,sha256=waMrzjG_o5D
62
66
  pembot/.git/objects/42/f03e1b66aa56bbb36a1c3a8dea9e1e727faffa,sha256=n4W2gcagesjI1rStKNxQ98q5UOHlfwFJGUADFeYldoE,418
63
67
  pembot/.git/objects/44/86da0f89c566c3bae8abf435d37aeca87f1632,sha256=S2hY860Ep-0c7gQcbgrH6ioG7-Hw9a3BwYHcCkwy1Hg,3884
64
68
  pembot/.git/objects/44/9dea1ca63d6f1e47d119b36576acc94822a37c,sha256=gVL6GHxMRFhlOnyUCO1dSxnsBlMd4Jx90eNZFrv32UQ,6490
69
+ pembot/.git/objects/48/b71bba3a3f9887828863521c13901eceb54331,sha256=Kx2Tcs17_chpF5rbY3AB34Cj1S3DGnr7Y1tZOTxvrdM,80
65
70
  pembot/.git/objects/4b/c4370a037feed828cca0915ebb0bb94b24a9d4,sha256=jt9lsSz8c3dw9PyfEEtkReCC_8YLXSKuc6ykSJCKZPM,487
66
71
  pembot/.git/objects/4d/a03134f70896f72053fbdc0cd4f4c76d4ac1d8,sha256=GBhAvxM1omIt-PN6mNXYlIJMN5nx2AUE0ZOf68El5pc,117
67
72
  pembot/.git/objects/50/39b29fda67743a044993436df6a4a1db7b8888,sha256=NYNmYtOq8IMmH32GaQSOBpTRTTm6jEJfY3vytVpzfKM,115
68
73
  pembot/.git/objects/51/9e780574933d7627a083222bd10dd74f430904,sha256=3e3Iu2-waVySghbLYXmwhDPpfhV4PF82suvjcYkSVog,3604
69
74
  pembot/.git/objects/59/69ac8b9d6b44a601385c3ed8c710a69d05216e,sha256=3IOcUn5myiozgeId1iWJZX-r7cS65xXnzQCEjrc-1ZA,168
75
+ pembot/.git/objects/5b/efa3b2f18d2b5d332c6de503a7054f4af0569f,sha256=g84QcQu-1NZ4-MfLHRosIUOnlK0VItVBqqFW5ffGDNI,882
70
76
  pembot/.git/objects/5c/4f01d3ce9e243bbb8a693f97e5c7d13a857cb7,sha256=BnHoA5JBo5NY2ReemhwmZ-dOdx6CwXWY1TQsc-FSM5o,242
71
77
  pembot/.git/objects/61/46a371b9c1bd9f51af273f11f986cfd1bedeba,sha256=KZvfnjxuriY54uWZQOM-GLovAvHs1k8_KwhpjNA5lW4,128
72
78
  pembot/.git/objects/63/1700a51c8fa97b543991f5f61bfcd1e7e1327d,sha256=sYkhBkrSPQ8klX2gPrXJUZVt2a0iaF7KC7NFGBuxgeY,4360
@@ -75,12 +81,14 @@ pembot/.git/objects/6d/7a865a23b1cb4182f67907820104ced48b11c9,sha256=dJRTCmT9rLy
75
81
  pembot/.git/objects/71/014c7a6c8c98449a26ef966485a6cf30a1974d,sha256=ZamWua6G5BGjBYZYeG8dN3nHhwz_kqFfoYyO2wtuRV0,417
76
82
  pembot/.git/objects/72/f047cda92abcd1ddc857f6461de605f8668331,sha256=PFb9LUDMnUCnuJcXUa5W1ea__fdP17kNyWrnqvnOpjs,240
77
83
  pembot/.git/objects/73/2e98f08bc806c331b06847fc8c743f545499e5,sha256=kbKUb6fwwhRO73B4EZmol55JBvckqE3GNZ9PqHRB2ag,3995
84
+ pembot/.git/objects/73/5b5f6d515f0816599343f1ae7ccffc1d5a487e,sha256=0aByFDI4DyyfA-TKCFoUXbQAvNabJIV7CXHSIDna5bo,2833
78
85
  pembot/.git/objects/75/321fbcd2be44a548400fbacbf5bcb71e3810fd,sha256=7AXaYVgItbw3xQiEqeRyO5qdIedIxDoI9hTDn8CBRxM,56
79
86
  pembot/.git/objects/78/4aa28d912b66e07748483efe0326c70d7541a5,sha256=6ut1I6cMnpRs6EK2CZZv50W25yNc0Ha6nC_cj9tSQjI,249
80
87
  pembot/.git/objects/7a/7d28b0313a3d9d509823faaae31949af8610ef,sha256=X59k-p9VNLBpmJlL53qIz8mntLeCSpnjw-rq9u9z_6I,90
81
88
  pembot/.git/objects/7e/0907822f7d316ebe0be07e1f6918bef412c80b,sha256=lFc55Bu-vEXF8In553gHxlEsB47Vg2qFXHiJqepWEqg,5167
82
89
  pembot/.git/objects/86/cdaec229f1fbebf43042266b03878944669f25,sha256=eTvQhUeYXP8E181oTOcBydcgmImr62IizaH_Jbcbg8g,4077
83
90
  pembot/.git/objects/87/d6df5217a4a374f8c1211a05f9bd657f72c9a7,sha256=OGq5-x1lFa94vTX7WYO6o4TGvCZwAvZ6LXm6N3dpiKM,3881
91
+ pembot/.git/objects/88/0c3d45ac59940344dfb6c45005f7e908173138,sha256=7VMQzB6baLdC2Uj5f84w-X6XLM3GinXGBQjewhXupAc,914
84
92
  pembot/.git/objects/8b/5be2af9b16f290549193859c214cd9072212e8,sha256=DhGeGisCdFZ0TcRKp5angRpaseI87TQDt5FtGZInstk,117
85
93
  pembot/.git/objects/8d/adc1d7891c79de24ba2c7c38b4c830bf61870a,sha256=QJaAleJXlBhybaUcSeKB7nC9OJg9gjP_xc071Wyq8BM,115
86
94
  pembot/.git/objects/8f/c00bf69f4ad3e50c13acc4a0988b6c0fe72b5a,sha256=uJVaujaQWN_NwzK9P0SM7cYp3I6GQFXdlYBPrnqVhcg,159
@@ -102,13 +110,17 @@ pembot/.git/objects/b1/1173d9b68db117437ccb9551461152e1e8a77d,sha256=6cl8NMNQ9b5
102
110
  pembot/.git/objects/b2/4e79ab07fe9e68781961a25ff9f1dbb1546fbb,sha256=zfd9KnP9YtBMwzci1BMWFHAQR4BWJ3XQsyr-rFqdw0Q,135
103
111
  pembot/.git/objects/b8/884c6145221ac66f84bf88919754c2cb05c12d,sha256=6EJskrHAkqVAC5ExxIZDQT_2kZWhfLPPAPbX61tmwgw,170
104
112
  pembot/.git/objects/b8/eea52176ffa4d88c5a9976bee26092421565d3,sha256=xCom1B6wyws8ZNTJoIL4JtVIXNv1yPCwsXfNsVCAGQA,4410
113
+ pembot/.git/objects/bb/a495d8e72b78fefcc534259b8edae9a3172d15,sha256=Kr92INW6aFVOO0iZm0J2y2Yld9N1Dg-fP6zP1_cqe0g,525
105
114
  pembot/.git/objects/bd/8fd1cb166996e74a8631f3a6f764a53af75297,sha256=JOkICUEv6tdVp7mYDUKtXnsWq3IIZSmm8iUP7OqQwc4,56
106
115
  pembot/.git/objects/bf/068a0714e2145de83a5c004f4213b091439d0e,sha256=MpiiCqAk6GQ5iGzeThU0rsabrgA5tCAgdIWudAM0IrA,420
107
116
  pembot/.git/objects/bf/32a7e6872e5dc4025ee3df3c921ec7ade0855f,sha256=lwL9ickzIFtMJgNKaPp6nTGDlMhPs6fkZTWevQWK_Lc,56
108
117
  pembot/.git/objects/bf/518686b06069d2a8abd3689908b7e1a6e16b05,sha256=w-HgdJdX2_ZdiIptJv8BcWdeDEyhl42WEk8P72X8YKU,421
109
118
  pembot/.git/objects/c0/793458db6e1bee7f79f1a504fb8ff4963f8ed3,sha256=b8lo_OrMeGgirc9yY_OFjv5xVpG6FBpZnBf7jbtlmyw,421
119
+ pembot/.git/objects/c0/f948ab4636a125bc202368e6c9cbe80d76169a,sha256=GPQso_R_RWWLx_pF3g58MiM4HyeSnpXTeLeKDfhkyPc,526
110
120
  pembot/.git/objects/c2/443060c07101948487cfa93cc39e082e9e0f5f,sha256=d9rjB8sgBOUQ-HQ8yu5I-c5Dqr_q2z0OOCXSufjDAak,3998
121
+ pembot/.git/objects/c2/926f040b089a52edfb8351480f63619ab7e0ab,sha256=HAXSsWokz2tuk9Y952ogIEzSBlbUC4lZ1CjvWBc22Cg,56
111
122
  pembot/.git/objects/c2/ce19d575a8cccf6886862c4fa6afefba142511,sha256=kxbbFUJ1TpEVIrqgiLzepP5Z1k_kF3FjCHvJ04yCBvs,3370
123
+ pembot/.git/objects/c3/cc0da3d955ecec0f865c46c030a0c073697495,sha256=7ZXWsXqapYhbZZJwaaeAwqGcgX8JwoS5DazqOGaRHeQ,179
112
124
  pembot/.git/objects/ce/a4ffc1cf5eab61a2a0abd8f6dc941b580b69fd,sha256=yKUe_ZHD0UynTIrDRhuVqjDjKYDfZkWplqXjeSOD_bk,3894
113
125
  pembot/.git/objects/d0/937f7d832266337289d5ec09459f931a46fcf7,sha256=_RZ7Z2EZp1OOF_XZhY6e1tzWwhI8Fa5R9aaF_W8APBA,56
114
126
  pembot/.git/objects/e0/9162dbd64d85bb5ed740aa99faefa73f293d78,sha256=I5fpz3BQ2maFPTSu43T1uvYMuLiep1C3K6CsX8UMNPI,196
@@ -140,17 +152,17 @@ pembot/.git/objects/fe/cc5d8154b1e77e4c6beb23ce9cbe8fea55d34d,sha256=0it_Z3Lk5Mj
140
152
  pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.idx,sha256=CNzx_lz6v4PulPxRW2t9nz-ifvplpSFPhMA2M9WNUrA,3424
141
153
  pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.pack,sha256=dk3Sqrd0L-tNVLRy3uJdTYJNkw8v59mE1hV8zrCFNzc,41355
142
154
  pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.rev,sha256=7U3tpTWQ3dn5dwQo_KWMWxF31cKaDnCk2AzTO7Cx4Bg,388
143
- pembot/.git/refs/heads/main,sha256=XQJQDbhwKcxH2eABZeHWgpv9Yedy7ytKBo0tRWo03ZM,41
155
+ pembot/.git/refs/heads/main,sha256=a4NhMFSYteuWFt7KclttMW9wgrlLiv3bnLAY88lZVCU,41
144
156
  pembot/.git/refs/remotes/origin/HEAD,sha256=K7aiSqD8bEhBAPXVGim7rYQc0sdV9dk_qiBOXbtOsrQ,30
145
- pembot/.git/refs/remotes/origin/main,sha256=XQJQDbhwKcxH2eABZeHWgpv9Yedy7ytKBo0tRWo03ZM,41
157
+ pembot/.git/refs/remotes/origin/main,sha256=a4NhMFSYteuWFt7KclttMW9wgrlLiv3bnLAY88lZVCU,41
146
158
  pembot/AnyToText/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
147
- pembot/AnyToText/convertor.py,sha256=gqvhwFssUsAeirfO4n0Ztwga1hn8zHbdG96sMTjYrpE,17188
159
+ pembot/AnyToText/convertor.py,sha256=L0d6AevJBtyC-5pP-vJGHR_Uaumf0iWPNYLmwGaiUHI,8772
148
160
  pembot/TextEmbedder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
149
161
  pembot/TextEmbedder/gemini_embedder.py,sha256=P679-2mmQESlYKML1vcrwx_-CSgWJgIQk7NL4F7BLQE,677
150
162
  pembot/TextEmbedder/mongodb_embedder.py,sha256=-xIr-zrAGzCmgNeojuX6qYj2t019EVO1I6g-Hwq0FL8,10799
151
163
  pembot/TextEmbedder/mongodb_index_creator.py,sha256=kopqdVYJii_wExVrXGZjMfqWZ2dD42b3PeNWo71weHI,5354
152
164
  pembot/TextEmbedder/vector_query.py,sha256=Kh1uhx9CatB-oQlQtnW-1I2Qz7MGHI20n2h_8peAChM,1986
153
- pembot/config/config.yaml,sha256=uLE_cngQbhDN3pwqRaZC60yhXP5dSFUYIWQKZ0qkhFM,156
165
+ pembot/config/config.yaml,sha256=DMNFp9EdsN22ZGe8Tp4DRCn-wAWSinhN5oI1muXE-_Y,156
154
166
  pembot/pdf2markdown/LICENSE,sha256=1JTJhQjUYDqJzFJhNtitm7mHyE71PRHgetIqRRWg6Pk,1068
155
167
  pembot/pdf2markdown/README.md,sha256=jitM1pwI69oa0N4mXv5-SY1ka9Sz3jsRNCDdpW-50kY,4545
156
168
  pembot/pdf2markdown/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -206,7 +218,7 @@ pembot/pdf2markdown/config/config.yaml,sha256=w75W2Eg4-tu8rRk_23PqxWDh0010kRKLmP
206
218
  pembot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
207
219
  pembot/utils/inference_client.py,sha256=jeURmY2P5heVlH1dCV0XSgiX3U2qYGEmrnUv0KFpdww,5380
208
220
  pembot/utils/string_tools.py,sha256=gtRa5rBR0Q7GspTu2WtCnvhJQLFjPfWLvhmyiPkyStU,1883
209
- pembot-0.1.2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
210
- pembot-0.1.2.dist-info/WHEEL,sha256=Dyt6SBfaasWElUrURkknVFAZDHSTwxg3PaTza7RSbkY,100
211
- pembot-0.1.2.dist-info/METADATA,sha256=2Eaou5uE_IQB3jUmUnvxXuY_ifpKbo9ZhZpdVjj2DMk,313
212
- pembot-0.1.2.dist-info/RECORD,,
221
+ pembot-0.1.3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
222
+ pembot-0.1.3.dist-info/WHEEL,sha256=Dyt6SBfaasWElUrURkknVFAZDHSTwxg3PaTza7RSbkY,100
223
+ pembot-0.1.3.dist-info/METADATA,sha256=vftTQKyRwrw4BFJ_hdonub57buM5DyRXDWw28sUt884,313
224
+ pembot-0.1.3.dist-info/RECORD,,
File without changes