pembot 0.1.8__py2.py3-none-any.whl → 0.1.10__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pembot might be problematic. Click here for more details.

Files changed (30) hide show
  1. pembot/.git/COMMIT_EDITMSG +1 -1
  2. pembot/.git/index +0 -0
  3. pembot/.git/logs/HEAD +2 -0
  4. pembot/.git/logs/refs/heads/main +2 -0
  5. pembot/.git/logs/refs/remotes/origin/main +2 -0
  6. pembot/.git/objects/06/ef9ad559094e5b48fe2a1b437dca5cea07c06b +0 -0
  7. pembot/.git/objects/15/16e2e3e4eb2a9a5304ed49c8405e19ac6ee3e5 +0 -0
  8. pembot/.git/objects/41/cca8bf63122d1044d2fb36f63467ccd500832e +0 -0
  9. pembot/.git/objects/8c/122efcd19ed7f20b8e7ce8eaecdff8e67ca8fa +0 -0
  10. pembot/.git/objects/8d/58ec13be39949ecfe7211b42c56acd2a83dc72 +1 -0
  11. pembot/.git/objects/92/94065a10a11fe0ba233baeb658f605bacd1625 +0 -0
  12. pembot/.git/objects/95/5e4b9fb51a8891b972a85719a25b521bdaf97e +0 -0
  13. pembot/.git/objects/a2/8dc2ae0dec8333e8a24d1d195067acf2bc03fc +0 -0
  14. pembot/.git/objects/c0/e6cf6ef4d7f0100113213d8bab75b966cd79ef +0 -0
  15. pembot/.git/objects/c9/d9d9b3a93b142e8b7266fc4e3e2417128a0b32 +0 -0
  16. pembot/.git/objects/d3/508f1537e9bd48bc784da569e14c342bc9c05e +0 -0
  17. pembot/.git/objects/d6/3c63232c606d61eaaa7acce7a7a07d134d04da +0 -0
  18. pembot/.git/objects/e2/dc7f666268cf207253a15d55a7fc73093c6d3b +3 -0
  19. pembot/.git/objects/e5/0cd305278d92afa13f3876027fd52639368788 +0 -0
  20. pembot/.git/refs/heads/main +1 -1
  21. pembot/.git/refs/remotes/origin/main +1 -1
  22. pembot/AnyToText/convertor.py +1 -0
  23. pembot/__init__.py +1 -1
  24. pembot/config/config.yaml +1 -1
  25. pembot/pdf2markdown/extract.py +31 -19
  26. pembot/requirements.txt +1 -1
  27. {pembot-0.1.8.dist-info → pembot-0.1.10.dist-info}/METADATA +1 -1
  28. {pembot-0.1.8.dist-info → pembot-0.1.10.dist-info}/RECORD +30 -16
  29. {pembot-0.1.8.dist-info → pembot-0.1.10.dist-info}/WHEEL +0 -0
  30. {pembot-0.1.8.dist-info → pembot-0.1.10.dist-info}/licenses/LICENSE +0 -0
@@ -1 +1 @@
1
- cyto/fixed the excel file input bug, in the conversion method; added exceptions where there is invalid input
1
+ added logs; fixed typo
pembot/.git/index CHANGED
Binary file
pembot/.git/logs/HEAD CHANGED
@@ -20,3 +20,5 @@ e6adbc3c373070269f97ef82d4f63027d7878f67 81d01e1c63d48b096c77aae83471d42272ca9fc
20
20
  81d01e1c63d48b096c77aae83471d42272ca9fce e89cb4f5af158d26dcff5eed03dba6671a818739 cyto <silverstone965@gmail.com> 1758891589 +0530 commit: cyto/made the excel convertor compatible with suffix input from both system file and with bytes + manual suffix input
21
21
  e89cb4f5af158d26dcff5eed03dba6671a818739 e3c62c141fc65ef2be0095c49b23e06263f0b734 cyto <silverstone965@gmail.com> 1758892053 +0530 commit: silly willy mistake
22
22
  e3c62c141fc65ef2be0095c49b23e06263f0b734 d440b20aae1265dabbd3ddaafb24c35e40e3ab3c cyto <silverstone965@gmail.com> 1759127664 +0530 commit: cyto/fixed the excel file input bug, in the conversion method; added exceptions where there is invalid input
23
+ d440b20aae1265dabbd3ddaafb24c35e40e3ab3c 8d58ec13be39949ecfe7211b42c56acd2a83dc72 cyto <silverstone965@gmail.com> 1761392079 +0530 commit: added DeepSeek-OCR as a model option to use spaces
24
+ 8d58ec13be39949ecfe7211b42c56acd2a83dc72 e2dc7f666268cf207253a15d55a7fc73093c6d3b cyto <silverstone965@gmail.com> 1761539934 +0530 commit: added logs; fixed typo
@@ -20,3 +20,5 @@ e6adbc3c373070269f97ef82d4f63027d7878f67 81d01e1c63d48b096c77aae83471d42272ca9fc
20
20
  81d01e1c63d48b096c77aae83471d42272ca9fce e89cb4f5af158d26dcff5eed03dba6671a818739 cyto <silverstone965@gmail.com> 1758891589 +0530 commit: cyto/made the excel convertor compatible with suffix input from both system file and with bytes + manual suffix input
21
21
  e89cb4f5af158d26dcff5eed03dba6671a818739 e3c62c141fc65ef2be0095c49b23e06263f0b734 cyto <silverstone965@gmail.com> 1758892053 +0530 commit: silly willy mistake
22
22
  e3c62c141fc65ef2be0095c49b23e06263f0b734 d440b20aae1265dabbd3ddaafb24c35e40e3ab3c cyto <silverstone965@gmail.com> 1759127664 +0530 commit: cyto/fixed the excel file input bug, in the conversion method; added exceptions where there is invalid input
23
+ d440b20aae1265dabbd3ddaafb24c35e40e3ab3c 8d58ec13be39949ecfe7211b42c56acd2a83dc72 cyto <silverstone965@gmail.com> 1761392079 +0530 commit: added DeepSeek-OCR as a model option to use spaces
24
+ 8d58ec13be39949ecfe7211b42c56acd2a83dc72 e2dc7f666268cf207253a15d55a7fc73093c6d3b cyto <silverstone965@gmail.com> 1761539934 +0530 commit: added logs; fixed typo
@@ -19,3 +19,5 @@ e6adbc3c373070269f97ef82d4f63027d7878f67 81d01e1c63d48b096c77aae83471d42272ca9fc
19
19
  81d01e1c63d48b096c77aae83471d42272ca9fce e89cb4f5af158d26dcff5eed03dba6671a818739 cyto <silverstone965@gmail.com> 1758891600 +0530 update by push
20
20
  e89cb4f5af158d26dcff5eed03dba6671a818739 e3c62c141fc65ef2be0095c49b23e06263f0b734 cyto <silverstone965@gmail.com> 1758892065 +0530 update by push
21
21
  e3c62c141fc65ef2be0095c49b23e06263f0b734 d440b20aae1265dabbd3ddaafb24c35e40e3ab3c cyto <silverstone965@gmail.com> 1759127680 +0530 update by push
22
+ d440b20aae1265dabbd3ddaafb24c35e40e3ab3c 8d58ec13be39949ecfe7211b42c56acd2a83dc72 cyto <silverstone965@gmail.com> 1761392127 +0530 update by push
23
+ 8d58ec13be39949ecfe7211b42c56acd2a83dc72 e2dc7f666268cf207253a15d55a7fc73093c6d3b cyto <silverstone965@gmail.com> 1761539997 +0530 update by push
@@ -0,0 +1 @@
1
+ x��Mj�0@�u�ٗ�ѯc(��� � F3��Բ��z�� �~��q-e���}�*�L�<%o��!���O�40KD<{�f�]�f�Dj]�B9�!�� �TOٳ�[����Wxm��{�u�1���B��µ������a�g�͡�`�RC"*�}����>.@
@@ -0,0 +1,3 @@
1
+ x��A
2
+ �0E]����t� �x�I2���HEoo�����}^*�:W���
3
+ L�}�1J����@Y,E�����<e�G��)hB�1��iToco
@@ -1 +1 @@
1
- d440b20aae1265dabbd3ddaafb24c35e40e3ab3c
1
+ e2dc7f666268cf207253a15d55a7fc73093c6d3b
@@ -1 +1 @@
1
- d440b20aae1265dabbd3ddaafb24c35e40e3ab3c
1
+ e2dc7f666268cf207253a15d55a7fc73093c6d3b
@@ -53,6 +53,7 @@ class Convertor():
53
53
  myfile.write_bytes(file_bytes)
54
54
 
55
55
  if file_type == 'pdf':
56
+ print("PDF extraction model is: ", model_name)
56
57
  extractor = MarkdownPDFExtractor(str(myfile), output_path=str(output_dir), page_delimiter="-- NEXT PAGE --", model_name=model_name)
57
58
  extractor.extract()
58
59
  with open(output_dir / (myfile.stem + '.md')) as output_file:
pembot/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """
2
2
  A Python Package to convert PEM blog content to usseful information by leveraging LLMs
3
3
  """
4
- __version__ = '0.1.8'
4
+ __version__ = '0.1.10'
5
5
  from .main import save_to_json_file, make_query
6
6
  __all__ = ["save_to_json_file", "make_query"]
pembot/config/config.yaml CHANGED
@@ -2,4 +2,4 @@ OUTPUT_DIR: /home/cyto/dev/pembotdir
2
2
  PAGE_DELIMITER: ___________________________ NEXT PAGE ___________________________
3
3
  app:
4
4
  name: pembot
5
- version: 0.1.8
5
+ version: 0.1.10
@@ -88,6 +88,9 @@ class MarkdownPDFExtractor(PDFExtractor):
88
88
 
89
89
  # zerogpu public
90
90
  self.nclient= Client("deepak-mehta/ocr-simplify", hf_token= os.getenv('HF_TOKEN', ''))
91
+ elif "DeepSeek-OCR" in self.MODEL_NAME:
92
+ # zerogpu private
93
+ self.dclient= Client("deepak-mehta/deepseek-ocr", hf_token= os.getenv('HF_TOKEN', ''))
91
94
 
92
95
 
93
96
  self.markdown_content= ""
@@ -150,7 +153,7 @@ class MarkdownPDFExtractor(PDFExtractor):
150
153
  )
151
154
  # print("response :", response)
152
155
  return response.text
153
- elif 'nanonet' in model_name:
156
+ elif 'nanonet' in model_name or 'deepseek' in model_name:
154
157
 
155
158
  result= ""
156
159
  try:
@@ -159,24 +162,33 @@ class MarkdownPDFExtractor(PDFExtractor):
159
162
  print("file name: ", temp_file.name)
160
163
  gr_image= handle_file(temp_file.name)
161
164
  print("gr image : ", gr_image)
162
- result = self.nclient.predict(
163
- # model_name="Nanonets-OCR-s",
164
- # text= prompt,
165
- gr_image,
166
- # max_new_tokens=max_new_tokens,
167
- # temperature=0.6,
168
- # top_p=0.9,
169
- # top_k=50,
170
- # repetition_penalty=1.2,
171
-
172
- # prithiv model
173
- # api_name="/generate_image"
174
-
175
- max_new_tokens,
176
-
177
- # spaces zerogpu
178
- api_name="/predict"
179
- )
165
+ if 'nanonet' in model_name:
166
+ result = self.nclient.predict(
167
+ # model_name="Nanonets-OCR-s",
168
+ # text= prompt,
169
+ gr_image,
170
+ # max_new_tokens=max_new_tokens,
171
+ # temperature=0.6,
172
+ # top_p=0.9,
173
+ # top_k=50,
174
+ # repetition_penalty=1.2,
175
+
176
+ # prithiv model
177
+ # api_name="/generate_image"
178
+
179
+ max_new_tokens,
180
+
181
+ # spaces zerogpu
182
+ api_name="/predict"
183
+ )
184
+ else:
185
+ result = self.dclient.predict(
186
+ file_input=gr_image,
187
+ prompt_type= 'markdown',
188
+
189
+ # spaces zerogpu
190
+ api_name="/predict"
191
+ )
180
192
  print("ocr'd: ", result[:100] + "...")
181
193
  except Exception as e:
182
194
  print("Error during nanonet inference", e)
pembot/requirements.txt CHANGED
@@ -45,7 +45,7 @@ pandas==2.3.0
45
45
  pathlib==1.0.1
46
46
  pdfminer.six==20250506
47
47
  pdfplumber==0.11.7
48
- pembot==0.1.8
48
+ pembot==0.1.10
49
49
  pillow==11.3.0
50
50
  primp==0.15.0
51
51
  pyasn1==0.6.1
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pembot
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: A Python Package to convert PEM blog content to usseful information by leveraging LLMs
5
5
  Author-email: cyto <aryan_sidhwani@protonmail.com>
6
6
  License-Expression: MIT
@@ -1,19 +1,19 @@
1
1
  pembot/.gitignore,sha256=yyDEUmeqZekG4AOrU9Zvu2ZQhJvEzEg_lQp2CDfBhXM,92
2
2
  pembot/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
3
- pembot/__init__.py,sha256=IxKTPliAYwPHtO5PsE7Y02tusB3OjcD65i0B2PUeiAM,211
3
+ pembot/__init__.py,sha256=P-GGML59s8282frYS3dPEXwU9ISDuom4VG7VRdGVYjU,212
4
4
  pembot/gartner.py,sha256=3ALknQ5mSXIimmwCa3JFDzB_EW2hHEcQO1T2odyBquk,5408
5
5
  pembot/main.py,sha256=lZLIV8XPonvNoY4LVS-5fct1y9URMXWoSGJUKMw3Yg8,9667
6
6
  pembot/output_structure_local.py,sha256=YfpHzfTNeLMSsB_CjAamha9D6Iz7E1IC-tW9xPCMWFc,3000
7
7
  pembot/pem.py,sha256=mv6iGcN1peSY7z2dtCQ_BKj31EFBNfczBhps_d-0XDo,6377
8
8
  pembot/pyrightconfig.json,sha256=j2O2tc8Z-Zu7hEnhN9neoKk6-iLkAlp4qOmAxFyHB7Y,368
9
9
  pembot/query.py,sha256=zgfIJsSMDatFPl0Fw3MhK7fO8uBB0Yj4rxEAExqGyGA,18054
10
- pembot/requirements.txt,sha256=ajv8mh0fR09U-wZjAZHL7FX94WN0kZWdg75i4thD5Kg,1548
10
+ pembot/requirements.txt,sha256=5gJ3kaHgUEDfaR_BqviDxKyE93ZmuHhS96xAKpzY4cY,1549
11
11
  pembot/search.py,sha256=IW0F8QjE-HSYP47v5P9EqfnzKgFEf5CGxeICtHDDrkE,9137
12
- pembot/.git/COMMIT_EDITMSG,sha256=7XEtbMeWZPaVjMa9i368lBcWx9YEBJu_oMq6RTGyb7g,109
12
+ pembot/.git/COMMIT_EDITMSG,sha256=k-6PMjV8Du_kSEzKBoE4Lw65A_88EZvTnZS2RWx6xek,23
13
13
  pembot/.git/HEAD,sha256=KNJb-Cr0wOK3L1CVmyvrhZ4-YLljCl6MYD2tTdsrboA,21
14
14
  pembot/.git/config,sha256=ZFl9d2GyxirgRXRsv8iULIieKxwGC9P6SAjB_AmTkmQ,271
15
15
  pembot/.git/description,sha256=hatsFj1DoX6pz3eIMIvKFGbxsKjRzJLibpv2PaQGKu4,73
16
- pembot/.git/index,sha256=-o4WBbTC28YSGXroXhc208B_0K3fI6HOIX4Elq-yC9Y,2054
16
+ pembot/.git/index,sha256=SwHdoySzwwxmawraqYnAynCIqLYEiRBK0dNwtGdx1Ys,2054
17
17
  pembot/.git/packed-refs,sha256=7DECsr7q7vJ6Gw6a2gS3dE4v-YzbxGiWYoSWM43DgsQ,112
18
18
  pembot/.git/hooks/applypatch-msg.sample,sha256=AiNJeguLAzqlijpSG4YphpOGz3qw4vEBlj0yiqYhk_c,478
19
19
  pembot/.git/hooks/commit-msg.sample,sha256=H3TV6SkpebVz69WXQdRsuT_zkazdCD00C5Q3B1PZJDc,896
@@ -30,13 +30,14 @@ pembot/.git/hooks/push-to-checkout.sample,sha256=pT0HQXmLKHxt16-mSu5HPzBeZdP0lGO
30
30
  pembot/.git/hooks/sendemail-validate.sample,sha256=ROv8kj3FRmvACWAvDs8Ge5xlRZq_6IaN3Em3jmztepI,2308
31
31
  pembot/.git/hooks/update.sample,sha256=jV8vqD4QPPCLV-qmdSHfkZT0XL28s32lKtWGCXoU0QY,3650
32
32
  pembot/.git/info/exclude,sha256=ZnH-g7egfIky7okWTR8nk7IxgFjri5jcXAbuClo7DsE,240
33
- pembot/.git/logs/HEAD,sha256=fiHdPGbqFYCfV8_1-R-SatYdhRwtL8Tg1zQLIi-yP_Y,5266
34
- pembot/.git/logs/refs/heads/main,sha256=fiHdPGbqFYCfV8_1-R-SatYdhRwtL8Tg1zQLIi-yP_Y,5266
33
+ pembot/.git/logs/HEAD,sha256=mXxnkjAFheSFjY64ZFEjigvL37uOJ0pV66vDIqQ3b-s,5618
34
+ pembot/.git/logs/refs/heads/main,sha256=mXxnkjAFheSFjY64ZFEjigvL37uOJ0pV66vDIqQ3b-s,5618
35
35
  pembot/.git/logs/refs/remotes/origin/HEAD,sha256=OrkNquczPPh6fEGtutFKva_-_JhAdwnvXpCCPC4N6jk,194
36
- pembot/.git/logs/refs/remotes/origin/main,sha256=bZMcopGrWNXDVyOQYChlWyKK1sUfiHrtrODQ23xRzPA,3066
36
+ pembot/.git/logs/refs/remotes/origin/main,sha256=a-MeM1HJwNFr00knzOx9xX8p96VyEA5SFPs9SQeu244,3358
37
37
  pembot/.git/objects/00/3ba85af0ed7b9f6ab099ca298c3d0c18fb002b,sha256=pnk_IbjhUJWavx5BKSlXX8CEvWEMoSm8Dv1tQrUmzn4,169
38
38
  pembot/.git/objects/01/5f71967c525963c827d7fe5415ae2c040c4a64,sha256=-qlT-5utWcwFnO3ADkH2SA2LBsdcph6wE2iePxJxkHs,170
39
39
  pembot/.git/objects/05/5e82e69847a636258cb994bb920c03a93b5ff4,sha256=eNZTNvT7qgsLCfJvRfTETWZIkk_vBEEroLNbPC8RRa4,90
40
+ pembot/.git/objects/06/ef9ad559094e5b48fe2a1b437dca5cea07c06b,sha256=Muyv2SPX9C670CzFqB_ss1KWQMRJD0EP03aClyo_Yhg,905
40
41
  pembot/.git/objects/06/f7563094fe405dfd9c69f05e357f4e20fc5979,sha256=1BPcr84XzGPl2ULpwFPcVi1zGLpgdd_zmvxICXD34Hk,90
41
42
  pembot/.git/objects/09/ee34d94dffd4c286df1d6d528b2e98e2a6cce2,sha256=FSXPGn6UBhR7s1Ug-afzCYLfGy8dE3Umn8dBKaahkDM,203
42
43
  pembot/.git/objects/0a/2121ea3115562cc205df572ea26532aaac5244,sha256=D0g2Dq2cUDMPiH6czmZePougntu2TNoVVEshqMCzJzo,905
@@ -52,6 +53,7 @@ pembot/.git/objects/0f/ccea3d0db4864a854f8b2c13b9f76b3601d200,sha256=Fq6qF_9lqg1
52
53
  pembot/.git/objects/10/9d1ca0463ea42bbbc435bcb43a90711211cf49,sha256=vR33_Raw-LpnaXGQc1MhSk_ZgEROO2Xa9n97YmA3gtQ,56
53
54
  pembot/.git/objects/10/d1fb81ceede7365dbe132a770a49026e86e9a5,sha256=OhI6pEx_G6KbujS7idkp5MxJd1Aw92Wn3Sl-JBgU2VU,115
54
55
  pembot/.git/objects/14/22c2d0cf79fd928ff7e2d77f96ad5b40cc2a31,sha256=2INSnjkW4KTAcfO2aLYVzjnpT89NXxx8TBJj4iU9e3Y,170
56
+ pembot/.git/objects/15/16e2e3e4eb2a9a5304ed49c8405e19ac6ee3e5,sha256=ZhNTAoIneijLZ3Xjn_mlNpfo64AYQSLl-_AvKlNToVA,55
55
57
  pembot/.git/objects/18/28e18ab80aa64d334b26428708140e280cbc63,sha256=PTF8WLVhzxBDTZhwU_PBHrkQBbijHbKvttSr0XVTOcU,3936
56
58
  pembot/.git/objects/19/f61df7dbd562d04f561288677bbf2f18f5dff7,sha256=zg8IdUSnMYpJ6HsfY2LQbXQTMwlT1IPWRSEiY2uDwyE,392
57
59
  pembot/.git/objects/1f/1fe55f9a705cce752d77718eb870b2c5160138,sha256=YGu15dmNYYWp30eUzWiJh68q_D8lVKEujWHAZ6f2Knw,904
@@ -75,6 +77,7 @@ pembot/.git/objects/3f/78215d7e17da726fb352fd92b3c117db9b63ba,sha256=J8r5hqTEgAw
75
77
  pembot/.git/objects/3f/e072cf3cb6a9f30c3e9936e3ddf622e80270d0,sha256=Z-UoKi2MYe0qGTtBxAr5cnIOHKkhoEXMgalevFUz9lA,2992
76
78
  pembot/.git/objects/41/ae8fa8f8baa2daee5ec0aa21ae17922ae051a0,sha256=TLuVmtSH9K33qB-WHMxKDUihHCrwdTtCKtjBs-rAnJ4,56
77
79
  pembot/.git/objects/41/cbeb6bcb4c6fa9ef9be571082d95ecb4ea0ee3,sha256=waMrzjG_o5D4JgHkjjqcDQCwuS17w60JRkVr25ZFlcI,117
80
+ pembot/.git/objects/41/cca8bf63122d1044d2fb36f63467ccd500832e,sha256=EdQFpFKwRO06A_EiJGZGmMGzYp4IO68KHmOXNSFezUA,526
78
81
  pembot/.git/objects/42/ef76e19df247993cf5b64aba5dccaf8587a375,sha256=aDo41HaWG05-IuCeY0RPuYfi31yFhtB4y0D36A92Dkw,115
79
82
  pembot/.git/objects/42/f03e1b66aa56bbb36a1c3a8dea9e1e727faffa,sha256=n4W2gcagesjI1rStKNxQ98q5UOHlfwFJGUADFeYldoE,418
80
83
  pembot/.git/objects/44/86da0f89c566c3bae8abf435d37aeca87f1632,sha256=S2hY860Ep-0c7gQcbgrH6ioG7-Hw9a3BwYHcCkwy1Hg,3884
@@ -113,17 +116,22 @@ pembot/.git/objects/87/d6df5217a4a374f8c1211a05f9bd657f72c9a7,sha256=OGq5-x1lFa9
113
116
  pembot/.git/objects/88/0c3d45ac59940344dfb6c45005f7e908173138,sha256=7VMQzB6baLdC2Uj5f84w-X6XLM3GinXGBQjewhXupAc,914
114
117
  pembot/.git/objects/89/d2439385c82b98104f27edf39bcf28a631233f,sha256=7jIYfDS2dTNKkTxZKOBmgsSZeoaE6e48JEPzIhI1ySQ,91
115
118
  pembot/.git/objects/8b/5be2af9b16f290549193859c214cd9072212e8,sha256=DhGeGisCdFZ0TcRKp5angRpaseI87TQDt5FtGZInstk,117
119
+ pembot/.git/objects/8c/122efcd19ed7f20b8e7ce8eaecdff8e67ca8fa,sha256=pqfy-pvE55ZbugbU4Z_rcOiX3-gi-ZL2Oq90FYdOWdc,169
120
+ pembot/.git/objects/8d/58ec13be39949ecfe7211b42c56acd2a83dc72,sha256=sJLULXCzo0CAnARNynNiXwWposMLEdmT2ICZnxRWJZQ,189
116
121
  pembot/.git/objects/8d/adc1d7891c79de24ba2c7c38b4c830bf61870a,sha256=QJaAleJXlBhybaUcSeKB7nC9OJg9gjP_xc071Wyq8BM,115
117
122
  pembot/.git/objects/8f/c00bf69f4ad3e50c13acc4a0988b6c0fe72b5a,sha256=uJVaujaQWN_NwzK9P0SM7cYp3I6GQFXdlYBPrnqVhcg,159
118
123
  pembot/.git/objects/90/f067b86364ab243a7e3bc75f936319ba9eac88,sha256=FLAmmgvYuEAx1-ZBU30rvDzP0ppXWRSVrzPWVnArIb0,203
119
124
  pembot/.git/objects/92/2448ecc557be58195468561e475b904bd1b349,sha256=mT1KGAHx7MalAkkpE7nAu6HlwXIB1Cts3MjZDLItErk,56
125
+ pembot/.git/objects/92/94065a10a11fe0ba233baeb658f605bacd1625,sha256=kgwBuHc_aE2M4qjS9-QDqoNEYFbJ__ujvqw0IfxbY4Y,90
120
126
  pembot/.git/objects/93/652290aac46c69b1b4dd83062b6cfe648dd643,sha256=WPgmr5bXli5s8rNdiUQM4IB4o_xyJe6nuI3TG4e5aYs,487
121
127
  pembot/.git/objects/93/8f29d9b4b1ae86e39dddf9e3d115a82ddfc9b6,sha256=xf8oZ5IBMTxfkH7MFfukV7ZIu0Apd-78eJTdlI7GBv0,90
122
128
  pembot/.git/objects/95/28bbccd167e3f4ad583a1ae9fac98a52620e27,sha256=jwJdRviwjGJIyMpE_BM6mr7B9ofGEsI5ZToJo5nmlao,263
129
+ pembot/.git/objects/95/5e4b9fb51a8891b972a85719a25b521bdaf97e,sha256=8KstJeKUc2xg_KxBSOKsIoue83RQ2_W8tn9aeHZ26TE,527
123
130
  pembot/.git/objects/97/b7aaa5c8b45f5471c9d39893cd4c893da6f444,sha256=PZW83V2jXf8-zDoTJqG1jNsCMURU8NUK8ljBD458MK0,3949
124
131
  pembot/.git/objects/99/89463f57f1f2931e5973bd543c80f18b0204bc,sha256=RoShFyJfmRnXYXP_y2c_e9KE_Otawr_T1mC22BqDNDI,6472
125
132
  pembot/.git/objects/9b/123713e30fc9e225f9ac8ff5b02f8f8cf86456,sha256=xIETiieOoilleucGg7vXOgjZ-v5PI0t34fDJjDD665A,4204
126
133
  pembot/.git/objects/9f/bc171dae3f6b60eaf86ed522b0adf6b123ec85,sha256=fBUMj01e9itIGlHZQcL4W3zTEmE6WgT-THQIKtC4-FA,56
134
+ pembot/.git/objects/a2/8dc2ae0dec8333e8a24d1d195067acf2bc03fc,sha256=4ycZpUoA5PSnw96rQFWQjAl9-h7w1T_QuSVQOQnY_YM,905
127
135
  pembot/.git/objects/a8/98d2c3947d30d8be64bd2bbcef68f956d5456b,sha256=lh2LurucwRdL6WP8ChgmjXrK2lR7HASIXzt4iHFrTf4,178
128
136
  pembot/.git/objects/a9/d5e349fa091647742b09eb3d05bac8f58fe547,sha256=kaU3Z4k6ptIwO8ktcjs2-kshb0bzM4y8Uur-a27_jnk,56
129
137
  pembot/.git/objects/ab/139d2cd4798dd8e2c565b80440b1a44b376126,sha256=v1UO-WINmigZNYD74kyIv310Kq5k4SNL-gQ2DYlw9xk,6258
@@ -144,23 +152,29 @@ pembot/.git/objects/bf/068a0714e2145de83a5c004f4213b091439d0e,sha256=MpiiCqAk6GQ
144
152
  pembot/.git/objects/bf/32a7e6872e5dc4025ee3df3c921ec7ade0855f,sha256=lwL9ickzIFtMJgNKaPp6nTGDlMhPs6fkZTWevQWK_Lc,56
145
153
  pembot/.git/objects/bf/518686b06069d2a8abd3689908b7e1a6e16b05,sha256=w-HgdJdX2_ZdiIptJv8BcWdeDEyhl42WEk8P72X8YKU,421
146
154
  pembot/.git/objects/c0/793458db6e1bee7f79f1a504fb8ff4963f8ed3,sha256=b8lo_OrMeGgirc9yY_OFjv5xVpG6FBpZnBf7jbtlmyw,421
155
+ pembot/.git/objects/c0/e6cf6ef4d7f0100113213d8bab75b966cd79ef,sha256=CiFonN9bS6HaZOKU8-wi72dUkS4kP1kiII3QeMfyoBo,170
147
156
  pembot/.git/objects/c0/f948ab4636a125bc202368e6c9cbe80d76169a,sha256=GPQso_R_RWWLx_pF3g58MiM4HyeSnpXTeLeKDfhkyPc,526
148
157
  pembot/.git/objects/c2/443060c07101948487cfa93cc39e082e9e0f5f,sha256=d9rjB8sgBOUQ-HQ8yu5I-c5Dqr_q2z0OOCXSufjDAak,3998
149
158
  pembot/.git/objects/c2/926f040b089a52edfb8351480f63619ab7e0ab,sha256=HAXSsWokz2tuk9Y952ogIEzSBlbUC4lZ1CjvWBc22Cg,56
150
159
  pembot/.git/objects/c2/ce19d575a8cccf6886862c4fa6afefba142511,sha256=kxbbFUJ1TpEVIrqgiLzepP5Z1k_kF3FjCHvJ04yCBvs,3370
151
160
  pembot/.git/objects/c3/cc0da3d955ecec0f865c46c030a0c073697495,sha256=7ZXWsXqapYhbZZJwaaeAwqGcgX8JwoS5DazqOGaRHeQ,179
152
161
  pembot/.git/objects/c6/b72ea9f8856d3bde28cb75775ebea9840535b8,sha256=dNygXpD49awTkQZZUIQQ9uQDEN3aKHKrQEPuiyLBT6Q,527
162
+ pembot/.git/objects/c9/d9d9b3a93b142e8b7266fc4e3e2417128a0b32,sha256=v3U1X176tqD2nTW1z3UQyCTradKh9nEb8KH-ZUXQZjE,115
153
163
  pembot/.git/objects/c9/efe79dee4c91d4bb8c3d3c6e01ff70ff79a722,sha256=mnJvaTyytYLB1XPeJS_MAY-q0X3karH0k7EeBpUM16k,115
154
164
  pembot/.git/objects/cc/348ab3677f744f8d7cd8b2ac7eb775528cfb1c,sha256=2GQxoBSPEaKEZoUWa_3qEockmzUQnuWN9ohEie28VW8,90
155
165
  pembot/.git/objects/ce/a4ffc1cf5eab61a2a0abd8f6dc941b580b69fd,sha256=yKUe_ZHD0UynTIrDRhuVqjDjKYDfZkWplqXjeSOD_bk,3894
156
166
  pembot/.git/objects/d0/937f7d832266337289d5ec09459f931a46fcf7,sha256=_RZ7Z2EZp1OOF_XZhY6e1tzWwhI8Fa5R9aaF_W8APBA,56
167
+ pembot/.git/objects/d3/508f1537e9bd48bc784da569e14c342bc9c05e,sha256=0VpYRyf_U_go58Y-wncv2bgDw3o9gXBzBBuhft1eBQQ,55
157
168
  pembot/.git/objects/d4/40b20aae1265dabbd3ddaafb24c35e40e3ab3c,sha256=gHVL-l-koZnke8dJEK06U6Vz9HjqoP762qB4QUU9wE0,220
169
+ pembot/.git/objects/d6/3c63232c606d61eaaa7acce7a7a07d134d04da,sha256=PKZzHS3Io6dfNXj-ZnGU5wMG9rb4jZ6ibrmaG9M4wHc,3063
158
170
  pembot/.git/objects/d9/ec420cb55a82e7efbc8564e30ec7f4c0f6021e,sha256=NfSA8eUrylFWTcpPSd2FoA8_TcifJ_pk3jBYmbfkNKQ,90
159
171
  pembot/.git/objects/dd/82bd16a51b9bad8241d9fb46619b1c6755cafe,sha256=36R9xSCSTvnCmK5IhuTacIC8FLQYWkVYh2QwxXJ2e7s,56
160
172
  pembot/.git/objects/e0/9162dbd64d85bb5ed740aa99faefa73f293d78,sha256=I5fpz3BQ2maFPTSu43T1uvYMuLiep1C3K6CsX8UMNPI,196
161
173
  pembot/.git/objects/e0/da740b542afc451c45b9b4be6c0c7a3c79b06c,sha256=oAb2b2VwhPXykdK_ZV8MEFwfy-ZPd2Nja2gAv20U7hc,115
174
+ pembot/.git/objects/e2/dc7f666268cf207253a15d55a7fc73093c6d3b,sha256=1Bbu_8zk7EMnd2bbE1UMBGVGqMa-aab7XxCfJ89F8XQ,166
162
175
  pembot/.git/objects/e3/c62c141fc65ef2be0095c49b23e06263f0b734,sha256=aasWH_Wns5FItvtPMiBayVAD-4Z6jKHcV8nElgi0olU,164
163
176
  pembot/.git/objects/e3/da98f3722c2d0c937db0872836fc4491e4487a,sha256=DNdNDoMdjDexgwLErwUZDQCpvq4-QkFHtbVRXW_jKTk,168
177
+ pembot/.git/objects/e5/0cd305278d92afa13f3876027fd52639368788,sha256=a7XcDeIDSzawYw4MZnEIOS1BaT9us3ZZRk5RPwIA0D4,116
164
178
  pembot/.git/objects/e5/3070f2b07f45d031444b09b1b38658f3caf29e,sha256=irJ-z8kPZmg85B0f4TQz73yJoCMWMWsIR3Pi5wx1Dlk,4034
165
179
  pembot/.git/objects/e6/adbc3c373070269f97ef82d4f63027d7878f67,sha256=e2NqH8wvYLSYgpHFoGTpurJ4gKU_PHSULZmjJETD3FQ,204
166
180
  pembot/.git/objects/e7/911a702079a6144997ea4e70f59abbe59ec2bc,sha256=r4zY-__F4gSfjE7onRTrcxvv8umXKuPuFzd95AiQ0cs,392
@@ -192,21 +206,21 @@ pembot/.git/objects/fe/cc5d8154b1e77e4c6beb23ce9cbe8fea55d34d,sha256=0it_Z3Lk5Mj
192
206
  pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.idx,sha256=CNzx_lz6v4PulPxRW2t9nz-ifvplpSFPhMA2M9WNUrA,3424
193
207
  pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.pack,sha256=dk3Sqrd0L-tNVLRy3uJdTYJNkw8v59mE1hV8zrCFNzc,41355
194
208
  pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.rev,sha256=7U3tpTWQ3dn5dwQo_KWMWxF31cKaDnCk2AzTO7Cx4Bg,388
195
- pembot/.git/refs/heads/main,sha256=ZYwcXLoKw5nkSOKKyg8_RjJmOhgY3YmP_bQNs8yI7Dk,41
209
+ pembot/.git/refs/heads/main,sha256=JUxQiuqSRv5aER4-ah752DbF4NxuOYeUYvbJkqFdr9g,41
196
210
  pembot/.git/refs/remotes/origin/HEAD,sha256=K7aiSqD8bEhBAPXVGim7rYQc0sdV9dk_qiBOXbtOsrQ,30
197
- pembot/.git/refs/remotes/origin/main,sha256=ZYwcXLoKw5nkSOKKyg8_RjJmOhgY3YmP_bQNs8yI7Dk,41
211
+ pembot/.git/refs/remotes/origin/main,sha256=JUxQiuqSRv5aER4-ah752DbF4NxuOYeUYvbJkqFdr9g,41
198
212
  pembot/AnyToText/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
199
- pembot/AnyToText/convertor.py,sha256=1wMzqcBvt6hgjvvdd2evxpFInOy-bmrr8dkOE-fUC4I,9611
213
+ pembot/AnyToText/convertor.py,sha256=DIwQdzSbi1fQgt2kB1Cv7Xbz1bScB1A1Q_vOVJ1ykmo,9678
200
214
  pembot/TextEmbedder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
201
215
  pembot/TextEmbedder/gemini_embedder.py,sha256=P679-2mmQESlYKML1vcrwx_-CSgWJgIQk7NL4F7BLQE,677
202
216
  pembot/TextEmbedder/mongodb_embedder.py,sha256=-xIr-zrAGzCmgNeojuX6qYj2t019EVO1I6g-Hwq0FL8,10799
203
217
  pembot/TextEmbedder/mongodb_index_creator.py,sha256=kopqdVYJii_wExVrXGZjMfqWZ2dD42b3PeNWo71weHI,5354
204
218
  pembot/TextEmbedder/vector_query.py,sha256=Kh1uhx9CatB-oQlQtnW-1I2Qz7MGHI20n2h_8peAChM,1986
205
- pembot/config/config.yaml,sha256=D1rcEalG5NiyuWcv-wsB5YkNd_S3-GfBo9Q8slJInds,156
219
+ pembot/config/config.yaml,sha256=a5eYoHXDm3HctUoIAJMg-avt9jyEb3zNEQou10cMiHc,157
206
220
  pembot/pdf2markdown/LICENSE,sha256=1JTJhQjUYDqJzFJhNtitm7mHyE71PRHgetIqRRWg6Pk,1068
207
221
  pembot/pdf2markdown/README.md,sha256=jitM1pwI69oa0N4mXv5-SY1ka9Sz3jsRNCDdpW-50kY,4545
208
222
  pembot/pdf2markdown/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
209
- pembot/pdf2markdown/extract.py,sha256=0fnZnUqNy7shef6dijqEAMgBSf0YAiEx_mbRSU_pmQg,34418
223
+ pembot/pdf2markdown/extract.py,sha256=pgK5jFjKxoxZea_6AGcugWy6VfduxnsLoflNDT4WAsM,35034
210
224
  pembot/pdf2markdown/pyrightconfig.json,sha256=Vt_k4N2LtZhth0lQOQAOnRKDOQkYYVzmdtb-bP3gu7M,47
211
225
  pembot/pdf2markdown/requirements.txt,sha256=0vZQzkSZKLNVUttd4euoDyYEy0nc2W3CIVxhepHW5Ho,76
212
226
  pembot/pdf2markdown/.git/COMMIT_EDITMSG,sha256=K6REOtE5mjRmxGSeQdpaFAr1luu1BmaZnzXkTjKINzY,55
@@ -258,7 +272,7 @@ pembot/pdf2markdown/config/config.yaml,sha256=w75W2Eg4-tu8rRk_23PqxWDh0010kRKLmP
258
272
  pembot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
259
273
  pembot/utils/inference_client.py,sha256=jeURmY2P5heVlH1dCV0XSgiX3U2qYGEmrnUv0KFpdww,5380
260
274
  pembot/utils/string_tools.py,sha256=gtRa5rBR0Q7GspTu2WtCnvhJQLFjPfWLvhmyiPkyStU,1883
261
- pembot-0.1.8.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
262
- pembot-0.1.8.dist-info/WHEEL,sha256=Dyt6SBfaasWElUrURkknVFAZDHSTwxg3PaTza7RSbkY,100
263
- pembot-0.1.8.dist-info/METADATA,sha256=ezriS0MDurBF6OYUxptKlMABED656ZVZaDkAS84NjAY,313
264
- pembot-0.1.8.dist-info/RECORD,,
275
+ pembot-0.1.10.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
276
+ pembot-0.1.10.dist-info/WHEEL,sha256=Dyt6SBfaasWElUrURkknVFAZDHSTwxg3PaTza7RSbkY,100
277
+ pembot-0.1.10.dist-info/METADATA,sha256=Xj477G28eNRXUvVZEwcJPyteu0GVYJ23Nv97z44tuvU,314
278
+ pembot-0.1.10.dist-info/RECORD,,