orbitkit 0.8.32__tar.gz → 0.8.33__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. {orbitkit-0.8.32/orbitkit.egg-info → orbitkit-0.8.33}/PKG-INFO +1 -1
  2. orbitkit-0.8.33/orbitkit/VERSION +1 -0
  3. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor/pdf_extractor_netmind_v3.py +6 -0
  4. {orbitkit-0.8.32 → orbitkit-0.8.33/orbitkit.egg-info}/PKG-INFO +1 -1
  5. orbitkit-0.8.32/orbitkit/VERSION +0 -1
  6. {orbitkit-0.8.32 → orbitkit-0.8.33}/LICENSE +0 -0
  7. {orbitkit-0.8.32 → orbitkit-0.8.33}/MANIFEST.in +0 -0
  8. {orbitkit-0.8.32 → orbitkit-0.8.33}/README.md +0 -0
  9. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/__init__.py +0 -0
  10. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/airflow_handler/__init__.py +0 -0
  11. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/airflow_handler/data_preprocessing.py +0 -0
  12. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/airflow_handler/file_flow_entry_process.py +0 -0
  13. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/airflow_handler/file_flow_exit_process.py +0 -0
  14. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/airflow_handler/file_handler.py +0 -0
  15. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/airflow_handler/file_handler_v2.py +0 -0
  16. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/audio_transcoder/__init__.py +0 -0
  17. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/audio_transcoder/netmind_extract_v1.py +0 -0
  18. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/constant/__init__.py +0 -0
  19. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/constant/report_schema.py +0 -0
  20. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/id_srv/__init__.py +0 -0
  21. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/id_srv/id_gen.py +0 -0
  22. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/id_srv/id_perm_like.py +0 -0
  23. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/lark_send/__init__.py +0 -0
  24. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/lark_send/lark.py +0 -0
  25. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/llm_tools/__init__.py +0 -0
  26. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/llm_tools/quick_rag_chat.py +0 -0
  27. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/orbit_type/__init__.py +0 -0
  28. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/orbit_type/doc_4_compile_rule.py +0 -0
  29. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/orbit_type/orbit_type_simple.py +0 -0
  30. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/orbit_type/tools.py +0 -0
  31. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_embedding/__init__.py +0 -0
  32. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_embedding/pdf_txt_embedding.py +0 -0
  33. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_embedding/pdf_txt_embedding_v2.py +0 -0
  34. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor/__init__.py +0 -0
  35. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor/a_stock_extractor_v1.py +0 -0
  36. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor/exceptions.py +0 -0
  37. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor/pdf_block_extractor_base.py +0 -0
  38. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor/pdf_block_extractor_v1.py +0 -0
  39. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor/pdf_block_extractor_v2.py +0 -0
  40. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor/pdf_extractor_azure.py +0 -0
  41. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor/pdf_extractor_minerU_v1.py +0 -0
  42. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor/pdf_extractor_netmind_v1.py +0 -0
  43. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor/pdf_extractor_netmind_v2.py +0 -0
  44. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor/pdf_extractor_orbit.py +0 -0
  45. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor_simple/__init__.py +0 -0
  46. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor_simple/base.py +0 -0
  47. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor_simple/cloud_provider.py +0 -0
  48. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor_simple/core.py +0 -0
  49. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor_simple/exceptions.py +0 -0
  50. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor_simple/extractors.py +0 -0
  51. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_extractor_simple/utils.py +0 -0
  52. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_writer/__init__.py +0 -0
  53. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/pdf_writer/pdf_writer_simple.py +0 -0
  54. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/util/__init__.py +0 -0
  55. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/util/common.py +0 -0
  56. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/util/customize_regix_manager.py +0 -0
  57. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/util/secret_manager.py +0 -0
  58. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/util/util_aliyun.py +0 -0
  59. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/util/util_aliyun_oss_simple.py +0 -0
  60. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/util/util_aws.py +0 -0
  61. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/util/util_aws_s3_wrapper.py +0 -0
  62. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/util/util_date.py +0 -0
  63. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/util/util_html.py +0 -0
  64. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/util/util_kafka.py +0 -0
  65. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/util/util_md5.py +0 -0
  66. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/util/util_selenium.py +0 -0
  67. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/util/util_simple_timer.py +0 -0
  68. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/util/util_str.py +0 -0
  69. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/util/util_type_mapping.py +0 -0
  70. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit/util/util_url.py +0 -0
  71. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit.egg-info/SOURCES.txt +0 -0
  72. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit.egg-info/dependency_links.txt +0 -0
  73. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit.egg-info/not-zip-safe +0 -0
  74. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit.egg-info/requires.txt +0 -0
  75. {orbitkit-0.8.32 → orbitkit-0.8.33}/orbitkit.egg-info/top_level.txt +0 -0
  76. {orbitkit-0.8.32 → orbitkit-0.8.33}/setup.cfg +0 -0
  77. {orbitkit-0.8.32 → orbitkit-0.8.33}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: orbitkit
3
- Version: 0.8.32
3
+ Version: 0.8.33
4
4
  Summary: This project is only for Orbit Tech internal use.
5
5
  Home-page: https://github.com/clown-0726/orbitkit
6
6
  Author: Lilu Cao
@@ -0,0 +1 @@
1
+ 0.8.33
@@ -46,6 +46,8 @@ class PdfExtractorNetmindFileAnalysis:
46
46
  self.s3_path = s3_path
47
47
  self.slice_option = slice_option
48
48
  self.max_workers = max_workers
49
+ self.input_file_size = None
50
+ self.total_pages = None
49
51
  self.aws_access_key_id = get_from_dict_or_env(kwargs, "aws_access_key_id", "AWS_ACCESS_KEY_ID")
50
52
  self.aws_secret_access_key = get_from_dict_or_env(kwargs, "aws_secret_access_key", "AWS_SECRET_ACCESS_KEY")
51
53
  self.s3_client = boto3.client('s3',
@@ -67,8 +69,12 @@ class PdfExtractorNetmindFileAnalysis:
67
69
  def split_pdf(self, input_file: str, output_folder: str) -> List[Dict[str, str]]:
68
70
  Path(output_folder).mkdir(parents=True, exist_ok=True)
69
71
  hash_id = id_srv.get_random_short_id()
72
+ # 获取输入文件的大小
73
+ input_file_path = Path(input_file)
74
+ self.input_file_size = input_file_path.stat().st_size # 获取输入文件的大小(字节)
70
75
  pdf_document = fitz.open(input_file)
71
76
  total_pages = len(pdf_document)
77
+ self.total_pages = total_pages
72
78
  pages_per_split = self.slice_option.split_page_number
73
79
  file_path_list = []
74
80
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: orbitkit
3
- Version: 0.8.32
3
+ Version: 0.8.33
4
4
  Summary: This project is only for Orbit Tech internal use.
5
5
  Home-page: https://github.com/clown-0726/orbitkit
6
6
  Author: Lilu Cao
@@ -1 +0,0 @@
1
- 0.8.32
File without changes
File without changes
File without changes
File without changes
File without changes