orbitkit 0.8.60__tar.gz → 0.8.61__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. {orbitkit-0.8.60/orbitkit.egg-info → orbitkit-0.8.61}/PKG-INFO +1 -1
  2. orbitkit-0.8.61/orbitkit/VERSION +1 -0
  3. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/audio_transcoder/netmind_extract_v1.py +13 -6
  4. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor/pdf_extractor_minerU_v1.py +2 -5
  5. {orbitkit-0.8.60 → orbitkit-0.8.61/orbitkit.egg-info}/PKG-INFO +1 -1
  6. orbitkit-0.8.60/orbitkit/VERSION +0 -1
  7. {orbitkit-0.8.60 → orbitkit-0.8.61}/LICENSE +0 -0
  8. {orbitkit-0.8.60 → orbitkit-0.8.61}/MANIFEST.in +0 -0
  9. {orbitkit-0.8.60 → orbitkit-0.8.61}/README.md +0 -0
  10. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/__init__.py +0 -0
  11. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/airflow_handler/__init__.py +0 -0
  12. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/airflow_handler/data_preprocessing.py +0 -0
  13. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/airflow_handler/file_flow_entry_process.py +0 -0
  14. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/airflow_handler/file_flow_exit_process.py +0 -0
  15. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/airflow_handler/file_handler.py +0 -0
  16. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/airflow_handler/file_handler_v2.py +0 -0
  17. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/audio_transcoder/__init__.py +0 -0
  18. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/constant/__init__.py +0 -0
  19. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/constant/report_schema.py +0 -0
  20. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/id_srv/__init__.py +0 -0
  21. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/id_srv/id_gen.py +0 -0
  22. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/id_srv/id_perm_like.py +0 -0
  23. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/lark_send/__init__.py +0 -0
  24. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/lark_send/lark.py +0 -0
  25. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/llm_tools/__init__.py +0 -0
  26. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/llm_tools/quick_rag_chat.py +0 -0
  27. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/orbit_type/__init__.py +0 -0
  28. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/orbit_type/doc_4_compile_rule.py +0 -0
  29. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/orbit_type/orbit_type_simple.py +0 -0
  30. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/orbit_type/tools.py +0 -0
  31. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_embedding/__init__.py +0 -0
  32. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_embedding/pdf_txt_embedding.py +0 -0
  33. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_embedding/pdf_txt_embedding_v2.py +0 -0
  34. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor/__init__.py +0 -0
  35. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor/a_stock_extractor_v1.py +0 -0
  36. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor/exceptions.py +0 -0
  37. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor/mineru_demo.py +0 -0
  38. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor/pdf_block_extractor_base.py +0 -0
  39. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor/pdf_block_extractor_v1.py +0 -0
  40. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor/pdf_block_extractor_v2.py +0 -0
  41. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor/pdf_extractor_azure.py +0 -0
  42. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor/pdf_extractor_netmind_v1.py +0 -0
  43. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor/pdf_extractor_netmind_v2.py +0 -0
  44. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor/pdf_extractor_netmind_v3.py +0 -0
  45. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor/pdf_extractor_orbit.py +0 -0
  46. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor_simple/__init__.py +0 -0
  47. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor_simple/base.py +0 -0
  48. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor_simple/cloud_provider.py +0 -0
  49. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor_simple/core.py +0 -0
  50. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor_simple/exceptions.py +0 -0
  51. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor_simple/extractors.py +0 -0
  52. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_extractor_simple/utils.py +0 -0
  53. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_writer/__init__.py +0 -0
  54. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/pdf_writer/pdf_writer_simple.py +0 -0
  55. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/__init__.py +0 -0
  56. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/cache_asset_downloader.py +0 -0
  57. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/common.py +0 -0
  58. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/customize_regix_manager.py +0 -0
  59. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/secret_manager.py +0 -0
  60. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/universal_extractor.py +0 -0
  61. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/util_aliyun.py +0 -0
  62. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/util_aliyun_oss_simple.py +0 -0
  63. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/util_aws.py +0 -0
  64. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/util_aws_s3_wrapper.py +0 -0
  65. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/util_date.py +0 -0
  66. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/util_html.py +0 -0
  67. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/util_kafka.py +0 -0
  68. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/util_md5.py +0 -0
  69. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/util_selenium.py +0 -0
  70. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/util_simple_timer.py +0 -0
  71. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/util_str.py +0 -0
  72. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/util_type_mapping.py +0 -0
  73. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit/util/util_url.py +0 -0
  74. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit.egg-info/SOURCES.txt +0 -0
  75. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit.egg-info/dependency_links.txt +0 -0
  76. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit.egg-info/not-zip-safe +0 -0
  77. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit.egg-info/requires.txt +0 -0
  78. {orbitkit-0.8.60 → orbitkit-0.8.61}/orbitkit.egg-info/top_level.txt +0 -0
  79. {orbitkit-0.8.60 → orbitkit-0.8.61}/setup.cfg +0 -0
  80. {orbitkit-0.8.60 → orbitkit-0.8.61}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: orbitkit
3
- Version: 0.8.60
3
+ Version: 0.8.61
4
4
  Summary: This project is only for Orbit Tech internal use.
5
5
  Home-page: https://github.com/clown-0726/orbitkit
6
6
  Author: Lilu Cao
@@ -0,0 +1 @@
1
+ 0.8.61
@@ -104,7 +104,7 @@ def text_processing(netmind_data, lang, translate_model='gpt-4.1-mini'):
104
104
  def send_request_to_api(s3_remote_url, **kwargs):
105
105
  endpoint = kwargs['endpoint']
106
106
  token = kwargs['token']
107
- lang = kwargs.get('lang', 'en')
107
+ lang = kwargs.get('lang', None)
108
108
  headers = {
109
109
  'Authorization': token,
110
110
  }
@@ -112,8 +112,9 @@ def send_request_to_api(s3_remote_url, **kwargs):
112
112
  files = {
113
113
  'model': (None, 'WhisperX'),
114
114
  'url': (None, s3_remote_url),
115
- 'language': (None, lang),
116
115
  }
116
+ if lang:
117
+ files['language'] = (None, lang)
117
118
 
118
119
  response = requests.post(endpoint, headers=headers, files=files)
119
120
  response.raise_for_status()
@@ -125,7 +126,7 @@ def send_request_to_api(s3_remote_url, **kwargs):
125
126
  def send_request_to_stream(file_steam, **kwargs):
126
127
  endpoint = kwargs['endpoint']
127
128
  token = kwargs['token']
128
- lang = kwargs.get('lang', 'en')
129
+ lang = kwargs.get('lang', None)
129
130
  headers = {
130
131
  'Authorization': token,
131
132
  }
@@ -133,8 +134,9 @@ def send_request_to_stream(file_steam, **kwargs):
133
134
  files = {
134
135
  'model': (None, 'WhisperX'),
135
136
  'files': (None, file_steam),
136
- 'language': (None, lang),
137
137
  }
138
+ if lang:
139
+ files['language'] = (None, lang)
138
140
 
139
141
  response = requests.post(endpoint, headers=headers, files=files)
140
142
  response.raise_for_status()
@@ -143,7 +145,7 @@ def send_request_to_stream(file_steam, **kwargs):
143
145
 
144
146
 
145
147
  def request_wav_from_netmind(s3_client, s3_path=None, file_steam=None, **kwargs):
146
- lang = kwargs.get('lang', 'en')
148
+ lang = kwargs.get('lang', None)
147
149
  folder = kwargs.get('folder', '')
148
150
  translate_model = kwargs.get('translate_model', 'gpt-4.1-mini')
149
151
  if s3_path:
@@ -166,7 +168,12 @@ def request_wav_from_netmind(s3_client, s3_path=None, file_steam=None, **kwargs)
166
168
  with open(json_netmind_wav_path, 'w', encoding='utf-8') as json_file:
167
169
  json.dump(data, json_file, ensure_ascii=False, indent=4)
168
170
 
169
- net_process = text_processing(data, lang, translate_model=translate_model)
171
+ # lang None 时跳过翻译,直接使用原始结果
172
+ if lang:
173
+ net_process = text_processing(data, lang, translate_model=translate_model)
174
+ else:
175
+ logger.info("lang=None, 跳过翻译,保持原语言")
176
+ net_process = data
170
177
 
171
178
  # 翻译接口处理
172
179
  json_netmind_lang_wav_path = os.path.join(folder, 'netmind_lang_wav.json')
@@ -338,12 +338,10 @@ class MinerUExtract:
338
338
  return self.default_extractable(pdf_pages, 'discarded_blocks', pages_body, block_seq)
339
339
 
340
340
  def default_blocks(self, pdf_pages):
341
- print(pdf_pages.keys())
342
- raise Exception('提取block方法异常')
341
+ raise Exception(f'提取block方法异常,未知参数: {list(pdf_pages.keys())}')
343
342
 
344
343
  def default_extractable(self, pdf_pages, block_key, pages_body, block_seq):
345
344
  for pages_block in pdf_pages.get(block_key, []):
346
- print(pages_block['type'])
347
345
  handler_type_func = getattr(self, f'level_two_{pages_block["type"]}', self.level_two_default_blocks)
348
346
  block_seq, pages_body = handler_type_func(block_seq, pages_block, pages_body)
349
347
  return pages_body
@@ -541,8 +539,7 @@ class MinerUExtract:
541
539
  return block_seq, pages_body
542
540
 
543
541
  def level_two_default_blocks(self, block_seq, pages_block, pages_body):
544
- print(pages_block.keys())
545
- raise Exception('提取block方法异常')
542
+ raise Exception(f'提取block方法异常,未知类型: {pages_block.get("type")},参数: {list(pages_block.keys())}')
546
543
 
547
544
  def get_com_lines(self, pages_lines):
548
545
  _block_arr = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: orbitkit
3
- Version: 0.8.60
3
+ Version: 0.8.61
4
4
  Summary: This project is only for Orbit Tech internal use.
5
5
  Home-page: https://github.com/clown-0726/orbitkit
6
6
  Author: Lilu Cao
@@ -1 +0,0 @@
1
- 0.8.60
File without changes
File without changes
File without changes
File without changes
File without changes