orbitkit 0.8.45__tar.gz → 0.8.46__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. {orbitkit-0.8.45/orbitkit.egg-info → orbitkit-0.8.46}/PKG-INFO +1 -1
  2. orbitkit-0.8.46/orbitkit/VERSION +1 -0
  3. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/airflow_handler/data_preprocessing.py +4 -4
  4. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/airflow_handler/file_flow_entry_process.py +8 -2
  5. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/airflow_handler/file_handler_v2.py +2 -1
  6. {orbitkit-0.8.45 → orbitkit-0.8.46/orbitkit.egg-info}/PKG-INFO +1 -1
  7. orbitkit-0.8.45/orbitkit/VERSION +0 -1
  8. {orbitkit-0.8.45 → orbitkit-0.8.46}/LICENSE +0 -0
  9. {orbitkit-0.8.45 → orbitkit-0.8.46}/MANIFEST.in +0 -0
  10. {orbitkit-0.8.45 → orbitkit-0.8.46}/README.md +0 -0
  11. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/__init__.py +0 -0
  12. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/airflow_handler/__init__.py +0 -0
  13. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/airflow_handler/file_flow_exit_process.py +0 -0
  14. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/airflow_handler/file_handler.py +0 -0
  15. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/audio_transcoder/__init__.py +0 -0
  16. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/audio_transcoder/netmind_extract_v1.py +0 -0
  17. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/constant/__init__.py +0 -0
  18. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/constant/report_schema.py +0 -0
  19. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/id_srv/__init__.py +0 -0
  20. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/id_srv/id_gen.py +0 -0
  21. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/id_srv/id_perm_like.py +0 -0
  22. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/lark_send/__init__.py +0 -0
  23. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/lark_send/lark.py +0 -0
  24. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/llm_tools/__init__.py +0 -0
  25. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/llm_tools/quick_rag_chat.py +0 -0
  26. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/orbit_type/__init__.py +0 -0
  27. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/orbit_type/doc_4_compile_rule.py +0 -0
  28. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/orbit_type/orbit_type_simple.py +0 -0
  29. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/orbit_type/tools.py +0 -0
  30. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_embedding/__init__.py +0 -0
  31. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_embedding/pdf_txt_embedding.py +0 -0
  32. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_embedding/pdf_txt_embedding_v2.py +0 -0
  33. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/__init__.py +0 -0
  34. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/a_stock_extractor_v1.py +0 -0
  35. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/exceptions.py +0 -0
  36. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/pdf_block_extractor_base.py +0 -0
  37. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/pdf_block_extractor_v1.py +0 -0
  38. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/pdf_block_extractor_v2.py +0 -0
  39. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/pdf_extractor_azure.py +0 -0
  40. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/pdf_extractor_minerU_v1.py +0 -0
  41. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/pdf_extractor_netmind_v1.py +0 -0
  42. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/pdf_extractor_netmind_v2.py +0 -0
  43. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/pdf_extractor_netmind_v3.py +0 -0
  44. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor/pdf_extractor_orbit.py +0 -0
  45. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor_simple/__init__.py +0 -0
  46. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor_simple/base.py +0 -0
  47. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor_simple/cloud_provider.py +0 -0
  48. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor_simple/core.py +0 -0
  49. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor_simple/exceptions.py +0 -0
  50. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor_simple/extractors.py +0 -0
  51. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_extractor_simple/utils.py +0 -0
  52. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_writer/__init__.py +0 -0
  53. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/pdf_writer/pdf_writer_simple.py +0 -0
  54. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/__init__.py +0 -0
  55. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/cache_asset_downloader.py +0 -0
  56. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/common.py +0 -0
  57. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/customize_regix_manager.py +0 -0
  58. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/secret_manager.py +0 -0
  59. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_aliyun.py +0 -0
  60. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_aliyun_oss_simple.py +0 -0
  61. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_aws.py +0 -0
  62. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_aws_s3_wrapper.py +0 -0
  63. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_date.py +0 -0
  64. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_html.py +0 -0
  65. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_kafka.py +0 -0
  66. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_md5.py +0 -0
  67. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_selenium.py +0 -0
  68. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_simple_timer.py +0 -0
  69. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_str.py +0 -0
  70. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_type_mapping.py +0 -0
  71. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit/util/util_url.py +0 -0
  72. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit.egg-info/SOURCES.txt +0 -0
  73. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit.egg-info/dependency_links.txt +0 -0
  74. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit.egg-info/not-zip-safe +0 -0
  75. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit.egg-info/requires.txt +0 -0
  76. {orbitkit-0.8.45 → orbitkit-0.8.46}/orbitkit.egg-info/top_level.txt +0 -0
  77. {orbitkit-0.8.45 → orbitkit-0.8.46}/setup.cfg +0 -0
  78. {orbitkit-0.8.45 → orbitkit-0.8.46}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: orbitkit
3
- Version: 0.8.45
3
+ Version: 0.8.46
4
4
  Summary: This project is only for Orbit Tech internal use.
5
5
  Home-page: https://github.com/clown-0726/orbitkit
6
6
  Author: Lilu Cao
@@ -0,0 +1 @@
1
+ 0.8.46
@@ -126,7 +126,7 @@ class DocumentProcessor:
126
126
  return "extract" if reported_date < datetime.datetime(2020, 1, 1) else target_stage
127
127
 
128
128
  @classmethod
129
- async def create_record(cls, doc, start_stage):
129
+ async def create_record(cls, doc, start_stage, important_level):
130
130
  attachments = doc.get('x_attachments', []) if start_stage == 'convert' else doc.get('x_attachments_pdf', [])
131
131
  s3_path_info = []
132
132
  add_extends = {}
@@ -151,7 +151,7 @@ class DocumentProcessor:
151
151
  'store_path'],
152
152
  'file_name': att['file_name']
153
153
  })
154
- result_dict = {'id': doc['_id'], 's3_path_info': s3_path_info}
154
+ result_dict = {'id': doc['_id'], 's3_path_info': s3_path_info, 'important_level': important_level}
155
155
  if add_extends:
156
156
  result_dict['extends'] = add_extends
157
157
  return True, result_dict
@@ -165,7 +165,7 @@ class DocumentProcessor:
165
165
  }
166
166
 
167
167
  @classmethod
168
- async def process(cls, doc, custom_process_step):
168
+ async def process(cls, doc, custom_process_step, important_level):
169
169
  report_id = doc['_id']
170
170
  # 筛选文件
171
171
  doc = cls.stock_us_filter_by_is_primary(doc)
@@ -195,7 +195,7 @@ class DocumentProcessor:
195
195
  "Invalid process sequence: 'start_stage' occurs before 'target_stage'.",
196
196
  doc['_id'])
197
197
 
198
- file_name_check_status, record = await cls.create_record(doc, start_stage)
198
+ file_name_check_status, record = await cls.create_record(doc, start_stage, important_level)
199
199
  if not file_name_check_status:
200
200
  return cls.create_result_info("step_error", "Document file name too lang.", report_id)
201
201
 
@@ -177,7 +177,13 @@ class FilingOfficialProcessor:
177
177
 
178
178
  async def process_task_entry(self, source: Literal["filing_data", "reports_view", "G7_demo"],
179
179
  query: dict, tags: list[str], priority: str,
180
- is_important: bool = False, custom_step: Optional[list[str]] = None):
180
+ is_important: bool = False, custom_step: Optional[list[str]] = None, important_level = None):
181
+
182
+ if not important_level or not isinstance(important_level, int):
183
+ important_level = 0
184
+
185
+ if important_level == 0:
186
+ raise ValueError(f'important_level must be an integer (int) greater than 0. {important_level}')
181
187
 
182
188
  allowed_steps = {"convert", "extract", "embedding"}
183
189
  if custom_step is not None:
@@ -206,7 +212,7 @@ class FilingOfficialProcessor:
206
212
  self.all_stat_count['all'] += 1
207
213
  for orbit_entity_id in doc['x_orbit_data']['perm_id_list']:
208
214
  perm_id_set.add(orbit_entity_id)
209
- result_record = await self.data_processor.process(doc=doc, custom_process_step=custom_step)
215
+ result_record = await self.data_processor.process(doc=doc, custom_process_step=custom_step, important_level=important_level)
210
216
  process_data.append(result_record)
211
217
  if len(process_data) >= self.max_batch_size:
212
218
  file_flow_info, xbrl_data, except_id_list, doc_error_list = self.data_processor.split_data_by_spider_name_and_step(
@@ -137,7 +137,8 @@ class FileFlowHandleV2:
137
137
  'created_at': now,
138
138
  'updated_at': now,
139
139
  'tags': params['tags'],
140
- 'tag': params['tag']
140
+ 'tag': params['tag'],
141
+ 'important_level': record.get('important_level', 0)
141
142
  }
142
143
 
143
144
  step = {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: orbitkit
3
- Version: 0.8.45
3
+ Version: 0.8.46
4
4
  Summary: This project is only for Orbit Tech internal use.
5
5
  Home-page: https://github.com/clown-0726/orbitkit
6
6
  Author: Lilu Cao
@@ -1 +0,0 @@
1
- 0.8.45
File without changes
File without changes
File without changes
File without changes
File without changes