orbitkit 0.8.73__tar.gz → 0.8.74__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. {orbitkit-0.8.73/orbitkit.egg-info → orbitkit-0.8.74}/PKG-INFO +1 -1
  2. orbitkit-0.8.74/orbitkit/VERSION +1 -0
  3. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/airflow_handler/data_preprocessing.py +14 -13
  4. {orbitkit-0.8.73 → orbitkit-0.8.74/orbitkit.egg-info}/PKG-INFO +1 -1
  5. orbitkit-0.8.73/orbitkit/VERSION +0 -1
  6. {orbitkit-0.8.73 → orbitkit-0.8.74}/LICENSE +0 -0
  7. {orbitkit-0.8.73 → orbitkit-0.8.74}/MANIFEST.in +0 -0
  8. {orbitkit-0.8.73 → orbitkit-0.8.74}/README.md +0 -0
  9. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/__init__.py +0 -0
  10. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/airflow_handler/__init__.py +0 -0
  11. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/airflow_handler/file_flow_entry_process.py +0 -0
  12. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/airflow_handler/file_flow_exit_process.py +0 -0
  13. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/airflow_handler/file_handler.py +0 -0
  14. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/airflow_handler/file_handler_v2.py +0 -0
  15. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/audio_transcoder/__init__.py +0 -0
  16. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/audio_transcoder/netmind_extract_v1.py +0 -0
  17. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/constant/__init__.py +0 -0
  18. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/constant/report_schema.py +0 -0
  19. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/id_srv/__init__.py +0 -0
  20. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/id_srv/id_gen.py +0 -0
  21. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/id_srv/id_perm_like.py +0 -0
  22. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/lark_send/__init__.py +0 -0
  23. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/lark_send/lark.py +0 -0
  24. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/llm_tools/__init__.py +0 -0
  25. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/llm_tools/quick_rag_chat.py +0 -0
  26. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/orbit_type/__init__.py +0 -0
  27. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/orbit_type/doc_4_compile_rule.py +0 -0
  28. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/orbit_type/orbit_type_simple.py +0 -0
  29. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/orbit_type/tools.py +0 -0
  30. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_embedding/__init__.py +0 -0
  31. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_embedding/pdf_txt_embedding.py +0 -0
  32. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_embedding/pdf_txt_embedding_v2.py +0 -0
  33. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor/__init__.py +0 -0
  34. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor/a_stock_extractor_v1.py +0 -0
  35. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor/exceptions.py +0 -0
  36. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor/mineru_demo.py +0 -0
  37. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor/pdf_block_extractor_base.py +0 -0
  38. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor/pdf_block_extractor_v1.py +0 -0
  39. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor/pdf_block_extractor_v2.py +0 -0
  40. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor/pdf_extractor_azure.py +0 -0
  41. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor/pdf_extractor_minerU_v1.py +0 -0
  42. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor/pdf_extractor_netmind_v1.py +0 -0
  43. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor/pdf_extractor_netmind_v2.py +0 -0
  44. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor/pdf_extractor_netmind_v3.py +0 -0
  45. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor/pdf_extractor_orbit.py +0 -0
  46. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor_simple/__init__.py +0 -0
  47. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor_simple/base.py +0 -0
  48. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor_simple/cloud_provider.py +0 -0
  49. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor_simple/core.py +0 -0
  50. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor_simple/exceptions.py +0 -0
  51. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor_simple/extractors.py +0 -0
  52. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_extractor_simple/utils.py +0 -0
  53. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_writer/__init__.py +0 -0
  54. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/pdf_writer/pdf_writer_simple.py +0 -0
  55. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/__init__.py +0 -0
  56. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/cache_asset_downloader.py +0 -0
  57. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/common.py +0 -0
  58. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/customize_regix_manager.py +0 -0
  59. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/is_xbrl_structure.py +0 -0
  60. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/secret_manager.py +0 -0
  61. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/universal_extractor.py +0 -0
  62. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/util_aliyun.py +0 -0
  63. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/util_aliyun_oss_simple.py +0 -0
  64. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/util_aws.py +0 -0
  65. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/util_aws_s3_wrapper.py +0 -0
  66. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/util_date.py +0 -0
  67. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/util_html.py +0 -0
  68. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/util_kafka.py +0 -0
  69. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/util_md5.py +0 -0
  70. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/util_selenium.py +0 -0
  71. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/util_simple_timer.py +0 -0
  72. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/util_str.py +0 -0
  73. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/util_type_mapping.py +0 -0
  74. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit/util/util_url.py +0 -0
  75. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit.egg-info/SOURCES.txt +0 -0
  76. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit.egg-info/dependency_links.txt +0 -0
  77. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit.egg-info/not-zip-safe +0 -0
  78. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit.egg-info/requires.txt +0 -0
  79. {orbitkit-0.8.73 → orbitkit-0.8.74}/orbitkit.egg-info/top_level.txt +0 -0
  80. {orbitkit-0.8.73 → orbitkit-0.8.74}/setup.cfg +0 -0
  81. {orbitkit-0.8.73 → orbitkit-0.8.74}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: orbitkit
3
- Version: 0.8.73
3
+ Version: 0.8.74
4
4
  Summary: This project is only for Orbit Tech internal use.
5
5
  Home-page: https://github.com/clown-0726/orbitkit
6
6
  Author: Lilu Cao
@@ -0,0 +1 @@
1
+ 0.8.74
@@ -118,18 +118,19 @@ class DocumentProcessor:
118
118
  @staticmethod
119
119
  def update_target_stage_by_report_type(doc, target_stage):
120
120
  report_type_ids = doc.get('x_orbit_data', {}).get('report_type_id_list', [])
121
- return "extract" if report_type_ids == ['19999'] else target_stage
122
-
123
- @staticmethod
124
- def update_target_stage_by_reported_at(doc, target_stage):
125
- date_str = doc.get('x_reported_at_utc_date', '1970-01-01')
126
- now = datetime.datetime.now()
127
- one_year_ago = now - relativedelta(years=1)
128
- try:
129
- reported_date = datetime.datetime.strptime(date_str, "%Y-%m-%d")
130
- except ValueError:
131
- reported_date = datetime.datetime(1970, 1, 1)
132
- return "extract" if reported_date < one_year_ago else target_stage
121
+ allow_embedding_types = {'10002', '10085', '10076', '10122', '10311', '10178', '10075', '10090', '10050'}
122
+ return target_stage if any(rid in allow_embedding_types for rid in report_type_ids) else "extract"
123
+
124
+ # @staticmethod
125
+ # def update_target_stage_by_reported_at(doc, target_stage):
126
+ # date_str = doc.get('x_reported_at_utc_date', '1970-01-01')
127
+ # now = datetime.datetime.now()
128
+ # one_year_ago = now - relativedelta(years=1)
129
+ # try:
130
+ # reported_date = datetime.datetime.strptime(date_str, "%Y-%m-%d")
131
+ # except ValueError:
132
+ # reported_date = datetime.datetime(1970, 1, 1)
133
+ # return "extract" if reported_date < one_year_ago else target_stage
133
134
 
134
135
  @staticmethod
135
136
  def update_target_stage_by_perm_match(doc, target_stage):
@@ -200,7 +201,7 @@ class DocumentProcessor:
200
201
  # 判断 特殊条件下的数据不做embedding ('19999'类型和报告日期小于2023-01-01)
201
202
  if target_stage == 'embedding' and not custom_process_step:
202
203
  target_stage = cls.update_target_stage_by_report_type(doc, target_stage)
203
- target_stage = cls.update_target_stage_by_reported_at(doc, target_stage)
204
+ # target_stage = cls.update_target_stage_by_reported_at(doc, target_stage)
204
205
  target_stage = cls.update_target_stage_by_perm_match(doc, target_stage)
205
206
  # 特殊情况下只需要做embedding 但是这个数据被条件限制为只做到提取时状态异常
206
207
  if start_stage == 'embedding' and target_stage == 'extract':
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: orbitkit
3
- Version: 0.8.73
3
+ Version: 0.8.74
4
4
  Summary: This project is only for Orbit Tech internal use.
5
5
  Home-page: https://github.com/clown-0726/orbitkit
6
6
  Author: Lilu Cao
@@ -1 +0,0 @@
1
- 0.8.73
File without changes
File without changes
File without changes
File without changes
File without changes