aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,319 @@
1
+ """
2
+ Text Chunker
3
+
4
+ Splits large texts into manageable chunks for processing.
5
+ """
6
+
7
+ from typing import List, Optional, Dict, Any
8
+ from dataclasses import dataclass, field
9
+
10
+
11
@dataclass
class TextChunk:
    """
    A chunk of text with metadata

    Attributes:
        text: The chunk text content
        start_char: Offset of the chunk's first character in the original text
        end_char: Offset one past the chunk's last character in the original text
        chunk_index: Index of this chunk (0-based)
        metadata: Metadata attached to this chunk (empty dict when none was given)
    """

    text: str
    start_char: int
    end_char: int
    chunk_index: int
    metadata: Dict[str, Any] = field(default_factory=dict)


class TextChunker:
    """
    Split large texts into smaller chunks

    Strategies:
    - Fixed size chunking (by character count)
    - Sentence-aware chunking (don't break sentences)
    - Paragraph-aware chunking (preserve paragraphs)
    - Overlapping chunks (for context preservation)

    Notes:
    - ``start_char`` / ``end_char`` always refer to real positions in the
      original text. For sentence- and paragraph-aware chunks the chunk text
      is re-joined with a normalized separator (``" "`` or ``"\\n\\n"``), so it
      can differ from ``text[start_char:end_char]`` in whitespace only.
    - A single sentence or paragraph longer than ``chunk_size`` is never cut;
      it becomes a chunk of its own that exceeds ``chunk_size``.
    - Every chunk receives its own shallow copy of the supplied metadata.

    Example:
        ```python
        chunker = TextChunker(chunk_size=1000, overlap=100)
        chunks = chunker.chunk_text(long_document)

        for chunk in chunks:
            # Process each chunk separately
            result = await process(chunk.text)
        ```
    """

    def __init__(
        self,
        chunk_size: int = 1000,
        overlap: int = 100,
        respect_sentences: bool = True,
        respect_paragraphs: bool = False,
        min_chunk_size: int = 100,
    ):
        """
        Initialize text chunker

        Args:
            chunk_size: Target size for each chunk (in characters)
            overlap: Number of characters to overlap between chunks
            respect_sentences: Try to break at sentence boundaries
            respect_paragraphs: Try to break at paragraph boundaries
                (takes precedence over respect_sentences)
            min_chunk_size: Minimum chunk size. Stored on the instance but
                not currently enforced by any chunking strategy.
        """
        self.chunk_size = chunk_size
        self.overlap = overlap
        self.respect_sentences = respect_sentences
        self.respect_paragraphs = respect_paragraphs
        self.min_chunk_size = min_chunk_size

    def chunk_text(self, text: str, metadata: Optional[Dict[str, Any]] = None) -> List[TextChunk]:
        """
        Split text into chunks

        Args:
            text: Text to chunk
            metadata: Optional metadata to attach to chunks (copied per chunk)

        Returns:
            List of TextChunk objects (empty for empty text)

        Raises:
            ValueError: In fixed-size mode, if chunk_size is not positive or
                overlap is not smaller than chunk_size
        """
        if not text:
            return []

        # Short texts are returned whole, whatever the strategy
        if len(text) <= self.chunk_size:
            return [
                TextChunk(
                    text=text,
                    start_char=0,
                    end_char=len(text),
                    chunk_index=0,
                    metadata=self._copy_metadata(metadata),
                )
            ]

        # Choose chunking strategy
        if self.respect_paragraphs:
            return self._chunk_by_paragraphs(text, metadata)
        if self.respect_sentences:
            return self._chunk_by_sentences(text, metadata)
        return self._chunk_fixed_size(text, metadata)

    @staticmethod
    def _copy_metadata(metadata: Optional[Dict[str, Any]]) -> Dict[str, Any]:
        """Return a fresh shallow copy of metadata, or an empty dict for None."""
        return dict(metadata) if metadata else {}

    def _chunk_fixed_size(self, text: str, metadata: Optional[Dict[str, Any]]) -> List[TextChunk]:
        """
        Chunk text by fixed size with overlap

        Args:
            text: Text to chunk
            metadata: Optional metadata

        Returns:
            List of TextChunk objects

        Raises:
            ValueError: If chunk_size is not positive or overlap is not
                smaller than chunk_size (the window could never advance)
        """
        step = self.chunk_size - self.overlap
        if self.chunk_size <= 0 or step <= 0:
            raise ValueError(
                "Fixed-size chunking requires chunk_size > 0 and overlap < chunk_size "
                f"(got chunk_size={self.chunk_size}, overlap={self.overlap})"
            )

        chunks: List[TextChunk] = []
        total = len(text)
        start = 0

        while start < total:
            end = min(start + self.chunk_size, total)
            chunks.append(
                TextChunk(
                    text=text[start:end],
                    start_char=start,
                    end_char=end,
                    chunk_index=len(chunks),
                    metadata=self._copy_metadata(metadata),
                )
            )

            # Once the end of the text is covered, another window would only
            # repeat the tail of the chunk that was just emitted.
            if end >= total:
                break

            # Move to next chunk with overlap
            start += step

        return chunks

    def _chunk_by_sentences(self, text: str, metadata: Optional[Dict[str, Any]]) -> List[TextChunk]:
        """
        Chunk text respecting sentence boundaries

        Sentences are packed greedily up to chunk_size; each new chunk starts
        with the trailing sentences of the previous one that fit in overlap.

        Args:
            text: Text to chunk
            metadata: Optional metadata

        Returns:
            List of TextChunk objects
        """
        return self._pack_units(
            text, self._sentence_spans(text), " ", metadata, with_overlap=True
        )

    def _chunk_by_paragraphs(
        self, text: str, metadata: Optional[Dict[str, Any]]
    ) -> List[TextChunk]:
        """
        Chunk text respecting paragraph boundaries

        Paragraphs (separated by blank lines) are packed greedily up to
        chunk_size. No overlap is applied between paragraph chunks.

        Args:
            text: Text to chunk
            metadata: Optional metadata

        Returns:
            List of TextChunk objects
        """
        return self._pack_units(
            text, self._paragraph_spans(text), "\n\n", metadata, with_overlap=False
        )

    def _pack_units(
        self,
        text: str,
        spans: List[tuple],
        joiner: str,
        metadata: Optional[Dict[str, Any]],
        with_overlap: bool,
    ) -> List[TextChunk]:
        """
        Greedily pack consecutive units (sentences or paragraphs) into chunks

        Args:
            text: Original text the spans index into
            spans: (start, end) offsets of each unit, in document order
            joiner: Separator used to join unit texts inside a chunk
            metadata: Optional metadata
            with_overlap: Whether a new chunk re-uses trailing units of the
                previous chunk (bounded by self.overlap)

        Returns:
            List of TextChunk objects
        """
        chunks: List[TextChunk] = []
        current: List[tuple] = []  # spans of the chunk being assembled
        current_length = 0  # unit lengths plus one separator per unit
        sep_length = len(joiner)

        for span in spans:
            unit_length = span[1] - span[0]

            # If adding this unit would exceed chunk_size, close the chunk
            if current and current_length + unit_length > self.chunk_size:
                chunks.append(self._build_chunk(text, current, joiner, len(chunks), metadata))

                if with_overlap:
                    kept = len(self._get_overlap_sentences([text[s:e] for s, e in current]))
                    # Slicing from len - kept keeps nothing when kept == 0
                    current = current[len(current) - kept:]
                else:
                    current = []
                current_length = sum(e - s + sep_length for s, e in current)

            current.append(span)
            current_length += unit_length + sep_length

        # Add final chunk
        if current:
            chunks.append(self._build_chunk(text, current, joiner, len(chunks), metadata))

        return chunks

    def _build_chunk(
        self,
        text: str,
        spans: List[tuple],
        joiner: str,
        chunk_index: int,
        metadata: Optional[Dict[str, Any]],
    ) -> TextChunk:
        """Create a TextChunk from unit spans, using their real offsets in text."""
        return TextChunk(
            text=joiner.join(text[s:e] for s, e in spans),
            start_char=spans[0][0],
            end_char=spans[-1][1],
            chunk_index=chunk_index,
            metadata=self._copy_metadata(metadata),
        )

    @staticmethod
    def _trimmed_span(text: str, start: int, end: int) -> Optional[tuple]:
        """Shrink [start, end) to exclude surrounding whitespace; None if nothing is left."""
        segment = text[start:end]
        stripped = segment.strip()
        if not stripped:
            return None
        lead = len(segment) - len(segment.lstrip())
        return (start + lead, start + lead + len(stripped))

    def _sentence_spans(self, text: str) -> List[tuple]:
        """
        Locate sentences in text (simple implementation)

        A sentence ends at '.', '!' or '?' followed by whitespace.
        For production, consider using NLTK's sent_tokenize or spaCy.

        Args:
            text: Text to split

        Returns:
            List of (start, end) offsets, one per non-empty sentence
        """
        import re

        spans: List[tuple] = []
        seg_start = 0
        for boundary in re.finditer(r"(?<=[.!?])\s+", text):
            span = self._trimmed_span(text, seg_start, boundary.start())
            if span is not None:
                spans.append(span)
            seg_start = boundary.end()

        span = self._trimmed_span(text, seg_start, len(text))
        if span is not None:
            spans.append(span)
        return spans

    def _paragraph_spans(self, text: str) -> List[tuple]:
        """
        Locate paragraphs in text (separated by double newlines)

        Args:
            text: Text to split

        Returns:
            List of (start, end) offsets, one per non-empty paragraph
        """
        spans: List[tuple] = []
        position = 0
        for raw in text.split("\n\n"):
            span = self._trimmed_span(text, position, position + len(raw))
            if span is not None:
                spans.append(span)
            position += len(raw) + 2  # skip the "\n\n" separator
        return spans

    def _split_sentences(self, text: str) -> List[str]:
        """
        Split text into sentences (simple implementation)

        For production, consider using NLTK's sent_tokenize or spaCy.

        Args:
            text: Text to split

        Returns:
            List of sentences, stripped of surrounding whitespace
        """
        return [text[start:end] for start, end in self._sentence_spans(text)]

    def _get_overlap_sentences(self, sentences: List[str]) -> List[str]:
        """
        Get last few sentences for overlap

        Args:
            sentences: List of sentences

        Returns:
            Longest run of trailing sentences whose combined length
            (counting one separator per sentence) fits in the overlap size
        """
        if not sentences or self.overlap == 0:
            return []

        selected: List[str] = []
        used = 0

        # Take sentences from the end until the overlap budget is exhausted
        for sent in reversed(sentences):
            cost = len(sent) + 1
            if used + cost > self.overlap:
                break
            selected.append(sent)
            used += cost

        selected.reverse()  # restore document order
        return selected
@@ -0,0 +1,27 @@
1
+ """
2
+ Knowledge Graph Entity and Relation Extractors
3
+
4
+ This module provides extractors for building knowledge graphs from text.
5
+ """
6
+
7
+ from aiecs.application.knowledge_graph.extractors.base import (
8
+ EntityExtractor,
9
+ RelationExtractor,
10
+ )
11
+ from aiecs.application.knowledge_graph.extractors.llm_entity_extractor import (
12
+ LLMEntityExtractor,
13
+ )
14
+ from aiecs.application.knowledge_graph.extractors.ner_entity_extractor import (
15
+ NEREntityExtractor,
16
+ )
17
+ from aiecs.application.knowledge_graph.extractors.llm_relation_extractor import (
18
+ LLMRelationExtractor,
19
+ )
20
+
21
# Public API of the extractors package: the two abstract base classes
# followed by the concrete extractors imported above.
__all__ = [
    "EntityExtractor",
    "RelationExtractor",
    "LLMEntityExtractor",
    "NEREntityExtractor",
    "LLMRelationExtractor",
]
@@ -0,0 +1,100 @@
1
+ """
2
+ Base Abstract Classes for Entity and Relation Extraction
3
+
4
+ Defines the interface for extracting entities and relations from text.
5
+ """
6
+
7
+ from abc import ABC, abstractmethod
8
+ from typing import List, Optional
9
+ from aiecs.domain.knowledge_graph.models.entity import Entity
10
+ from aiecs.domain.knowledge_graph.models.relation import Relation
11
+
12
+
13
class EntityExtractor(ABC):
    """
    Interface every entity extractor must implement

    An entity extractor receives raw text and reports the entities it finds.
    How the entities are found (LLM, NER, rule-based, etc.) is left to the
    concrete subclass.

    Example:
        ```python
        extractor = LLMEntityExtractor(llm_client, schema)
        entities = await extractor.extract_entities(
            "Alice works at Tech Corp in San Francisco"
        )
        # Returns: [Entity(Person: Alice), Entity(Company: Tech Corp), ...]
        ```
    """

    @abstractmethod
    async def extract_entities(
        self,
        text: str,
        entity_types: Optional[List[str]] = None,
        **kwargs,
    ) -> List[Entity]:
        """
        Find the entities mentioned in a piece of text

        Args:
            text: Text to search for entities
            entity_types: Restrict extraction to these entity types
                (e.g., ["Person", "Company"]). None means every type the
                extractor supports.
            **kwargs: Extra, extractor-specific options

        Returns:
            The Entity objects found in the text

        Raises:
            ValueError: If text is empty or invalid
            RuntimeError: If extraction fails
        """
50
+
51
+
52
class RelationExtractor(ABC):
    """
    Interface every relation extractor must implement

    A relation extractor receives text together with the entities already
    found in it and reports the relations (edges) between those entities.
    Extraction is therefore two-stage: entities first, relations second.

    Example:
        ```python
        extractor = LLMRelationExtractor(llm_client, schema)

        # Entities already extracted
        alice = Entity(id="e1", entity_type="Person", properties={"name": "Alice"})
        tech_corp = Entity(id="e2", entity_type="Company", properties={"name": "Tech Corp"})

        relations = await extractor.extract_relations(
            text="Alice works at Tech Corp",
            entities=[alice, tech_corp]
        )
        # Returns: [Relation(alice -[WORKS_FOR]-> tech_corp)]
        ```
    """

    @abstractmethod
    async def extract_relations(
        self,
        text: str,
        entities: List[Entity],
        relation_types: Optional[List[str]] = None,
        **kwargs,
    ) -> List[Relation]:
        """
        Find the relations that hold between known entities in a text

        Args:
            text: Text in which the entities occur
            entities: Entities previously extracted from this text
            relation_types: Restrict extraction to these relation types
                (e.g., ["WORKS_FOR", "KNOWS"]). None means every type the
                extractor supports.
            **kwargs: Extra, extractor-specific options

        Returns:
            The Relation objects found between the given entities

        Raises:
            ValueError: If text is empty or entities list is empty
            RuntimeError: If extraction fails
        """