aiecs 1.0.1__py3-none-any.whl → 1.7.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340) hide show
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +435 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3949 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1731 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +894 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +377 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +230 -37
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +328 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +415 -0
  199. aiecs/llm/clients/googleai_client.py +314 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +1186 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1464 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1016 -0
  271. aiecs/tools/docs/document_writer_tool.py +2008 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +220 -141
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/METADATA +52 -15
  321. aiecs-1.7.17.dist-info/RECORD +337 -0
  322. aiecs-1.7.17.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1464 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Document Creator Tool
4
+
5
+ This tool is responsible for creating new documents from templates,
6
+ initializing document structure, and managing document metadata.
7
+
8
+ Key Features:
9
+ 1. Template-based document creation
10
+ 2. Document structure initialization
11
+ 3. Metadata management (title, author, date, etc.)
12
+ 4. Style configuration and presets
13
+ 5. Multi-format support (MD, HTML, DOCX, PDF, etc.)
14
+ """
15
+
16
+ import os
17
+ import json
18
+ import uuid
19
+ import tempfile
20
+ import logging
21
+ from datetime import datetime
22
+ from typing import Dict, Any, List, Optional
23
+ from enum import Enum
24
+
25
+ from pydantic import BaseModel, Field
26
+ from pydantic_settings import BaseSettings, SettingsConfigDict
27
+
28
+ from aiecs.tools.base_tool import BaseTool
29
+ from aiecs.tools import register_tool
30
+
31
+
32
class DocumentType(str, Enum):
    """Supported document types"""

    # The ``str`` mixin makes every member a real string: a member compares
    # equal to its value and can be looked up with ``DocumentType("report")``.
    # NOTE: the docstring above is kept verbatim because schema generators
    # may surface it as the enum description.

    # Business and long-form documents
    REPORT = "report"
    ARTICLE = "article"
    PRESENTATION = "presentation"
    MANUAL = "manual"
    LETTER = "letter"
    PROPOSAL = "proposal"

    # Specialised writing styles
    ACADEMIC = "academic"
    TECHNICAL = "technical"
    CREATIVE = "creative"

    # Caller-defined document kind
    CUSTOM = "custom"
45
+
46
+
47
class DocumentFormat(str, Enum):
    """Supported output formats"""

    # Members are strings (``str`` mixin), so ``DocumentFormat("pdf")``
    # resolves a raw value to its member.
    # NOTE: the docstring above is kept verbatim because schema generators
    # may surface it as the enum description.

    # Text / markup formats
    MARKDOWN = "markdown"
    HTML = "html"
    LATEX = "latex"
    PLAIN_TEXT = "txt"  # value is "txt", not the member name

    # Office and print formats
    DOCX = "docx"
    PDF = "pdf"
    PPTX = "pptx"
    PPT = "ppt"

    # Structured data formats
    JSON = "json"
    XML = "xml"
60
+
61
+
62
class TemplateType(str, Enum):
    """Document template types"""

    # String-valued members (``str`` mixin); ``TemplateType("blank")``
    # resolves a raw value to its member.
    # NOTE: the docstring above is kept verbatim because schema generators
    # may surface it as the enum description.

    # Empty starting point
    BLANK = "blank"

    # Named templates
    BUSINESS_REPORT = "business_report"
    TECHNICAL_DOC = "technical_doc"
    ACADEMIC_PAPER = "academic_paper"
    PROJECT_PROPOSAL = "project_proposal"
    USER_MANUAL = "user_manual"
    PRESENTATION = "presentation"
    NEWSLETTER = "newsletter"
    INVOICE = "invoice"

    # Caller-supplied template
    CUSTOM = "custom"
75
+
76
+
77
class StylePreset(str, Enum):
    """Style presets for documents"""

    # String-valued members (``str`` mixin), so a plain string such as
    # "default" can be converted with ``StylePreset("default")``.
    # NOTE: the docstring above is kept verbatim because schema generators
    # may surface it as the enum description.

    DEFAULT = "default"
    CORPORATE = "corporate"
    ACADEMIC = "academic"
    MODERN = "modern"
    CLASSIC = "classic"
    MINIMAL = "minimal"
    COLORFUL = "colorful"
    PROFESSIONAL = "professional"
88
+
89
+
90
class DocumentCreatorError(Exception):
    """Root of the Document Creator exception hierarchy.

    Catch this type to handle any error raised by the document creator.
    """


class TemplateError(DocumentCreatorError):
    """A template operation failed (for example, a template was not found)."""


class DocumentCreationError(DocumentCreatorError):
    """Creating a document failed."""
100
+
101
+
102
+ @register_tool("document_creator")
103
+ class DocumentCreatorTool(BaseTool):
104
+ """
105
+ Document Creator Tool for creating new documents from templates
106
+
107
+ This tool provides:
108
+ 1. Template management and selection
109
+ 2. Document structure initialization
110
+ 3. Metadata configuration
111
+ 4. Style and format setup
112
+ 5. Multi-format document creation
113
+
114
+ Integrates with:
115
+ - DocumentWriterTool for content writing
116
+ - DocumentLayoutTool for layout configuration
117
+ - ContentInsertionTool for complex content
118
+ """
119
+
120
+ # Configuration schema
121
class Config(BaseSettings):
    """Configuration for the document creator tool

    Automatically reads from environment variables with DOC_CREATOR_ prefix.
    Example: DOC_CREATOR_TEMPLATES_DIR -> templates_dir
    """

    # Environment variables are matched as DOC_CREATOR_<FIELD_NAME>.
    model_config = SettingsConfigDict(env_prefix="DOC_CREATOR_")

    # --- Filesystem locations (default to folders under the system temp dir,
    # --- resolved once when this class is defined) ---
    templates_dir: str = Field(
        description="Directory for document templates",
        default=os.path.join(tempfile.gettempdir(), "document_templates"),
    )
    output_dir: str = Field(
        description="Directory for created documents",
        default=os.path.join(tempfile.gettempdir(), "created_documents"),
    )

    # --- Defaults used when the caller does not specify a value ---
    default_format: str = Field(
        description="Default output format",
        default="markdown",
    )
    default_style: str = Field(
        description="Default style preset",
        default="default",
    )

    # --- Feature switches ---
    auto_backup: bool = Field(
        description="Whether to automatically backup created documents",
        default=True,
    )
    include_metadata: bool = Field(
        description="Whether to include metadata in created documents",
        default=True,
    )
    generate_toc: bool = Field(
        description="Whether to generate table of contents automatically",
        default=True,
    )
152
+
153
def __init__(self, config: Optional[Dict] = None, **kwargs):
    """Set up the Document Creator Tool.

    The base class is expected to resolve configuration and expose it as
    ``self._config_obj``. Per the original documentation, sources are
    consulted in this priority order:

    1. Explicit config dict (highest priority)
    2. YAML config files (config/tools/document_creator.yaml)
    3. Environment variables (via dotenv from .env files)
    4. Tool defaults (lowest priority)

    Args:
        config: Optional configuration overrides
        **kwargs: Additional arguments forwarded to BaseTool (e.g., tool_name)
    """
    super().__init__(config, **kwargs)

    # Prefer the settings object prepared by the base class; fall back to a
    # default-constructed Config when none (or a falsy one) is available.
    loaded_settings = self._config_obj
    if loaded_settings:
        self.config = loaded_settings
    else:
        self.config = self.Config()

    self.logger = logging.getLogger(__name__)

    # Order matters: directories and templates are prepared before the
    # optional office backend is probed.
    self._init_directories()
    self._init_templates()
    self._init_office_tool()

    # Records of every document created through this instance.
    self._documents_created: List[Any] = []
185
+
186
+ def _init_directories(self):
187
+ """Initialize required directories"""
188
+ os.makedirs(self.config.templates_dir, exist_ok=True)
189
+ os.makedirs(self.config.output_dir, exist_ok=True)
190
+
191
def _init_templates(self):
    """Populate ``self.templates`` with the built-in templates.

    Each ``TemplateType`` listed below is mapped to the value returned by
    its ``_get_*_template`` method; the methods are invoked in the order
    shown.
    """
    template_factories = (
        (TemplateType.BLANK, self._get_blank_template),
        (TemplateType.BUSINESS_REPORT, self._get_business_report_template),
        (TemplateType.TECHNICAL_DOC, self._get_technical_doc_template),
        (TemplateType.ACADEMIC_PAPER, self._get_academic_paper_template),
        (TemplateType.PROJECT_PROPOSAL, self._get_project_proposal_template),
        (TemplateType.USER_MANUAL, self._get_user_manual_template),
        (TemplateType.PRESENTATION, self._get_presentation_template),
        (TemplateType.NEWSLETTER, self._get_newsletter_template),
        (TemplateType.INVOICE, self._get_invoice_template),
    )
    self.templates = {kind: factory() for kind, factory in template_factories}
204
+
205
+ def _init_office_tool(self):
206
+ """Initialize office tool for PPTX/DOCX creation"""
207
+ try:
208
+ from aiecs.tools.task_tools.office_tool import OfficeTool
209
+
210
+ self.office_tool = OfficeTool()
211
+ self.logger.info("OfficeTool initialized successfully for PPTX/DOCX support")
212
+ except ImportError:
213
+ self.logger.warning("OfficeTool not available, PPTX/DOCX creation will be limited")
214
+ self.office_tool = None
215
+
216
+ # Schema definitions
217
class Create_documentSchema(BaseModel):
    """Schema for create_document operation"""

    # The docstring above and every ``description`` below are kept verbatim:
    # they are runtime strings that may surface in generated tool schemas.

    # Required inputs
    document_type: DocumentType = Field(
        description="Type of document to create",
    )
    template_type: TemplateType = Field(
        description="Template to use",
    )
    output_format: DocumentFormat = Field(
        description="Output format",
    )
    metadata: Dict[str, Any] = Field(
        description="Document metadata",
    )

    # Optional inputs
    style_preset: Optional[StylePreset] = Field(
        description="Style preset",
        default=None,
    )
    output_path: Optional[str] = Field(
        description="Custom output path",
        default=None,
    )
226
+
227
class Create_from_templateSchema(BaseModel):
    """Schema for create_from_template operation"""

    # The docstring above and every ``description`` below are kept verbatim:
    # they are runtime strings that may surface in generated tool schemas.

    # Required inputs
    template_name: str = Field(
        description="Name of template to use",
    )
    template_variables: Dict[str, Any] = Field(
        description="Variables to fill in template",
    )
    output_format: DocumentFormat = Field(
        description="Output format",
    )

    # Optional inputs
    output_path: Optional[str] = Field(
        description="Custom output path",
        default=None,
    )
234
+
235
class Setup_document_structureSchema(BaseModel):
    """Schema for setup_document_structure operation"""

    # The docstring above and every ``description`` below are kept verbatim:
    # they are runtime strings that may surface in generated tool schemas.

    # Required inputs
    document_path: str = Field(
        description="Path to document",
    )
    sections: List[Dict[str, Any]] = Field(
        description="Document sections configuration",
    )

    # Optional inputs
    generate_toc: bool = Field(
        description="Generate table of contents",
        default=True,
    )
    numbering_style: Optional[str] = Field(
        description="Section numbering style",
        default=None,
    )
242
+
243
class Configure_metadataSchema(BaseModel):
    """Schema for configure_metadata operation"""

    # The docstring above and every ``description`` below are kept verbatim:
    # they are runtime strings that may surface in generated tool schemas.

    # Required inputs
    document_path: str = Field(
        description="Path to document",
    )
    metadata: Dict[str, Any] = Field(
        description="Metadata to configure",
    )

    # Optional inputs
    format_specific: bool = Field(
        description="Use format-specific metadata",
        default=True,
    )
249
+
250
class Get_template_infoSchema(BaseModel):
    """Schema for get_template_info operation"""

    # The docstring above and the ``description`` below are kept verbatim:
    # they are runtime strings that may surface in generated tool schemas.

    # Required input: which template to describe.
    template_type: TemplateType = Field(
        description="Type of template",
    )
254
+
255
def create_document(
    self,
    document_type: DocumentType,
    template_type: TemplateType,
    output_format: DocumentFormat,
    metadata: Dict[str, Any],
    style_preset: Optional[StylePreset] = None,
    output_path: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Create a new document from template

    Args:
        document_type: Type of document to create
        template_type: Template to use
        output_format: Output format for the document
        metadata: Document metadata (title, author, etc.)
        style_preset: Style preset to apply; when omitted, the configured
            ``default_style`` is applied instead
        output_path: Custom output path; when omitted (or empty), a path is
            generated from the document type, format and a fresh document id

    Returns:
        Dict containing document creation results, with the keys
        ``document_id``, ``document_type``, ``template_type``,
        ``output_format``, ``output_path``, ``metadata`` (the processed
        metadata), ``style_preset`` and ``creation_metadata``
        (``created_at``, ``file_size``, ``duration``). ``style_preset`` is
        the value passed by the caller, so it is ``None`` when the
        configured default was applied.

    Raises:
        DocumentCreationError: If any step fails. The original exception is
            attached as ``__cause__``.
    """
    try:
        start_time = datetime.now()
        document_id = str(uuid.uuid4())

        self.logger.info(f"Creating document {document_id}: {document_type} using {template_type}")

        # Step 1: Validate and prepare template
        template = self._get_template(template_type)

        # Step 2: Generate output path
        if not output_path:
            output_path = self._generate_output_path(document_type, output_format, document_id)

        # Step 3: Process metadata
        processed_metadata = self._process_metadata(metadata, output_format)

        # Step 4: Apply style preset
        # The configured default is a plain string, so it is converted to the
        # enum before the style configuration is looked up.
        preset_value = style_preset or self.config.default_style
        style_preset_enum = StylePreset(preset_value) if isinstance(preset_value, str) else preset_value
        style_config = self._get_style_config(style_preset_enum)

        # Step 5: Create document from template
        document_content = self._create_document_from_template(template, processed_metadata, style_config, output_format)

        # Step 6: Write document to file
        self._write_document_file(output_path, document_content, output_format)

        # Step 7: Track created document
        document_info = {
            "document_id": document_id,
            "document_type": document_type,
            "template_type": template_type,
            "output_format": output_format,
            "output_path": output_path,
            "metadata": processed_metadata,
            "style_preset": style_preset,
            "creation_metadata": {
                "created_at": start_time.isoformat(),
                # 0 is reported when no file exists at output_path after writing.
                "file_size": (os.path.getsize(output_path) if os.path.exists(output_path) else 0),
                "duration": (datetime.now() - start_time).total_seconds(),
            },
        }

        self._documents_created.append(document_info)

        self.logger.info(f"Document {document_id} created successfully at {output_path}")
        return document_info

    except Exception as e:
        # Chain explicitly so the root failure is recorded as __cause__ and
        # shown as the direct cause in tracebacks.
        raise DocumentCreationError(f"Failed to create document: {str(e)}") from e
328
+
329
def create_from_template(
    self,
    template_name: str,
    template_variables: Dict[str, Any],
    output_format: DocumentFormat,
    output_path: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Create document from custom template with variables

    Args:
        template_name: Name of template file, resolved against config.templates_dir
        template_variables: Variables to substitute in template
        output_format: Output format
        output_path: Custom output path; one is generated when omitted

    Returns:
        Dict containing creation results

    Raises:
        DocumentCreationError: If the template is missing or any step fails; the
            underlying exception (including TemplateError) is chained as the cause
    """
    try:
        # Load custom template
        # NOTE(review): template_name is joined without checking that the result stays
        # inside templates_dir ("../" or an absolute name escapes it) - confirm whether
        # callers can pass untrusted names here.
        template_path = os.path.join(self.config.templates_dir, template_name)
        if not os.path.exists(template_path):
            raise TemplateError(f"Template not found: {template_name}")

        with open(template_path, "r", encoding="utf-8") as f:
            template_content = f.read()

        # Process template variables
        processed_content = self._process_template_variables(template_content, template_variables)

        # Generate output path if not provided
        if not output_path:
            output_path = self._generate_output_path("custom", output_format, str(uuid.uuid4()))

        # Write processed content
        self._write_document_file(output_path, processed_content, output_format)

        return {
            "template_name": template_name,
            "output_path": output_path,
            "output_format": output_format,
            "variables_used": template_variables,
            "creation_time": datetime.now().isoformat(),
        }

    except Exception as e:
        # Chain explicitly so the traceback shows the root cause as the direct cause
        raise DocumentCreationError(f"Failed to create from template: {str(e)}") from e
377
+
378
def setup_document_structure(
    self,
    document_path: str,
    sections: List[Dict[str, Any]],
    generate_toc: bool = True,
    numbering_style: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Setup document structure with sections and headers

    The generated structure is combined with whatever the file already contains
    and the result is written back to the same path.

    Args:
        document_path: Path to document; a missing file is treated as empty
        sections: List of section configurations
        generate_toc: Whether to generate table of contents
        numbering_style: Section numbering style

    Returns:
        Dict containing structure setup results

    Raises:
        DocumentCreationError: If any step fails; the underlying exception is chained as the cause
    """
    try:
        self.logger.info(f"Setting up structure for document: {document_path}")

        # Read existing document
        if os.path.exists(document_path):
            with open(document_path, "r", encoding="utf-8") as f:
                content = f.read()
        else:
            content = ""

        # Generate structure
        structure_content = self._generate_document_structure(sections, generate_toc, numbering_style)

        # Combine with existing content
        final_content = self._combine_structure_with_content(structure_content, content)

        # Write back to file
        with open(document_path, "w", encoding="utf-8") as f:
            f.write(final_content)

        return {
            "document_path": document_path,
            "sections_created": len(sections),
            "toc_generated": generate_toc,
            "numbering_style": numbering_style,
            "structure_setup_time": datetime.now().isoformat(),
        }

    except Exception as e:
        # Chain explicitly so the traceback shows the root cause as the direct cause
        raise DocumentCreationError(f"Failed to setup document structure: {str(e)}") from e
427
+
428
def configure_metadata(
    self,
    document_path: str,
    metadata: Dict[str, Any],
    format_specific: bool = True,
) -> Dict[str, Any]:
    """
    Configure document metadata

    Args:
        document_path: Path to document
        metadata: Metadata to configure
        format_specific: Use format-specific metadata syntax

    Returns:
        Dict containing metadata configuration results

    Raises:
        DocumentCreationError: If any step fails; the underlying exception is chained as the cause
    """
    try:
        # Detect document format
        file_format = self._detect_document_format(document_path)

        # Generate metadata content
        if format_specific:
            metadata_content = self._generate_format_specific_metadata(metadata, file_format)
        else:
            metadata_content = self._generate_generic_metadata(metadata)

        # Insert metadata into document
        self._insert_metadata_into_document(document_path, metadata_content, file_format)

        return {
            "document_path": document_path,
            "metadata_configured": metadata,
            "format": file_format,
            "format_specific": format_specific,
            "configuration_time": datetime.now().isoformat(),
        }

    except Exception as e:
        # Chain explicitly so the traceback shows the root cause as the direct cause
        raise DocumentCreationError(f"Failed to configure metadata: {str(e)}") from e
468
+
469
def list_templates(self) -> Dict[str, Any]:
    """
    Report which templates can be used

    Returns:
        Dict with the built-in template values, the custom template file names
        found in the templates directory, that directory and the combined count
    """
    builtin_keys = list(self.templates.keys())

    # Custom templates are files in the templates directory with a recognised extension
    recognised_suffixes = (".md", ".html", ".txt", ".json")
    custom_files = []
    if os.path.exists(self.config.templates_dir):
        custom_files = [
            entry
            for entry in os.listdir(self.config.templates_dir)
            if entry.endswith(recognised_suffixes)
        ]

    summary = {}
    summary["built_in_templates"] = [key.value for key in builtin_keys]
    summary["custom_templates"] = custom_files
    summary["templates_directory"] = self.config.templates_dir
    summary["total_templates"] = len(builtin_keys) + len(custom_files)
    return summary
491
+
492
def get_template_info(self, template_type: TemplateType) -> Dict[str, Any]:
    """
    Describe one registered template

    Args:
        template_type: Type of template

    Returns:
        Dict with the template's value, name, description, sections, variables,
        supported formats and style presets (empty defaults for missing entries)

    Raises:
        TemplateError: If the template type is not registered
    """
    if template_type not in self.templates:
        raise TemplateError(f"Template not found: {template_type}")

    spec = self.templates[template_type]

    info = {"template_type": template_type.value}
    # Text fields default to an empty string, list fields to an empty list
    for text_field in ("name", "description"):
        info[text_field] = spec.get(text_field, "")
    for list_field in ("sections", "variables", "supported_formats", "style_presets"):
        info[list_field] = spec.get(list_field, [])
    return info
516
+
517
def get_created_documents(self) -> List[Dict[str, Any]]:
    """
    Return the records of the documents this instance has created

    Returns:
        A shallow copy of the tracking list, so callers cannot reorder or
        truncate the internal list (the record dicts themselves are shared)
    """
    snapshot = self._documents_created.copy()
    return snapshot
525
+
526
+ # Template definitions
527
+ def _get_blank_template(self) -> Dict[str, Any]:
528
+ """Get blank document template"""
529
+ return {
530
+ "name": "Blank Document",
531
+ "description": "Empty document with basic structure",
532
+ "content": "",
533
+ "sections": [],
534
+ "variables": [],
535
+ "supported_formats": ["markdown", "html", "txt", "docx"],
536
+ "metadata_template": {
537
+ "title": "New Document",
538
+ "author": "Author",
539
+ "date": datetime.now().strftime("%Y-%m-%d"),
540
+ },
541
+ }
542
+
543
+ def _get_business_report_template(self) -> Dict[str, Any]:
544
+ """Get business report template"""
545
+ return {
546
+ "name": "Business Report",
547
+ "description": "Professional business report template",
548
+ "content": """# {title}
549
+
550
+ **Date:** {date}
551
+ **Author:** {author}
552
+ **Department:** {department}
553
+
554
+ ## Executive Summary
555
+
556
+ {executive_summary}
557
+
558
+ ## Introduction
559
+
560
+ {introduction}
561
+
562
+ ## Analysis
563
+
564
+ ### Key Findings
565
+
566
+ {key_findings}
567
+
568
+ ### Data Analysis
569
+
570
+ {data_analysis}
571
+
572
+ ## Recommendations
573
+
574
+ {recommendations}
575
+
576
+ ## Conclusion
577
+
578
+ {conclusion}
579
+
580
+ ## Appendices
581
+
582
+ {appendices}
583
+ """,
584
+ "sections": [
585
+ {"name": "Executive Summary", "level": 2, "required": True},
586
+ {"name": "Introduction", "level": 2, "required": True},
587
+ {"name": "Analysis", "level": 2, "required": True},
588
+ {"name": "Recommendations", "level": 2, "required": True},
589
+ {"name": "Conclusion", "level": 2, "required": True},
590
+ ],
591
+ "variables": [
592
+ "title",
593
+ "date",
594
+ "author",
595
+ "department",
596
+ "executive_summary",
597
+ "introduction",
598
+ "key_findings",
599
+ "data_analysis",
600
+ "recommendations",
601
+ "conclusion",
602
+ "appendices",
603
+ ],
604
+ "supported_formats": ["markdown", "html", "docx", "pdf"],
605
+ "style_presets": ["corporate", "professional", "modern"],
606
+ }
607
+
608
+ def _get_technical_doc_template(self) -> Dict[str, Any]:
609
+ """Get technical documentation template"""
610
+ return {
611
+ "name": "Technical Documentation",
612
+ "description": "Technical documentation with code examples",
613
+ "content": """# {title}
614
+
615
+ **Version:** {version}
616
+ **Last Updated:** {date}
617
+ **Author:** {author}
618
+
619
+ ## Overview
620
+
621
+ {overview}
622
+
623
+ ## Prerequisites
624
+
625
+ {prerequisites}
626
+
627
+ ## Installation
628
+
629
+ {installation}
630
+
631
+ ## Configuration
632
+
633
+ {configuration}
634
+
635
+ ## Usage
636
+
637
+ {usage}
638
+
639
+ ## API Reference
640
+
641
+ {api_reference}
642
+
643
+ ## Examples
644
+
645
+ {examples}
646
+
647
+ ## Troubleshooting
648
+
649
+ {troubleshooting}
650
+
651
+ ## Changelog
652
+
653
+ {changelog}
654
+ """,
655
+ "sections": [
656
+ {"name": "Overview", "level": 2, "required": True},
657
+ {"name": "Prerequisites", "level": 2, "required": False},
658
+ {"name": "Installation", "level": 2, "required": True},
659
+ {"name": "Configuration", "level": 2, "required": False},
660
+ {"name": "Usage", "level": 2, "required": True},
661
+ {"name": "API Reference", "level": 2, "required": False},
662
+ {"name": "Examples", "level": 2, "required": True},
663
+ {"name": "Troubleshooting", "level": 2, "required": False},
664
+ ],
665
+ "variables": [
666
+ "title",
667
+ "version",
668
+ "date",
669
+ "author",
670
+ "overview",
671
+ "prerequisites",
672
+ "installation",
673
+ "configuration",
674
+ "usage",
675
+ "api_reference",
676
+ "examples",
677
+ "troubleshooting",
678
+ "changelog",
679
+ ],
680
+ "supported_formats": ["markdown", "html", "pdf"],
681
+ "style_presets": ["technical", "modern", "minimal"],
682
+ }
683
+
684
+ def _get_academic_paper_template(self) -> Dict[str, Any]:
685
+ """Get academic paper template"""
686
+ return {
687
+ "name": "Academic Paper",
688
+ "description": "Academic research paper template",
689
+ "content": """# {title}
690
+
691
+ **Author:** {author}
692
+ **Institution:** {institution}
693
+ **Email:** {email}
694
+ **Date:** {date}
695
+
696
+ ## Abstract
697
+
698
+ {abstract}
699
+
700
+ **Keywords:** {keywords}
701
+
702
+ ## 1. Introduction
703
+
704
+ {introduction}
705
+
706
+ ## 2. Literature Review
707
+
708
+ {literature_review}
709
+
710
+ ## 3. Methodology
711
+
712
+ {methodology}
713
+
714
+ ## 4. Results
715
+
716
+ {results}
717
+
718
+ ## 5. Discussion
719
+
720
+ {discussion}
721
+
722
+ ## 6. Conclusion
723
+
724
+ {conclusion}
725
+
726
+ ## References
727
+
728
+ {references}
729
+
730
+ ## Appendices
731
+
732
+ {appendices}
733
+ """,
734
+ "sections": [
735
+ {"name": "Abstract", "level": 2, "required": True},
736
+ {"name": "Introduction", "level": 2, "required": True},
737
+ {"name": "Literature Review", "level": 2, "required": True},
738
+ {"name": "Methodology", "level": 2, "required": True},
739
+ {"name": "Results", "level": 2, "required": True},
740
+ {"name": "Discussion", "level": 2, "required": True},
741
+ {"name": "Conclusion", "level": 2, "required": True},
742
+ {"name": "References", "level": 2, "required": True},
743
+ ],
744
+ "variables": [
745
+ "title",
746
+ "author",
747
+ "institution",
748
+ "email",
749
+ "date",
750
+ "abstract",
751
+ "keywords",
752
+ "introduction",
753
+ "literature_review",
754
+ "methodology",
755
+ "results",
756
+ "discussion",
757
+ "conclusion",
758
+ "references",
759
+ "appendices",
760
+ ],
761
+ "supported_formats": ["markdown", "latex", "pdf"],
762
+ "style_presets": ["academic", "classic", "formal"],
763
+ }
764
+
765
+ def _get_project_proposal_template(self) -> Dict[str, Any]:
766
+ """Get project proposal template"""
767
+ return {
768
+ "name": "Project Proposal",
769
+ "description": "Project proposal and planning template",
770
+ "content": """# {project_name}
771
+
772
+ **Proposal Date:** {date}
773
+ **Project Manager:** {project_manager}
774
+ **Department:** {department}
775
+ **Budget:** {budget}
776
+
777
+ ## Project Overview
778
+
779
+ {project_overview}
780
+
781
+ ## Objectives
782
+
783
+ {objectives}
784
+
785
+ ## Scope
786
+
787
+ ### In Scope
788
+ {in_scope}
789
+
790
+ ### Out of Scope
791
+ {out_scope}
792
+
793
+ ## Timeline
794
+
795
+ {timeline}
796
+
797
+ ## Resources Required
798
+
799
+ {resources}
800
+
801
+ ## Budget Breakdown
802
+
803
+ {budget_breakdown}
804
+
805
+ ## Risk Assessment
806
+
807
+ {risk_assessment}
808
+
809
+ ## Success Criteria
810
+
811
+ {success_criteria}
812
+
813
+ ## Next Steps
814
+
815
+ {next_steps}
816
+ """,
817
+ "variables": [
818
+ "project_name",
819
+ "date",
820
+ "project_manager",
821
+ "department",
822
+ "budget",
823
+ "project_overview",
824
+ "objectives",
825
+ "in_scope",
826
+ "out_scope",
827
+ "timeline",
828
+ "resources",
829
+ "budget_breakdown",
830
+ "risk_assessment",
831
+ "success_criteria",
832
+ "next_steps",
833
+ ],
834
+ "supported_formats": ["markdown", "html", "docx", "pdf"],
835
+ "style_presets": ["professional", "corporate", "modern"],
836
+ }
837
+
838
+ def _get_user_manual_template(self) -> Dict[str, Any]:
839
+ """Get user manual template"""
840
+ return {
841
+ "name": "User Manual",
842
+ "description": "User manual and guide template",
843
+ "content": """# {product_name} User Manual
844
+
845
+ **Version:** {version}
846
+ **Date:** {date}
847
+ **Support:** {support_contact}
848
+
849
+ ## Table of Contents
850
+
851
+ 1. [Getting Started](#getting-started)
852
+ 2. [Basic Features](#basic-features)
853
+ 3. [Advanced Features](#advanced-features)
854
+ 4. [Troubleshooting](#troubleshooting)
855
+ 5. [FAQ](#faq)
856
+
857
+ ## Getting Started
858
+
859
+ {getting_started}
860
+
861
+ ## Basic Features
862
+
863
+ {basic_features}
864
+
865
+ ## Advanced Features
866
+
867
+ {advanced_features}
868
+
869
+ ## Troubleshooting
870
+
871
+ {troubleshooting}
872
+
873
+ ## FAQ
874
+
875
+ {faq}
876
+
877
+ ## Contact Support
878
+
879
+ {support_info}
880
+ """,
881
+ "variables": [
882
+ "product_name",
883
+ "version",
884
+ "date",
885
+ "support_contact",
886
+ "getting_started",
887
+ "basic_features",
888
+ "advanced_features",
889
+ "troubleshooting",
890
+ "faq",
891
+ "support_info",
892
+ ],
893
+ "supported_formats": ["markdown", "html", "pdf"],
894
+ "style_presets": ["user-friendly", "modern", "minimal"],
895
+ }
896
+
897
+ def _get_presentation_template(self) -> Dict[str, Any]:
898
+ """Get presentation template"""
899
+ return {
900
+ "name": "Presentation",
901
+ "description": "Slide presentation template",
902
+ "content": """# {title}
903
+
904
+ ---
905
+
906
+ ## Slide 1: Title Slide
907
+
908
+ ### {title}
909
+ **Presenter:** {presenter}
910
+ **Date:** {date}
911
+ **Organization:** {organization}
912
+
913
+ ---
914
+
915
+ ## Slide 2: Agenda
916
+
917
+ {agenda}
918
+
919
+ ---
920
+
921
+ ## Slide 3: Introduction
922
+
923
+ {introduction}
924
+
925
+ ---
926
+
927
+ ## Slide 4: Main Content
928
+
929
+ {main_content}
930
+
931
+ ---
932
+
933
+ ## Slide 5: Conclusion
934
+
935
+ {conclusion}
936
+
937
+ ---
938
+
939
+ ## Slide 6: Questions
940
+
941
+ {questions}
942
+
943
+ ---
944
+
945
+ ## Slide 7: Thank You
946
+
947
+ **Contact Information:**
948
+ {contact_info}
949
+ """,
950
+ "variables": [
951
+ "title",
952
+ "presenter",
953
+ "date",
954
+ "organization",
955
+ "agenda",
956
+ "introduction",
957
+ "main_content",
958
+ "conclusion",
959
+ "questions",
960
+ "contact_info",
961
+ ],
962
+ "supported_formats": ["markdown", "html", "pptx"],
963
+ "style_presets": ["presentation", "modern", "colorful"],
964
+ }
965
+
966
+ def _get_newsletter_template(self) -> Dict[str, Any]:
967
+ """Get newsletter template"""
968
+ return {
969
+ "name": "Newsletter",
970
+ "description": "Newsletter and bulletin template",
971
+ "content": """# {newsletter_name}
972
+
973
+ **Issue #{issue_number}** | {date}
974
+
975
+ ## Headlines
976
+
977
+ {headlines}
978
+
979
+ ## Feature Article
980
+
981
+ {feature_article}
982
+
983
+ ## News Briefs
984
+
985
+ {news_briefs}
986
+
987
+ ## Upcoming Events
988
+
989
+ {upcoming_events}
990
+
991
+ ## Community Spotlight
992
+
993
+ {community_spotlight}
994
+
995
+ ## Contact Us
996
+
997
+ {contact_info}
998
+ """,
999
+ "variables": [
1000
+ "newsletter_name",
1001
+ "issue_number",
1002
+ "date",
1003
+ "headlines",
1004
+ "feature_article",
1005
+ "news_briefs",
1006
+ "upcoming_events",
1007
+ "community_spotlight",
1008
+ "contact_info",
1009
+ ],
1010
+ "supported_formats": ["markdown", "html"],
1011
+ "style_presets": ["newsletter", "colorful", "modern"],
1012
+ }
1013
+
1014
+ def _get_invoice_template(self) -> Dict[str, Any]:
1015
+ """Get invoice template"""
1016
+ return {
1017
+ "name": "Invoice",
1018
+ "description": "Business invoice template",
1019
+ "content": """# INVOICE
1020
+
1021
+ **Invoice #:** {invoice_number}
1022
+ **Date:** {date}
1023
+ **Due Date:** {due_date}
1024
+
1025
+ ## Bill To:
1026
+ {client_info}
1027
+
1028
+ ## Bill From:
1029
+ {company_info}
1030
+
1031
+ ## Items
1032
+
1033
+ {items_table}
1034
+
1035
+ ## Summary
1036
+
1037
+ **Subtotal:** {subtotal}
1038
+ **Tax:** {tax}
1039
+ **Total:** {total}
1040
+
1041
+ ## Payment Terms
1042
+
1043
+ {payment_terms}
1044
+
1045
+ ## Notes
1046
+
1047
+ {notes}
1048
+ """,
1049
+ "variables": [
1050
+ "invoice_number",
1051
+ "date",
1052
+ "due_date",
1053
+ "client_info",
1054
+ "company_info",
1055
+ "items_table",
1056
+ "subtotal",
1057
+ "tax",
1058
+ "total",
1059
+ "payment_terms",
1060
+ "notes",
1061
+ ],
1062
+ "supported_formats": ["markdown", "html", "pdf"],
1063
+ "style_presets": ["professional", "corporate", "minimal"],
1064
+ }
1065
+
1066
+ # Helper methods
1067
+ def _get_template(self, template_type: TemplateType) -> Dict[str, Any]:
1068
+ """Get template by type"""
1069
+ if template_type not in self.templates:
1070
+ raise TemplateError(f"Template not found: {template_type}")
1071
+ return self.templates[template_type]
1072
+
1073
def _generate_output_path(
    self,
    document_type: str,
    output_format: DocumentFormat,
    document_id: str,
) -> str:
    """Compose "<type>_<timestamp>_<first 8 id chars>.<ext>" inside the configured output directory"""
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # The extension is the format's value, except PPT which is written as pptx
    extension = output_format.value
    if output_format == DocumentFormat.PPT:
        extension = "pptx"

    short_id = document_id[:8]
    return os.path.join(self.config.output_dir, f"{document_type}_{stamp}_{short_id}.{extension}")
1087
+
1088
+ def _process_metadata(self, metadata: Dict[str, Any], output_format: DocumentFormat) -> Dict[str, Any]:
1089
+ """Process and validate metadata"""
1090
+ processed = metadata.copy()
1091
+
1092
+ # Add default metadata if missing
1093
+ if "date" not in processed:
1094
+ processed["date"] = datetime.now().strftime("%Y-%m-%d")
1095
+ if "created_by" not in processed:
1096
+ processed["created_by"] = "AIECS Document Creator"
1097
+ if "format" not in processed:
1098
+ processed["format"] = output_format.value
1099
+
1100
+ return processed
1101
+
1102
def _get_style_config(self, style_preset: StylePreset) -> Dict[str, Any]:
    """Return font family, font size and primary colour for a preset

    A preset with no entry in the table falls back to the DEFAULT preset's settings.
    """
    # (preset, font family, font size, primary colour)
    preset_rows = (
        (StylePreset.DEFAULT, "Arial", 12, "#000000"),
        (StylePreset.CORPORATE, "Calibri", 11, "#2E5D92"),
        (StylePreset.ACADEMIC, "Times New Roman", 12, "#000000"),
        (StylePreset.MODERN, "Helvetica", 11, "#333333"),
        (StylePreset.CLASSIC, "Georgia", 12, "#1a1a1a"),
        (StylePreset.MINIMAL, "Arial", 10, "#444444"),
        (StylePreset.COLORFUL, "Verdana", 11, "#2E8B57"),
        (StylePreset.PROFESSIONAL, "Segoe UI", 11, "#2F4F4F"),
    )
    table = {
        preset: {
            "font_family": family,
            "font_size": size,
            "colors": {"primary": colour},
        }
        for preset, family, size, colour in preset_rows
    }
    fallback = table[StylePreset.DEFAULT]
    return table.get(style_preset, fallback)
1147
+
1148
+ def _create_document_from_template(
1149
+ self,
1150
+ template: Dict[str, Any],
1151
+ metadata: Dict[str, Any],
1152
+ style_config: Dict[str, Any],
1153
+ output_format: DocumentFormat,
1154
+ ) -> str:
1155
+ """Create document content from template"""
1156
+ content = template.get("content", "")
1157
+
1158
+ # Apply metadata to template
1159
+ if content and template.get("variables"):
1160
+ # Replace template variables with metadata values
1161
+ for var in template["variables"]:
1162
+ placeholder = f"{{{var}}}"
1163
+ value = metadata.get(var, f"[{var}]")
1164
+ content = content.replace(placeholder, str(value))
1165
+
1166
+ # Add metadata header if required
1167
+ if self.config.include_metadata:
1168
+ metadata_header = self._generate_metadata_header(metadata, output_format)
1169
+ content = metadata_header + "\n\n" + content
1170
+
1171
+ return content
1172
+
1173
def _generate_metadata_header(self, metadata: Dict[str, Any], output_format: DocumentFormat) -> str:
    """Generate metadata header for document

    Markdown gets a "---" delimited key/value block, HTML gets <meta> tags wrapped
    in marker comments, and every other format gets a "# Document Metadata"
    heading followed by key/value lines.

    Keys and values in the HTML branch are escaped so that quotes, "&" or angle
    brackets in metadata cannot break out of the attribute or inject markup.
    """
    if output_format == DocumentFormat.MARKDOWN:
        return "---\n" + "\n".join([f"{k}: {v}" for k, v in metadata.items()]) + "\n---"
    elif output_format == DocumentFormat.HTML:
        from html import escape  # local import: only the HTML branch needs it

        meta_tags = "\n".join(
            [
                f'<meta name="{escape(str(k), quote=True)}" content="{escape(str(v), quote=True)}">'
                for k, v in metadata.items()
            ]
        )
        return f"<!-- Document Metadata -->\n{meta_tags}\n<!-- End Metadata -->"
    else:
        return "# Document Metadata\n" + "\n".join([f"{k}: {v}" for k, v in metadata.items()])
1182
+
1183
def _write_document_file(self, output_path: str, content: str, output_format: DocumentFormat):
    """Write document content to file

    Text formats are written as UTF-8 text, JSON wraps the content in a
    {"content": ...} object, PPTX/PPT and DOCX are delegated to the office-tool
    writers, and any other format is written as plain text.

    Args:
        output_path: Destination file; its parent directory is created when needed
        content: Document text to write
        output_format: Decides which writer is used
    """
    # A bare file name has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create the parent when there is one.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    if output_format in [
        DocumentFormat.MARKDOWN,
        DocumentFormat.HTML,
        DocumentFormat.PLAIN_TEXT,
        DocumentFormat.LATEX,
    ]:
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(content)
    elif output_format == DocumentFormat.JSON:
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump({"content": content}, f, indent=2, ensure_ascii=False)
    elif output_format in [DocumentFormat.PPTX, DocumentFormat.PPT]:
        # Use office_tool to create PPTX file
        self._write_pptx_file(output_path, content)
    elif output_format == DocumentFormat.DOCX:
        # Use office_tool to create DOCX file
        self._write_docx_file(output_path, content)
    else:
        # For other formats, write as text for now
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(content)
1208
+
1209
def _write_pptx_file(self, output_path: str, content: str):
    """Write content to PPTX file using office_tool

    Args:
        output_path: Destination path handed to office_tool.write_pptx
        content: Presentation text; split into slides before writing

    Raises:
        DocumentCreationError: If office_tool is unavailable, reports no success,
            or raises; the underlying exception is chained as the cause
    """
    if not self.office_tool:
        raise DocumentCreationError("OfficeTool not available. Cannot create PPTX files.")

    try:
        # Parse content to extract slides
        # Slides are separated by "---" or slide markers like "## Slide X:"
        slides = self._parse_content_to_slides(content)

        # Use office_tool to create PPTX
        result = self.office_tool.write_pptx(
            slides=slides,
            output_path=output_path,
            image_path=None,  # Can be enhanced to extract image paths from metadata
        )

        if not result.get("success"):
            raise DocumentCreationError(f"Failed to create PPTX file: {result}")

        self.logger.info(f"PPTX file created successfully: {output_path}")

    except Exception as e:
        # Chain explicitly so the traceback shows the root cause as the direct cause
        raise DocumentCreationError(f"Failed to write PPTX file: {str(e)}") from e
1233
+
1234
def _write_docx_file(self, output_path: str, content: str):
    """Write content to DOCX file using office_tool

    Args:
        output_path: Destination path handed to office_tool.write_docx
        content: Document text passed through unchanged

    Raises:
        DocumentCreationError: If office_tool is unavailable, reports no success,
            or raises; the underlying exception is chained as the cause
    """
    if not self.office_tool:
        raise DocumentCreationError("OfficeTool not available. Cannot create DOCX files.")

    try:
        # Use office_tool to create DOCX
        result = self.office_tool.write_docx(
            text=content,
            output_path=output_path,
            table_data=None,  # Can be enhanced to extract tables from content
        )

        if not result.get("success"):
            raise DocumentCreationError(f"Failed to create DOCX file: {result}")

        self.logger.info(f"DOCX file created successfully: {output_path}")

    except Exception as e:
        # Chain explicitly so the traceback shows the root cause as the direct cause
        raise DocumentCreationError(f"Failed to write DOCX file: {str(e)}") from e
1254
+
1255
+ def _parse_content_to_slides(self, content: str) -> List[str]:
1256
+ """Parse content string into list of slide contents
1257
+
1258
+ Supports multiple slide separation formats:
1259
+ - "---" separator (markdown style)
1260
+ - "## Slide X:" headers
1261
+ - Empty lines between slides
1262
+ """
1263
+ slides = []
1264
+
1265
+ # Split by "---" separator (common in markdown presentations)
1266
+ if "---" in content:
1267
+ parts = content.split("---")
1268
+ for part in parts:
1269
+ part = part.strip()
1270
+ if part:
1271
+ # Remove slide headers like "## Slide X: Title"
1272
+ lines = part.split("\n")
1273
+ cleaned_lines = []
1274
+ for line in lines:
1275
+ # Skip slide headers
1276
+ if line.strip().startswith("## Slide") and ":" in line:
1277
+ continue
1278
+ cleaned_lines.append(line)
1279
+ slide_content = "\n".join(cleaned_lines).strip()
1280
+ if slide_content:
1281
+ slides.append(slide_content)
1282
+ else:
1283
+ # Try to split by "## Slide" headers
1284
+ if "## Slide" in content:
1285
+ parts = content.split("## Slide")
1286
+ for i, part in enumerate(parts):
1287
+ if i == 0:
1288
+ # First part might be title slide
1289
+ part = part.strip()
1290
+ if part:
1291
+ slides.append(part)
1292
+ else:
1293
+ # Extract content after "Slide X: Title"
1294
+ lines = part.split("\n", 1)
1295
+ if len(lines) > 1:
1296
+ slide_content = lines[1].strip()
1297
+ if slide_content:
1298
+ slides.append(slide_content)
1299
+ else:
1300
+ # Fallback: split by double newlines (paragraph breaks)
1301
+ parts = content.split("\n\n")
1302
+ current_slide = []
1303
+ for part in parts:
1304
+ part = part.strip()
1305
+ if part:
1306
+ # If it's a header, start a new slide
1307
+ if part.startswith("#"):
1308
+ if current_slide:
1309
+ slides.append("\n".join(current_slide))
1310
+ current_slide = []
1311
+ current_slide.append(part)
1312
+
1313
+ if current_slide:
1314
+ slides.append("\n".join(current_slide))
1315
+
1316
+ # If no slides found, create a single slide with all content
1317
+ if not slides:
1318
+ slides = [content.strip()] if content.strip() else [""]
1319
+
1320
+ return slides
1321
+
1322
+ def _process_template_variables(self, template_content: str, variables: Dict[str, Any]) -> str:
1323
+ """Process template variables in content"""
1324
+ result = template_content
1325
+ for key, value in variables.items():
1326
+ placeholder = f"{{{key}}}"
1327
+ result = result.replace(placeholder, str(value))
1328
+ return result
1329
+
1330
+ def _generate_document_structure(
1331
+ self,
1332
+ sections: List[Dict[str, Any]],
1333
+ generate_toc: bool,
1334
+ numbering_style: Optional[str],
1335
+ ) -> str:
1336
+ """Generate document structure from sections"""
1337
+ structure_parts = []
1338
+
1339
+ # Generate table of contents
1340
+ if generate_toc:
1341
+ toc = self._generate_table_of_contents(sections, numbering_style)
1342
+ structure_parts.append(toc)
1343
+
1344
+ # Generate section headers
1345
+ for i, section in enumerate(sections, 1):
1346
+ level = section.get("level", 2)
1347
+ title = section.get("title", f"Section {i}")
1348
+
1349
+ if numbering_style == "numeric":
1350
+ header = f"{'#' * level} {i}. {title}"
1351
+ elif numbering_style == "alpha":
1352
+ alpha = chr(ord("A") + i - 1) if i <= 26 else f"Section{i}"
1353
+ header = f"{'#' * level} {alpha}. {title}"
1354
+ else:
1355
+ header = f"{'#' * level} {title}"
1356
+
1357
+ structure_parts.append(header)
1358
+ structure_parts.append("") # Empty line
1359
+
1360
+ # Add placeholder content
1361
+ placeholder = section.get("placeholder", f"Content for {title} goes here...")
1362
+ structure_parts.append(placeholder)
1363
+ structure_parts.append("") # Empty line
1364
+
1365
+ return "\n".join(structure_parts)
1366
+
1367
+ def _generate_table_of_contents(self, sections: List[Dict[str, Any]], numbering_style: Optional[str]) -> str:
1368
+ """Generate table of contents"""
1369
+ toc_parts = ["# Table of Contents", ""]
1370
+
1371
+ for i, section in enumerate(sections, 1):
1372
+ title = section.get("title", f"Section {i}")
1373
+ level = section.get("level", 2)
1374
+ indent = " " * (level - 1)
1375
+
1376
+ if numbering_style == "numeric":
1377
+ toc_line = f"{indent}- {i}. {title}"
1378
+ elif numbering_style == "alpha":
1379
+ alpha = chr(ord("A") + i - 1) if i <= 26 else f"Section{i}"
1380
+ toc_line = f"{indent}- {alpha}. {title}"
1381
+ else:
1382
+ toc_line = f"{indent}- {title}"
1383
+
1384
+ toc_parts.append(toc_line)
1385
+
1386
+ toc_parts.extend(["", "---", ""])
1387
+ return "\n".join(toc_parts)
1388
+
1389
+ def _combine_structure_with_content(self, structure: str, existing_content: str) -> str:
1390
+ """Combine generated structure with existing content"""
1391
+ if not existing_content.strip():
1392
+ return structure
1393
+
1394
+ # If existing content has structure markers, replace them
1395
+ if "# Table of Contents" in existing_content:
1396
+ # Replace existing structure
1397
+ lines = existing_content.split("\n")
1398
+ content_start = -1
1399
+ for i, line in enumerate(lines):
1400
+ if line.startswith("---") and i > 0:
1401
+ content_start = i + 1
1402
+ break
1403
+
1404
+ if content_start > 0:
1405
+ existing_body = "\n".join(lines[content_start:])
1406
+ return structure + "\n" + existing_body
1407
+
1408
+ return structure + "\n\n" + existing_content
1409
+
1410
def _detect_document_format(self, document_path: str) -> DocumentFormat:
    """Map the path's file extension (case-insensitive) to a DocumentFormat; unknown ones are PLAIN_TEXT"""
    # (format, extensions that select it)
    format_extensions = (
        (DocumentFormat.MARKDOWN, (".md", ".markdown")),
        (DocumentFormat.HTML, (".html", ".htm")),
        (DocumentFormat.PLAIN_TEXT, (".txt",)),
        (DocumentFormat.JSON, (".json",)),
        (DocumentFormat.XML, (".xml",)),
        (DocumentFormat.LATEX, (".tex",)),
        (DocumentFormat.DOCX, (".docx",)),
        (DocumentFormat.PDF, (".pdf",)),
        (DocumentFormat.PPTX, (".pptx",)),
        (DocumentFormat.PPT, (".ppt",)),
    )
    by_extension = {
        extension: document_format
        for document_format, extensions in format_extensions
        for extension in extensions
    }

    _stem, suffix = os.path.splitext(document_path)
    return by_extension.get(suffix.lower(), DocumentFormat.PLAIN_TEXT)
1428
+
1429
def _generate_format_specific_metadata(self, metadata: Dict[str, Any], file_format: DocumentFormat) -> str:
    """Serialize ``metadata`` in the notation used by ``file_format``.

    Args:
        metadata: Mapping of metadata names to values; values are rendered
            with their string form.
        file_format: Target document format.

    Returns:
        * MARKDOWN: ``key: value`` lines between two ``---`` fences.
        * HTML: one ``<meta name=... content=...>`` tag per entry wrapped in
          ``<head>``/``</head>``; names and values are HTML-escaped.
        * LATEX: one ``\\key{value}`` command per entry.
        * anything else: the output of ``_generate_generic_metadata``.
    """
    # Local import so the block does not depend on the file's import header.
    import html

    if file_format == DocumentFormat.MARKDOWN:
        front_matter = "\n".join(f"{k}: {v}" for k, v in metadata.items())
        return "---\n" + front_matter + "\n---"

    if file_format == DocumentFormat.HTML:
        # Escape both attribute values: an unescaped quote, angle bracket or
        # ampersand in a metadata value would otherwise produce malformed
        # (or injectable) markup.
        meta_tags = "\n".join(
            f'<meta name="{html.escape(str(k), quote=True)}" '
            f'content="{html.escape(str(v), quote=True)}">'
            for k, v in metadata.items()
        )
        return f"<head>\n{meta_tags}\n</head>"

    if file_format == DocumentFormat.LATEX:
        return "\n".join(f"\\{k}{{{v}}}" for k, v in metadata.items())

    return self._generate_generic_metadata(metadata)
1440
+
1441
+ def _generate_generic_metadata(self, metadata: Dict[str, Any]) -> str:
1442
+ """Generate generic metadata"""
1443
+ return "% " + "\n% ".join([f"{k}: {v}" for k, v in metadata.items()])
1444
+
1445
def _insert_metadata_into_document(
    self,
    document_path: str,
    metadata_content: str,
    file_format: DocumentFormat,
):
    """Write ``metadata_content`` into the document at ``document_path``.

    The file is read and rewritten in place as UTF-8.

    Args:
        document_path: Path of the document to modify.
        metadata_content: Already-serialized metadata text.
        file_format: Format of the document. For HTML documents that contain
            a ``<head>`` tag the metadata is placed right after the first
            such tag; in every other case it is prepended to the file,
            followed by a blank line.
    """
    with open(document_path, "r", encoding="utf-8") as f:
        content = f.read()

    if file_format == DocumentFormat.HTML and "<head>" in content:
        head_payload = metadata_content
        trimmed = metadata_content.strip()
        # Metadata that is already wrapped in <head>...</head> must be
        # unwrapped, otherwise the result contains a nested <head> element.
        if trimmed.startswith("<head>") and trimmed.endswith("</head>"):
            head_payload = trimmed[len("<head>"):-len("</head>")].strip("\n")
        # Only the first <head> is the real one; leave any later literal
        # occurrence of the text untouched.
        content = content.replace("<head>", f"<head>\n{head_payload}", 1)
    else:
        content = metadata_content + "\n\n" + content

    with open(document_path, "w", encoding="utf-8") as f:
        f.write(content)