aiecs-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic.

Files changed (90)
  1. aiecs/__init__.py +75 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +295 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +341 -0
  7. aiecs/config/__init__.py +15 -0
  8. aiecs/config/config.py +117 -0
  9. aiecs/config/registry.py +19 -0
  10. aiecs/core/__init__.py +46 -0
  11. aiecs/core/interface/__init__.py +34 -0
  12. aiecs/core/interface/execution_interface.py +150 -0
  13. aiecs/core/interface/storage_interface.py +214 -0
  14. aiecs/domain/__init__.py +20 -0
  15. aiecs/domain/context/__init__.py +28 -0
  16. aiecs/domain/context/content_engine.py +982 -0
  17. aiecs/domain/context/conversation_models.py +306 -0
  18. aiecs/domain/execution/__init__.py +12 -0
  19. aiecs/domain/execution/model.py +49 -0
  20. aiecs/domain/task/__init__.py +13 -0
  21. aiecs/domain/task/dsl_processor.py +460 -0
  22. aiecs/domain/task/model.py +50 -0
  23. aiecs/domain/task/task_context.py +257 -0
  24. aiecs/infrastructure/__init__.py +26 -0
  25. aiecs/infrastructure/messaging/__init__.py +13 -0
  26. aiecs/infrastructure/messaging/celery_task_manager.py +341 -0
  27. aiecs/infrastructure/messaging/websocket_manager.py +289 -0
  28. aiecs/infrastructure/monitoring/__init__.py +12 -0
  29. aiecs/infrastructure/monitoring/executor_metrics.py +138 -0
  30. aiecs/infrastructure/monitoring/structured_logger.py +50 -0
  31. aiecs/infrastructure/monitoring/tracing_manager.py +376 -0
  32. aiecs/infrastructure/persistence/__init__.py +12 -0
  33. aiecs/infrastructure/persistence/database_manager.py +286 -0
  34. aiecs/infrastructure/persistence/file_storage.py +671 -0
  35. aiecs/infrastructure/persistence/redis_client.py +162 -0
  36. aiecs/llm/__init__.py +54 -0
  37. aiecs/llm/base_client.py +99 -0
  38. aiecs/llm/client_factory.py +339 -0
  39. aiecs/llm/custom_callbacks.py +228 -0
  40. aiecs/llm/openai_client.py +125 -0
  41. aiecs/llm/vertex_client.py +186 -0
  42. aiecs/llm/xai_client.py +184 -0
  43. aiecs/main.py +351 -0
  44. aiecs/scripts/DEPENDENCY_SYSTEM_SUMMARY.md +241 -0
  45. aiecs/scripts/README_DEPENDENCY_CHECKER.md +309 -0
  46. aiecs/scripts/README_WEASEL_PATCH.md +126 -0
  47. aiecs/scripts/__init__.py +3 -0
  48. aiecs/scripts/dependency_checker.py +825 -0
  49. aiecs/scripts/dependency_fixer.py +348 -0
  50. aiecs/scripts/download_nlp_data.py +348 -0
  51. aiecs/scripts/fix_weasel_validator.py +121 -0
  52. aiecs/scripts/fix_weasel_validator.sh +82 -0
  53. aiecs/scripts/patch_weasel_library.sh +188 -0
  54. aiecs/scripts/quick_dependency_check.py +269 -0
  55. aiecs/scripts/run_weasel_patch.sh +41 -0
  56. aiecs/scripts/setup_nlp_data.sh +217 -0
  57. aiecs/tasks/__init__.py +2 -0
  58. aiecs/tasks/worker.py +111 -0
  59. aiecs/tools/__init__.py +196 -0
  60. aiecs/tools/base_tool.py +202 -0
  61. aiecs/tools/langchain_adapter.py +361 -0
  62. aiecs/tools/task_tools/__init__.py +82 -0
  63. aiecs/tools/task_tools/chart_tool.py +704 -0
  64. aiecs/tools/task_tools/classfire_tool.py +901 -0
  65. aiecs/tools/task_tools/image_tool.py +397 -0
  66. aiecs/tools/task_tools/office_tool.py +600 -0
  67. aiecs/tools/task_tools/pandas_tool.py +565 -0
  68. aiecs/tools/task_tools/report_tool.py +499 -0
  69. aiecs/tools/task_tools/research_tool.py +363 -0
  70. aiecs/tools/task_tools/scraper_tool.py +548 -0
  71. aiecs/tools/task_tools/search_api.py +7 -0
  72. aiecs/tools/task_tools/stats_tool.py +513 -0
  73. aiecs/tools/temp_file_manager.py +126 -0
  74. aiecs/tools/tool_executor/__init__.py +35 -0
  75. aiecs/tools/tool_executor/tool_executor.py +518 -0
  76. aiecs/utils/LLM_output_structor.py +409 -0
  77. aiecs/utils/__init__.py +23 -0
  78. aiecs/utils/base_callback.py +50 -0
  79. aiecs/utils/execution_utils.py +158 -0
  80. aiecs/utils/logging.py +1 -0
  81. aiecs/utils/prompt_loader.py +13 -0
  82. aiecs/utils/token_usage_repository.py +279 -0
  83. aiecs/ws/__init__.py +0 -0
  84. aiecs/ws/socket_server.py +41 -0
  85. aiecs-1.0.0.dist-info/METADATA +610 -0
  86. aiecs-1.0.0.dist-info/RECORD +90 -0
  87. aiecs-1.0.0.dist-info/WHEEL +5 -0
  88. aiecs-1.0.0.dist-info/entry_points.txt +7 -0
  89. aiecs-1.0.0.dist-info/licenses/LICENSE +225 -0
  90. aiecs-1.0.0.dist-info/top_level.txt +1 -0
aiecs/tools/task_tools/research_tool.py
@@ -0,0 +1,363 @@
+ import logging
+ from typing import Dict, Any, List, Optional, Tuple
+ import spacy
+ from spacy.language import Language
+ from pydantic import BaseModel, ValidationError, ConfigDict
+ from pydantic_settings import BaseSettings
+ from collections import Counter
+ from scipy.stats import pearsonr
+ import os
+
+ from aiecs.tools.base_tool import BaseTool
+ from aiecs.tools import register_tool
+
+ # Configuration for ResearchTool
+ class ResearchSettings(BaseSettings):
+     """
+     Configuration for ResearchTool.
+
+     Attributes:
+         max_workers (int): Maximum number of thread pool workers.
+         spacy_model (str): Default spaCy model to use.
+         max_text_length (int): Maximum text length for inputs.
+         allowed_spacy_models (List[str]): Allowed spaCy models.
+         env_prefix (str): Environment variable prefix.
+     """
+     max_workers: int = min(32, (os.cpu_count() or 4) * 2)
+     spacy_model: str = "en_core_web_sm"
+     max_text_length: int = 10_000
+     allowed_spacy_models: List[str] = ["en_core_web_sm", "zh_core_web_sm"]
+     env_prefix: str = 'RESEARCH_TOOL_'
+
+     model_config = ConfigDict(env_prefix='RESEARCH_TOOL_')
+
+ # Exceptions
+ class ResearchToolError(Exception):
+     """Base exception for ResearchTool errors."""
+     pass
+
+ class FileOperationError(ResearchToolError):
+     """Raised when file operations fail."""
+     pass
+
+ @register_tool('research')
+ class ResearchTool(BaseTool):
+     """
+     Tool for causal inference using Mill's methods, advanced induction, deduction, and text summarization.
+
+     Operations:
+         - mill_agreement: Identify common factors in positive cases.
+         - mill_difference: Identify factors present in positive but absent in negative cases.
+         - mill_joint: Combine agreement and difference methods.
+         - mill_residues: Identify residual causes after accounting for known causes.
+         - mill_concomitant: Analyze correlation between factor and effect variations.
+         - induction: Generalize patterns using spaCy-based clustering.
+         - deduction: Validate conclusions using spaCy-based rule reasoning.
+         - summarize: Summarize text using spaCy sentence ranking.
+
+     Inherits from BaseTool.
+     """
+     def __init__(self, config: Optional[Dict[str, Any]] = None):
+         """
+         Initialize ResearchTool with settings and resources.
+
+         Args:
+             config (Dict, optional): Configuration overrides for ResearchSettings.
+
+         Raises:
+             ValueError: If config contains invalid settings.
+         """
+         super().__init__(config)
+         self.settings = ResearchSettings()
+         if config:
+             try:
+                 self.settings = self.settings.model_validate({**self.settings.model_dump(), **config})
+             except ValidationError as e:
+                 raise ValueError(f"Invalid configuration: {e}")
+         self.logger = logging.getLogger(__name__)
+         if not self.logger.handlers:
+             handler = logging.StreamHandler()
+             handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
+             self.logger.addHandler(handler)
+         self.logger.setLevel(logging.INFO)
+         self._spacy_nlp: Optional[Language] = None
+
+     def __del__(self):
+         """Clean up resources when the object is destroyed."""
+         if hasattr(self, '_spacy_nlp') and self._spacy_nlp is not None:
+             self._spacy_nlp = None
+
+     def _get_spacy(self) -> Language:
+         """
+         Get or cache a spaCy pipeline.
+
+         Returns:
+             Language: spaCy NLP object.
+
+         Raises:
+             ResearchToolError: If the spaCy model is invalid.
+         """
+         if self._spacy_nlp is None:
+             if self.settings.spacy_model not in self.settings.allowed_spacy_models:
+                 raise ResearchToolError(f"Invalid spaCy model '{self.settings.spacy_model}', expected {self.settings.allowed_spacy_models}")
+             self._spacy_nlp = spacy.load(self.settings.spacy_model, disable=["textcat"])
+         return self._spacy_nlp
+
+     def mill_agreement(self, cases: List[Dict[str, Any]]) -> Dict[str, Any]:
+         """
+         Identify attribute(s) common to all cases with a positive outcome using Mill's Method of Agreement.
+
+         Args:
+             cases (List[Dict[str, Any]]): List of cases with attributes and outcomes.
+
+         Returns:
+             Dict[str, Any]: Common factors {'common_factors': List[str]}.
+
+         Raises:
+             FileOperationError: If processing fails.
+         """
+         try:
+             truthy = [c['attrs'] for c in cases if c.get('outcome')]
+             if not truthy:
+                 return {'common_factors': []}
+             common = set(k for k, v in truthy[0].items() if v)
+             for attrs in truthy[1:]:
+                 common &= set(k for k, v in attrs.items() if v)
+             return {'common_factors': list(common)}
+         except Exception as e:
+             raise FileOperationError(f"Failed to process mill_agreement: {str(e)}")
+
+     def mill_difference(self, positive_case: Dict[str, Any], negative_case: Dict[str, Any]) -> Dict[str, Any]:
+         """
+         Find attribute(s) present in positive case but absent in negative case using Mill's Method of Difference.
+
+         Args:
+             positive_case (Dict[str, Any]): Positive case with attributes and outcome.
+             negative_case (Dict[str, Any]): Negative case with attributes and outcome.
+
+         Returns:
+             Dict[str, Any]: Difference factors {'difference_factors': List[str]}.
+
+         Raises:
+             FileOperationError: If processing fails.
+         """
+         try:
+             pos = {k for k, v in positive_case.get('attrs', {}).items() if v}
+             neg = {k for k, v in negative_case.get('attrs', {}).items() if v}
+             diff = pos - neg
+             return {'difference_factors': list(diff)}
+         except Exception as e:
+             raise FileOperationError(f"Failed to process mill_difference: {str(e)}")
+
+     def mill_joint(self, positive_cases: List[Dict[str, Any]], negative_cases: List[Dict[str, Any]]) -> Dict[str, Any]:
+         """
+         Combine Mill's Method of Agreement and Difference to identify causal factors.
+
+         Args:
+             positive_cases (List[Dict[str, Any]]): List of positive cases.
+             negative_cases (List[Dict[str, Any]]): List of negative cases.
+
+         Returns:
+             Dict[str, Any]: Causal factors {'causal_factors': List[str]}.
+
+         Raises:
+             FileOperationError: If processing fails.
+         """
+         try:
+             truthy = [c['attrs'] for c in positive_cases if c.get('outcome')]
+             if not truthy:
+                 return {'causal_factors': []}
+             common = set(k for k, v in truthy[0].items() if v)
+             for attrs in truthy[1:]:
+                 common &= set(k for k, v in attrs.items() if v)
+             falsy = [c['attrs'] for c in negative_cases if not c.get('outcome')]
+             if not falsy:
+                 return {'causal_factors': list(common)}
+             for attrs in falsy:
+                 common -= set(k for k, v in attrs.items() if v)
+             return {'causal_factors': list(common)}
+         except Exception as e:
+             raise FileOperationError(f"Failed to process mill_joint: {str(e)}")
+
+     def mill_residues(self, cases: List[Dict[str, Any]], known_causes: Dict[str, List[str]]) -> Dict[str, Any]:
+         """
+         Identify residual causes after accounting for known causes using Mill's Method of Residues.
+
+         Args:
+             cases (List[Dict[str, Any]]): List of cases with attributes and effects.
+             known_causes (Dict[str, List[str]]): Known causes for effects.
+
+         Returns:
+             Dict[str, Any]: Residual causes {'residual_causes': Dict[str, List[str]]}.
+
+         Raises:
+             FileOperationError: If processing fails.
+         """
+         try:
+             residual = {}
+             for case in cases:
+                 effects = case.get('effects', {})
+                 attrs = set(k for k, v in case.get('attrs', {}).items() if v)
+                 for effect in effects:
+                     if effect in known_causes:
+                         known = set(known_causes[effect])
+                         residual[effect] = list(attrs - known)
+                     else:
+                         residual[effect] = list(attrs)
+             return {'residual_causes': residual}
+         except Exception as e:
+             raise FileOperationError(f"Failed to process mill_residues: {str(e)}")
+
+     def mill_concomitant(self, cases: List[Dict[str, Any]], factor: str, effect: str) -> Dict[str, Any]:
+         """
+         Analyze correlation between factor and effect variations using Mill's Method of Concomitant Variations.
+
+         Args:
+             cases (List[Dict[str, Any]]): List of cases with attributes.
+             factor (str): Factor to analyze.
+             effect (str): Effect to analyze.
+
+         Returns:
+             Dict[str, Any]: Correlation results {'correlation': float, 'pvalue': float}.
+
+         Raises:
+             FileOperationError: If processing fails.
+         """
+         try:
+             factor_vals = [case['attrs'].get(factor, 0) for case in cases]
+             effect_vals = [case['attrs'].get(effect, 0) for case in cases]
+             if len(factor_vals) < 2:
+                 return {'correlation': 0.0, 'pvalue': 1.0}
+
+             # Convert to numpy arrays to avoid PyTorch compatibility issues
+             import numpy as np
+             factor_array = np.array(factor_vals, dtype=np.float64)
+             effect_array = np.array(effect_vals, dtype=np.float64)
+
+             # Calculate correlation using numpy if scipy fails
+             try:
+                 corr, pval = pearsonr(factor_array, effect_array)
+             except (AttributeError, ImportError) as e:
+                 # Fallback to numpy correlation calculation
+                 self.logger.warning(f"scipy pearsonr failed ({e}), using numpy fallback")
+                 corr = np.corrcoef(factor_array, effect_array)[0, 1]
+                 # Simple p-value approximation (not statistically rigorous but functional)
+                 n = len(factor_array)
+                 if n <= 2:
+                     pval = 1.0
+                 else:
+                     # Approximate p-value using t-distribution
+                     t_stat = corr * np.sqrt((n - 2) / (1 - corr**2 + 1e-10))
+                     from scipy.stats import t
+                     pval = 2 * (1 - t.cdf(abs(t_stat), n - 2))
+
+             return {'correlation': float(corr), 'pvalue': float(pval)}
+         except Exception as e:
+             raise FileOperationError(f"Failed to process mill_concomitant: {str(e)}")
+
+     def induction(self, examples: List[str], max_keywords: int = 10) -> Dict[str, Any]:
+         """
+         Generalize patterns from examples using spaCy-based noun phrase clustering.
+
+         Args:
+             examples (List[str]): List of example texts.
+             max_keywords (int): Maximum number of keywords to extract.
+
+         Returns:
+             Dict[str, Any]: Generalized patterns {'patterns': List[str]}.
+
+         Raises:
+             FileOperationError: If induction fails.
+         """
+         try:
+             nlp = self._get_spacy()
+             docs = [nlp(ex) for ex in examples]
+             patterns = []
+             for doc in docs:
+                 patterns.extend([chunk.text.lower() for chunk in doc.noun_chunks])
+                 patterns.extend([token.lemma_.lower() for token in doc if token.pos_ == 'VERB'])
+             counter = Counter(patterns)
+             common = [word for word, count in counter.most_common() if count > 1][:max_keywords]
+             return {'patterns': common}
+         except Exception as e:
+             raise FileOperationError(f"Failed to process induction: {str(e)}")
+
+     def deduction(self, premises: List[str], conclusion: Optional[str]) -> Dict[str, Any]:
+         """
+         Validate if conclusion logically follows premises using spaCy dependency parsing.
+
+         Args:
+             premises (List[str]): List of premise statements.
+             conclusion (Optional[str]): Conclusion to validate.
+
+         Returns:
+             Dict[str, Any]: Validation result {'valid': bool, 'conclusion': str, 'reason': str}.
+
+         Raises:
+             FileOperationError: If deduction fails.
+         """
+         try:
+             nlp = self._get_spacy()
+             premises_docs = [nlp(p) for p in premises]
+             conclusion_doc = nlp(conclusion) if conclusion else None
+             if not conclusion_doc:
+                 return {'valid': False, 'conclusion': None, 'reason': 'No conclusion provided'}
+             premise_entities = set()
+             premise_predicates = set()
+             for doc in premises_docs:
+                 premise_entities.update(ent.text.lower() for ent in doc.ents)
+                 premise_predicates.update(token.lemma_.lower() for token in doc if token.pos_ == 'VERB')
+             conclusion_entities = set(ent.text.lower() for ent in conclusion_doc.ents)
+             conclusion_predicates = set(token.lemma_.lower() for token in conclusion_doc if token.pos_ == 'VERB')
+             entities_valid = conclusion_entities.issubset(premise_entities)
+             predicates_valid = conclusion_predicates.issubset(premise_predicates)
+             valid = entities_valid and predicates_valid
+             reason = (
+                 "Conclusion matches premise patterns."
+                 if valid
+                 else f"Conclusion contains unmatched {'entities' if not entities_valid else ''} "
+                 f"{'and ' if not entities_valid and not predicates_valid else ''}"
+                 f"{'predicates' if not predicates_valid else ''}."
+             )
+             return {'valid': valid, 'conclusion': conclusion, 'reason': reason}
+         except Exception as e:
+             raise FileOperationError(f"Failed to process deduction: {str(e)}")
+
+     def summarize(self, text: str, max_length: int = 150, language: Optional[str] = None) -> str:
+         """
+         Summarize text using spaCy-based sentence ranking.
+
+         Args:
+             text (str): Text to summarize.
+             max_length (int): Maximum length of the summary.
+             language (Optional[str]): Language of the text.
+
+         Returns:
+             str: Summarized text.
+
+         Raises:
+             FileOperationError: If summarization fails.
+         """
+         try:
+             nlp = self._get_spacy()
+             doc = nlp(text)
+             sentences = [sent.text for sent in doc.sents]
+             if not sentences:
+                 return ""
+             keywords = [token.lemma_.lower() for token in doc if token.pos_ in ('NOUN', 'VERB', 'ADJ') and not token.is_stop]
+             keyword_freq = Counter(keywords)
+             scores = []
+             for sent in sentences:
+                 sent_doc = nlp(sent)
+                 sent_keywords = [token.lemma_.lower() for token in sent_doc if token.pos_ in ('NOUN', 'VERB', 'ADJ')]
+                 score = sum(keyword_freq.get(k, 0) for k in sent_keywords) / (len(sent_keywords) + 1)
+                 scores.append((sent, score))
+             scores.sort(key=lambda x: x[1], reverse=True)
+             selected = [sent for sent, _ in scores[:max(1, max_length // 50)]]
+             summary = ' '.join(selected)
+             words = summary.split()
+             if len(words) > max_length:
+                 summary = ' '.join(words[:max_length]) + '...'
+             return summary
+         except Exception as e:
+             raise FileOperationError(f"Failed to process summarize: {str(e)}")
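
For orientation, below is a minimal usage sketch of the ResearchTool API added in this diff. It is illustrative only and not part of the package: it assumes the module import path matches the file location, that BaseTool tolerates a None config, that the en_core_web_sm spaCy model is installed locally, and the case data is invented for demonstration.

# Hypothetical usage sketch (assumes `en_core_web_sm` is downloaded and BaseTool accepts config=None)
from aiecs.tools.task_tools.research_tool import ResearchTool

tool = ResearchTool()

# Mill's Method of Agreement: attributes shared by every positive-outcome case.
cases = [
    {'attrs': {'fertilizer': True, 'sunlight': True, 'shade': False}, 'outcome': True},
    {'attrs': {'fertilizer': True, 'sunlight': True, 'shade': True}, 'outcome': True},
    {'attrs': {'fertilizer': False, 'sunlight': True, 'shade': False}, 'outcome': False},
]
print(tool.mill_agreement(cases))                # {'common_factors': ['fertilizer', 'sunlight']} (set order varies)

# Method of Difference: attributes present in the positive case but absent in the negative one.
print(tool.mill_difference(cases[0], cases[2]))  # {'difference_factors': ['fertilizer']}

# Concomitant variation: Pearson correlation between two numeric attribute series.
dose_cases = [{'attrs': {'dose': d, 'response': 2 * d}} for d in range(1, 6)]
print(tool.mill_concomitant(dose_cases, factor='dose', effect='response'))

# Extractive summarization via spaCy sentence ranking.
text = ("SpaCy splits the text into sentences. Each sentence is scored by keyword frequency. "
        "The highest-scoring sentences become the summary.")
print(tool.summarize(text, max_length=30))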