devsper 2.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devsper/__init__.py +14 -0
- devsper/agents/a2a/__init__.py +27 -0
- devsper/agents/a2a/client.py +126 -0
- devsper/agents/a2a/discovery.py +24 -0
- devsper/agents/a2a/server.py +128 -0
- devsper/agents/a2a/tool_adapter.py +68 -0
- devsper/agents/a2a/types.py +49 -0
- devsper/agents/agent.py +602 -0
- devsper/agents/critic.py +80 -0
- devsper/agents/message_bus.py +124 -0
- devsper/agents/roles.py +181 -0
- devsper/agents/run_agent.py +78 -0
- devsper/analytics/__init__.py +5 -0
- devsper/analytics/tool_analytics.py +78 -0
- devsper/audit/__init__.py +5 -0
- devsper/audit/logger.py +214 -0
- devsper/bus/__init__.py +29 -0
- devsper/bus/backends/__init__.py +5 -0
- devsper/bus/backends/base.py +38 -0
- devsper/bus/backends/memory.py +55 -0
- devsper/bus/backends/redis.py +146 -0
- devsper/bus/message.py +56 -0
- devsper/bus/schema_version.py +3 -0
- devsper/bus/topics.py +19 -0
- devsper/cache/__init__.py +6 -0
- devsper/cache/embedding_index.py +98 -0
- devsper/cache/hashing.py +24 -0
- devsper/cache/store.py +153 -0
- devsper/cache/task_cache.py +191 -0
- devsper/cli/__init__.py +6 -0
- devsper/cli/commands/reg.py +733 -0
- devsper/cli/github_oauth.py +157 -0
- devsper/cli/init.py +637 -0
- devsper/cli/main.py +2956 -0
- devsper/cli/run_progress.py +103 -0
- devsper/cli/ui/__init__.py +65 -0
- devsper/cli/ui/components.py +94 -0
- devsper/cli/ui/errors.py +104 -0
- devsper/cli/ui/logging.py +120 -0
- devsper/cli/ui/onboarding.py +102 -0
- devsper/cli/ui/progress.py +43 -0
- devsper/cli/ui/run_view.py +308 -0
- devsper/cli/ui/theme.py +40 -0
- devsper/cluster/__init__.py +29 -0
- devsper/cluster/election.py +84 -0
- devsper/cluster/local.py +97 -0
- devsper/cluster/node_info.py +77 -0
- devsper/cluster/registry.py +71 -0
- devsper/cluster/router.py +117 -0
- devsper/cluster/state_backend.py +105 -0
- devsper/compliance/__init__.py +5 -0
- devsper/compliance/pii.py +147 -0
- devsper/config/__init__.py +52 -0
- devsper/config/config_loader.py +121 -0
- devsper/config/defaults.py +77 -0
- devsper/config/resolver.py +342 -0
- devsper/config/schema.py +237 -0
- devsper/credentials/__init__.py +19 -0
- devsper/credentials/cli.py +197 -0
- devsper/credentials/migration.py +124 -0
- devsper/credentials/store.py +142 -0
- devsper/dashboard/__init__.py +9 -0
- devsper/dashboard/dashboard.py +87 -0
- devsper/dev/__init__.py +25 -0
- devsper/dev/builder.py +195 -0
- devsper/dev/debugger.py +95 -0
- devsper/dev/repo_index.py +138 -0
- devsper/dev/sandbox.py +203 -0
- devsper/dev/scaffold.py +122 -0
- devsper/embeddings/__init__.py +5 -0
- devsper/embeddings/service.py +36 -0
- devsper/explainability/__init__.py +14 -0
- devsper/explainability/decision_tree.py +104 -0
- devsper/explainability/rationale.py +38 -0
- devsper/explainability/simulation.py +56 -0
- devsper/hitl/__init__.py +13 -0
- devsper/hitl/approval.py +160 -0
- devsper/hitl/escalation.py +95 -0
- devsper/intelligence/__init__.py +9 -0
- devsper/intelligence/adaptation.py +88 -0
- devsper/intelligence/analysis/__init__.py +19 -0
- devsper/intelligence/analysis/analyzer.py +71 -0
- devsper/intelligence/analysis/cost_estimator.py +66 -0
- devsper/intelligence/analysis/formatter.py +103 -0
- devsper/intelligence/analysis/run_report.py +402 -0
- devsper/intelligence/learning_engine.py +92 -0
- devsper/intelligence/strategies/__init__.py +23 -0
- devsper/intelligence/strategies/base.py +14 -0
- devsper/intelligence/strategies/code_analysis_strategy.py +33 -0
- devsper/intelligence/strategies/data_science_strategy.py +33 -0
- devsper/intelligence/strategies/document_pipeline_strategy.py +33 -0
- devsper/intelligence/strategies/experiment_strategy.py +33 -0
- devsper/intelligence/strategies/research_strategy.py +34 -0
- devsper/intelligence/strategy_selector.py +84 -0
- devsper/intelligence/synthesis.py +132 -0
- devsper/intelligence/task_optimizer.py +92 -0
- devsper/knowledge/__init__.py +5 -0
- devsper/knowledge/extractor.py +204 -0
- devsper/knowledge/knowledge_graph.py +184 -0
- devsper/knowledge/query.py +285 -0
- devsper/memory/__init__.py +35 -0
- devsper/memory/consolidation.py +138 -0
- devsper/memory/embeddings.py +60 -0
- devsper/memory/memory_index.py +97 -0
- devsper/memory/memory_router.py +62 -0
- devsper/memory/memory_store.py +221 -0
- devsper/memory/memory_types.py +54 -0
- devsper/memory/namespaces.py +45 -0
- devsper/memory/scoring.py +77 -0
- devsper/memory/summarizer.py +52 -0
- devsper/nodes/__init__.py +5 -0
- devsper/nodes/controller.py +449 -0
- devsper/nodes/rpc.py +127 -0
- devsper/nodes/single.py +161 -0
- devsper/nodes/worker.py +506 -0
- devsper/orchestration/__init__.py +19 -0
- devsper/orchestration/meta_planner.py +239 -0
- devsper/orchestration/priority_queue.py +61 -0
- devsper/plugins/__init__.py +19 -0
- devsper/plugins/marketplace/__init__.py +0 -0
- devsper/plugins/plugin_loader.py +70 -0
- devsper/plugins/plugin_registry.py +34 -0
- devsper/plugins/registry.py +83 -0
- devsper/protocols/__init__.py +6 -0
- devsper/providers/__init__.py +17 -0
- devsper/providers/anthropic.py +84 -0
- devsper/providers/base.py +75 -0
- devsper/providers/complexity_router.py +94 -0
- devsper/providers/gemini.py +36 -0
- devsper/providers/github.py +180 -0
- devsper/providers/model_router.py +40 -0
- devsper/providers/openai.py +105 -0
- devsper/providers/router/__init__.py +21 -0
- devsper/providers/router/backends/__init__.py +19 -0
- devsper/providers/router/backends/anthropic_backend.py +111 -0
- devsper/providers/router/backends/custom_backend.py +138 -0
- devsper/providers/router/backends/gemini_backend.py +89 -0
- devsper/providers/router/backends/github_backend.py +165 -0
- devsper/providers/router/backends/ollama_backend.py +104 -0
- devsper/providers/router/backends/openai_backend.py +142 -0
- devsper/providers/router/backends/vllm_backend.py +35 -0
- devsper/providers/router/base.py +60 -0
- devsper/providers/router/factory.py +92 -0
- devsper/providers/router/legacy.py +101 -0
- devsper/providers/router/router.py +135 -0
- devsper/reasoning/__init__.py +12 -0
- devsper/reasoning/graph.py +59 -0
- devsper/reasoning/nodes.py +20 -0
- devsper/reasoning/store.py +67 -0
- devsper/runtime/__init__.py +12 -0
- devsper/runtime/health.py +88 -0
- devsper/runtime/replay.py +53 -0
- devsper/runtime/replay_engine.py +142 -0
- devsper/runtime/run_history.py +204 -0
- devsper/runtime/telemetry.py +116 -0
- devsper/runtime/visualize.py +58 -0
- devsper/sandbox/__init__.py +13 -0
- devsper/sandbox/sandbox.py +161 -0
- devsper/swarm/checkpointer.py +65 -0
- devsper/swarm/executor.py +558 -0
- devsper/swarm/map_reduce.py +44 -0
- devsper/swarm/planner.py +197 -0
- devsper/swarm/prefetcher.py +91 -0
- devsper/swarm/scheduler.py +153 -0
- devsper/swarm/speculation.py +47 -0
- devsper/swarm/swarm.py +562 -0
- devsper/tools/__init__.py +33 -0
- devsper/tools/base.py +29 -0
- devsper/tools/code_intelligence/__init__.py +13 -0
- devsper/tools/code_intelligence/api_surface_extractor.py +73 -0
- devsper/tools/code_intelligence/architecture_analyzer.py +65 -0
- devsper/tools/code_intelligence/codebase_indexer.py +71 -0
- devsper/tools/code_intelligence/dependency_graph_builder.py +67 -0
- devsper/tools/code_intelligence/design_pattern_detector.py +62 -0
- devsper/tools/code_intelligence/large_function_detector.py +68 -0
- devsper/tools/code_intelligence/module_responsibility_mapper.py +56 -0
- devsper/tools/code_intelligence/parallel_codebase_analysis.py +44 -0
- devsper/tools/code_intelligence/refactor_candidate_detector.py +81 -0
- devsper/tools/code_intelligence/repository_semantic_index.py +61 -0
- devsper/tools/code_intelligence/test_coverage_estimator.py +62 -0
- devsper/tools/coding/__init__.py +12 -0
- devsper/tools/coding/analyze_code_complexity.py +48 -0
- devsper/tools/coding/dependency_analyzer.py +42 -0
- devsper/tools/coding/extract_functions.py +38 -0
- devsper/tools/coding/format_python.py +50 -0
- devsper/tools/coding/generate_docstrings.py +40 -0
- devsper/tools/coding/generate_unit_tests.py +42 -0
- devsper/tools/coding/lint_python.py +51 -0
- devsper/tools/coding/refactor_function.py +41 -0
- devsper/tools/coding/repo_structure_map.py +54 -0
- devsper/tools/coding/run_python.py +53 -0
- devsper/tools/data/__init__.py +12 -0
- devsper/tools/data/column_type_detection.py +64 -0
- devsper/tools/data/csv_summary.py +52 -0
- devsper/tools/data/dataframe_filter.py +51 -0
- devsper/tools/data/dataframe_groupby.py +47 -0
- devsper/tools/data/dataframe_stats.py +38 -0
- devsper/tools/data/dataset_sampling.py +55 -0
- devsper/tools/data/dataset_schema.py +45 -0
- devsper/tools/data/json_pretty_print.py +37 -0
- devsper/tools/data/json_query.py +46 -0
- devsper/tools/data/missing_value_report.py +47 -0
- devsper/tools/data_science/__init__.py +13 -0
- devsper/tools/data_science/correlation_heatmap.py +72 -0
- devsper/tools/data_science/dataset_bias_detector.py +49 -0
- devsper/tools/data_science/dataset_distribution_report.py +64 -0
- devsper/tools/data_science/dataset_drift_detector.py +64 -0
- devsper/tools/data_science/dataset_outlier_detector.py +65 -0
- devsper/tools/data_science/dataset_profile.py +76 -0
- devsper/tools/data_science/distributed_dataset_processor.py +54 -0
- devsper/tools/data_science/feature_engineering_suggestions.py +69 -0
- devsper/tools/data_science/feature_importance_estimator.py +82 -0
- devsper/tools/data_science/model_input_validator.py +59 -0
- devsper/tools/data_science/time_series_analyzer.py +57 -0
- devsper/tools/documents/__init__.py +11 -0
- devsper/tools/documents/_docproc.py +56 -0
- devsper/tools/documents/document_to_markdown.py +29 -0
- devsper/tools/documents/extract_document_images.py +39 -0
- devsper/tools/documents/extract_document_text.py +29 -0
- devsper/tools/documents/extract_equations.py +36 -0
- devsper/tools/documents/extract_tables.py +47 -0
- devsper/tools/documents/summarize_document.py +42 -0
- devsper/tools/documents/write_latex_document.py +133 -0
- devsper/tools/documents/write_markdown_document.py +89 -0
- devsper/tools/documents/write_word_document.py +149 -0
- devsper/tools/experiments/__init__.py +13 -0
- devsper/tools/experiments/bootstrap_estimator.py +54 -0
- devsper/tools/experiments/experiment_report_generator.py +50 -0
- devsper/tools/experiments/experiment_tracker.py +36 -0
- devsper/tools/experiments/grid_search_runner.py +50 -0
- devsper/tools/experiments/model_benchmark_runner.py +45 -0
- devsper/tools/experiments/monte_carlo_experiment.py +38 -0
- devsper/tools/experiments/parameter_sweep_runner.py +51 -0
- devsper/tools/experiments/result_comparator.py +58 -0
- devsper/tools/experiments/simulation_runner.py +43 -0
- devsper/tools/experiments/statistical_significance_test.py +56 -0
- devsper/tools/experiments/swarm_map_reduce.py +42 -0
- devsper/tools/filesystem/__init__.py +12 -0
- devsper/tools/filesystem/append_file.py +42 -0
- devsper/tools/filesystem/file_hash.py +40 -0
- devsper/tools/filesystem/file_line_count.py +36 -0
- devsper/tools/filesystem/file_metadata.py +38 -0
- devsper/tools/filesystem/file_preview.py +55 -0
- devsper/tools/filesystem/find_large_files.py +50 -0
- devsper/tools/filesystem/list_directory.py +39 -0
- devsper/tools/filesystem/read_file.py +35 -0
- devsper/tools/filesystem/search_files.py +60 -0
- devsper/tools/filesystem/write_file.py +41 -0
- devsper/tools/flagship/__init__.py +15 -0
- devsper/tools/flagship/distributed_document_analysis.py +77 -0
- devsper/tools/flagship/docproc_corpus_pipeline.py +91 -0
- devsper/tools/flagship/repository_semantic_map.py +99 -0
- devsper/tools/flagship/research_graph_builder.py +111 -0
- devsper/tools/flagship/swarm_experiment_runner.py +86 -0
- devsper/tools/knowledge/__init__.py +10 -0
- devsper/tools/knowledge/citation_graph_builder.py +69 -0
- devsper/tools/knowledge/concept_frequency_analyzer.py +74 -0
- devsper/tools/knowledge/corpus_builder.py +66 -0
- devsper/tools/knowledge/cross_document_entity_linker.py +71 -0
- devsper/tools/knowledge/document_corpus_summary.py +68 -0
- devsper/tools/knowledge/document_topic_extractor.py +58 -0
- devsper/tools/knowledge/knowledge_graph_extractor.py +58 -0
- devsper/tools/knowledge/timeline_extractor.py +59 -0
- devsper/tools/math/__init__.py +12 -0
- devsper/tools/math/calculate_expression.py +52 -0
- devsper/tools/math/correlation.py +44 -0
- devsper/tools/math/distribution_summary.py +39 -0
- devsper/tools/math/histogram.py +53 -0
- devsper/tools/math/linear_regression.py +47 -0
- devsper/tools/math/matrix_multiply.py +38 -0
- devsper/tools/math/mean_std.py +35 -0
- devsper/tools/math/monte_carlo_simulation.py +43 -0
- devsper/tools/math/polynomial_fit.py +40 -0
- devsper/tools/math/random_sample.py +36 -0
- devsper/tools/mcp/__init__.py +23 -0
- devsper/tools/mcp/adapter.py +53 -0
- devsper/tools/mcp/client.py +235 -0
- devsper/tools/mcp/discovery.py +53 -0
- devsper/tools/memory/__init__.py +16 -0
- devsper/tools/memory/delete_memory.py +25 -0
- devsper/tools/memory/list_memory.py +34 -0
- devsper/tools/memory/search_memory.py +36 -0
- devsper/tools/memory/store_memory.py +47 -0
- devsper/tools/memory/summarize_memory.py +41 -0
- devsper/tools/memory/tag_memory.py +47 -0
- devsper/tools/pipelines.py +92 -0
- devsper/tools/registry.py +39 -0
- devsper/tools/research/__init__.py +12 -0
- devsper/tools/research/arxiv_download.py +55 -0
- devsper/tools/research/arxiv_search.py +58 -0
- devsper/tools/research/citation_extractor.py +35 -0
- devsper/tools/research/duckduckgo_search.py +42 -0
- devsper/tools/research/paper_metadata_extractor.py +45 -0
- devsper/tools/research/paper_summarizer.py +41 -0
- devsper/tools/research/research_question_generator.py +39 -0
- devsper/tools/research/topic_cluster.py +46 -0
- devsper/tools/research/web_search.py +47 -0
- devsper/tools/research/wikipedia_lookup.py +50 -0
- devsper/tools/research_advanced/__init__.py +14 -0
- devsper/tools/research_advanced/citation_context_extractor.py +60 -0
- devsper/tools/research_advanced/literature_review_generator.py +79 -0
- devsper/tools/research_advanced/methodology_extractor.py +58 -0
- devsper/tools/research_advanced/paper_contribution_extractor.py +50 -0
- devsper/tools/research_advanced/paper_dataset_identifier.py +49 -0
- devsper/tools/research_advanced/paper_method_comparator.py +62 -0
- devsper/tools/research_advanced/paper_similarity_search.py +69 -0
- devsper/tools/research_advanced/paper_trend_analyzer.py +69 -0
- devsper/tools/research_advanced/parallel_document_analyzer.py +56 -0
- devsper/tools/research_advanced/research_gap_finder.py +71 -0
- devsper/tools/research_advanced/research_topic_mapper.py +69 -0
- devsper/tools/research_advanced/swarm_literature_review.py +58 -0
- devsper/tools/scoring/__init__.py +52 -0
- devsper/tools/scoring/report.py +44 -0
- devsper/tools/scoring/scorer.py +39 -0
- devsper/tools/scoring/selector.py +61 -0
- devsper/tools/scoring/store.py +267 -0
- devsper/tools/selector.py +130 -0
- devsper/tools/system/__init__.py +12 -0
- devsper/tools/system/cpu_usage.py +22 -0
- devsper/tools/system/disk_usage.py +35 -0
- devsper/tools/system/environment_variables.py +29 -0
- devsper/tools/system/memory_usage.py +23 -0
- devsper/tools/system/pip_install.py +44 -0
- devsper/tools/system/pip_search.py +29 -0
- devsper/tools/system/process_list.py +34 -0
- devsper/tools/system/python_package_list.py +40 -0
- devsper/tools/system/run_shell_command.py +51 -0
- devsper/tools/system/system_info.py +26 -0
- devsper/tools/tool_runner.py +122 -0
- devsper/tui/__init__.py +5 -0
- devsper/tui/activity_feed_view.py +73 -0
- devsper/tui/adaptive_tasks_view.py +75 -0
- devsper/tui/agent_role_view.py +35 -0
- devsper/tui/app.py +395 -0
- devsper/tui/dashboard_screen.py +290 -0
- devsper/tui/dev_view.py +99 -0
- devsper/tui/inject_screen.py +73 -0
- devsper/tui/knowledge_graph_view.py +46 -0
- devsper/tui/layout.py +43 -0
- devsper/tui/logs_view.py +83 -0
- devsper/tui/memory_view.py +58 -0
- devsper/tui/performance_view.py +33 -0
- devsper/tui/reasoning_graph_view.py +39 -0
- devsper/tui/results_view.py +139 -0
- devsper/tui/swarm_view.py +37 -0
- devsper/tui/task_detail_screen.py +55 -0
- devsper/tui/task_view.py +103 -0
- devsper/types/event.py +97 -0
- devsper/types/exceptions.py +21 -0
- devsper/types/swarm.py +41 -0
- devsper/types/task.py +80 -0
- devsper/upgrade/__init__.py +21 -0
- devsper/upgrade/changelog.py +124 -0
- devsper/upgrade/cli.py +145 -0
- devsper/upgrade/installer.py +103 -0
- devsper/upgrade/notifier.py +52 -0
- devsper/upgrade/version_check.py +121 -0
- devsper/utils/event_logger.py +88 -0
- devsper/utils/http.py +43 -0
- devsper/utils/models.py +54 -0
- devsper/visualization/__init__.py +5 -0
- devsper/visualization/dag_export.py +67 -0
- devsper/workflow/__init__.py +18 -0
- devsper/workflow/conditions.py +157 -0
- devsper/workflow/context.py +108 -0
- devsper/workflow/loader.py +156 -0
- devsper/workflow/resolver.py +109 -0
- devsper/workflow/runner.py +562 -0
- devsper/workflow/schema.py +63 -0
- devsper/workflow/validator.py +128 -0
- devsper-2.1.6.dist-info/METADATA +346 -0
- devsper-2.1.6.dist-info/RECORD +375 -0
- devsper-2.1.6.dist-info/WHEEL +4 -0
- devsper-2.1.6.dist-info/entry_points.txt +3 -0
- devsper-2.1.6.dist-info/licenses/LICENSE +639 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Build a citation graph from document text: extract citations and link doc -> refs."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from devsper.tools.base import Tool
|
|
7
|
+
from devsper.tools.registry import register
|
|
8
|
+
from devsper.tools.documents._docproc import run_docproc_to_markdown
|
|
9
|
+
|
|
10
|
+
# Compiled regexes for the in-text citation styles this module recognises.
# None of them contain capturing groups, so ``findall`` returns the complete
# matched citation string for every hit.
CITE_PATTERNS = [
    # Parenthesised author-year citations that contain "et al." or an
    # ampersand, e.g. "(Smith et al., 2020)" or "(Smith & Jones, 2020)".
    # The word boundary is attached to "et" only: a ``\b`` directly in front
    # of "&" cannot match when the ampersand follows a space, which is how
    # two-author citations are normally written.
    re.compile(r"\([^)]*?(?:\bet\s+al\.?|&\s*[^)]+)[^)]*?\d{4}[^)]*\)", re.I),
    # Numeric bracket citations, e.g. "[3]", "[1, 2]" or "[4-7]".
    re.compile(r"\[\d+(?:\s*[-–,]\s*\d+)*\]"),
    # Single-surname citations, e.g. "(Smith, 2020)" or "(Smith et al. 2020)".
    re.compile(r"\([A-Z][a-z]+(?:\s+et\s+al\.?)?,?\s*\d{4}\)"),
]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CitationGraphBuilderTool(Tool):
    """
    Tool that converts each readable document to text and records which
    citation strings occur in it.

    The JSON result lists the processed document names as ``nodes`` and one
    ``{"from": <doc name>, "citation": <text>}`` entry per distinct citation
    as ``edges``.
    """

    name = "citation_graph_builder"
    description = "Build a citation graph from documents: extract citations and doc-to-ref links."
    input_schema = {
        "type": "object",
        "properties": {
            "file_paths": {
                "type": "array",
                "items": {"type": "string"},
                "description": "List of paths to documents",
            },
        },
        "required": ["file_paths"],
    }

    def _extract_citations(self, text: str) -> list[str]:
        """Return the distinct citation strings in *text*, in first-seen order."""
        # A dict doubles as an insertion-ordered set.
        first_seen: dict[str, None] = {}
        for pattern in CITE_PATTERNS:
            for hit in pattern.findall(text):
                first_seen.setdefault(hit, None)
        return list(first_seen)

    def run(self, **kwargs) -> str:
        """
        Build the graph for ``file_paths`` and return it as a JSON string.

        Entries that are not non-blank strings, do not point at an existing
        file, or fail conversion are skipped. An error message string is
        returned when ``file_paths`` is missing, empty, or not a list.
        """
        import json

        requested = kwargs.get("file_paths")
        if not (requested and isinstance(requested, list)):
            return "Error: file_paths must be a non-empty list of strings"

        doc_names = []
        links = []
        for raw in requested:
            if not (isinstance(raw, str) and raw.strip()):
                continue
            doc = Path(raw.strip()).resolve()
            if not (doc.exists() and doc.is_file()):
                continue
            markdown, failure = run_docproc_to_markdown(str(doc))
            if failure:
                continue
            label = doc.name
            doc_names.append(label)
            links.extend(
                {"from": label, "citation": citation}
                for citation in self._extract_citations(markdown or "")
            )

        return json.dumps(
            {"nodes": doc_names, "edges": links, "edge_count": len(links)},
            indent=2,
        )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# Instantiate the tool and pass it to register() when this module is imported.
register(CitationGraphBuilderTool())
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""Analyze concept (word) frequency across multiple documents."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from devsper.tools.base import Tool
|
|
7
|
+
from devsper.tools.registry import register
|
|
8
|
+
from devsper.tools.documents._docproc import run_docproc_to_markdown
|
|
9
|
+
|
|
10
|
+
# Common English function words that are filtered out before counting.
STOP = frozenset(
    (
        "a an the and or but in on at to for of with by from as "
        "is was are were been be have has had do does did "
        "will would could should may might must can "
        "this that these those it its i we they"
    ).split()
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ConceptFrequencyAnalyzerTool(Tool):
    """
    Tool that counts lowercase alphabetic words across several documents.

    Reports the most frequent words over the whole corpus plus, for each
    document, how many distinct words and how many counted tokens it has.
    """

    name = "concept_frequency_analyzer"
    description = "Analyze concept/word frequency across multiple documents."
    input_schema = {
        "type": "object",
        "properties": {
            "file_paths": {
                "type": "array",
                "items": {"type": "string"},
                "description": "List of paths to documents",
            },
            "top_n": {"type": "integer", "description": "Top N concepts (default 20)"},
            "min_word_length": {"type": "integer", "description": "Min concept length (default 4)"},
        },
        "required": ["file_paths"],
    }

    def run(self, **kwargs) -> str:
        """
        Return a plain-text frequency report for ``file_paths``.

        ``top_n`` and ``min_word_length`` fall back to 20 and 4 when they are
        not integers of at least 1. Entries that are not non-blank strings,
        do not point at an existing file, or fail conversion are skipped.
        """
        file_paths = kwargs.get("file_paths")
        if not (file_paths and isinstance(file_paths, list)):
            return "Error: file_paths must be a non-empty list of strings"

        limit = kwargs.get("top_n", 20)
        if not (isinstance(limit, int) and limit >= 1):
            limit = 20
        shortest = kwargs.get("min_word_length", 4)
        if not (isinstance(shortest, int) and shortest >= 1):
            shortest = 4

        corpus_totals = {}
        report_rows = []
        for raw in file_paths:
            if not (isinstance(raw, str) and raw.strip()):
                continue
            doc = Path(raw.strip()).resolve()
            if not (doc.exists() and doc.is_file()):
                continue
            markdown, failure = run_docproc_to_markdown(str(doc))
            if failure:
                continue

            tokens = []
            for token in re.findall(r"[a-z]+", (markdown or "").lower()):
                if len(token) < shortest or token in STOP:
                    continue
                tokens.append(token)

            distinct = set()
            for token in tokens:
                distinct.add(token)
                corpus_totals[token] = corpus_totals.get(token, 0) + 1
            report_rows.append((doc.name, len(distinct), len(tokens)))

        # The sort is stable, so words with equal counts stay in first-seen order.
        ranked = sorted(corpus_totals.items(), key=lambda item: item[1], reverse=True)[:limit]

        out = ["Global top concepts (concept: count):"]
        out.extend(f" {word}: {count}" for word, count in ranked)
        out.append("\nPer-document concept counts:")
        out.extend(
            f" {doc_name}: {n_concepts} concepts, {n_tokens} tokens"
            for doc_name, n_concepts, n_tokens in report_rows
        )
        return "\n".join(out)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# Instantiate the tool and pass it to register() when this module is imported.
register(ConceptFrequencyAnalyzerTool())
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Build a text corpus from a list of document paths (PDF, DOCX, etc.) using docproc."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from devsper.tools.base import Tool
|
|
7
|
+
from devsper.tools.registry import register
|
|
8
|
+
from devsper.tools.documents._docproc import run_docproc_to_markdown, DOCPROC_EXTENSIONS
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class CorpusBuilderTool(Tool):
    """
    Tool that converts a list of documents to text and reports their sizes.

    For every document that converts successfully the JSON result holds its
    resolved path, the full text length and the length of the stripped,
    truncated excerpt. Paths that could not be used are listed under
    ``errors``.
    """

    name = "corpus_builder"
    description = "Build a text corpus from multiple document paths (PDF, DOCX, PPTX, XLSX). Uses docproc."
    input_schema = {
        "type": "object",
        "properties": {
            "file_paths": {
                "type": "array",
                "items": {"type": "string"},
                "description": "List of paths to documents",
            },
            "max_chars_per_doc": {
                "type": "integer",
                "description": "Max chars to store per document (default 5000)",
            },
        },
        "required": ["file_paths"],
    }

    def run(self, **kwargs) -> str:
        """
        Return a JSON summary of the documents named in ``file_paths``.

        ``max_chars_per_doc`` falls back to 5000 when it is not an integer of
        at least 1. Entries that are not non-blank strings are ignored;
        missing files, unsupported suffixes and conversion failures are
        recorded in the ``errors`` list, which is only present when non-empty.
        """
        file_paths = kwargs.get("file_paths")
        if not (file_paths and isinstance(file_paths, list)):
            return "Error: file_paths must be a non-empty list of strings"

        excerpt_cap = kwargs.get("max_chars_per_doc", 5000)
        if not (isinstance(excerpt_cap, int) and excerpt_cap >= 1):
            excerpt_cap = 5000

        entries = []
        problems = []
        for path in file_paths:
            if not (isinstance(path, str) and path.strip()):
                continue
            doc = Path(path.strip()).resolve()
            if not (doc.exists() and doc.is_file()):
                problems.append(f"{path}: not found")
            elif doc.suffix.lower() not in DOCPROC_EXTENSIONS:
                problems.append(f"{path}: unsupported format")
            else:
                markdown, failure = run_docproc_to_markdown(str(doc))
                if failure:
                    problems.append(f"{path}: {failure}")
                    continue
                body = markdown or ""
                entries.append(
                    {
                        "path": str(doc),
                        "length": len(body),
                        "excerpt_length": len(body.strip()[:excerpt_cap]),
                    }
                )

        summary = {"documents": len(entries), "corpus": entries}
        if problems:
            summary["errors"] = problems
        return json.dumps(summary, indent=2)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# Instantiate the tool and pass it to register() when this module is imported.
register(CorpusBuilderTool())
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Link entities (capitalized phrases) across documents by matching exact strings."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from devsper.tools.base import Tool
|
|
7
|
+
from devsper.tools.registry import register
|
|
8
|
+
from devsper.tools.documents._docproc import run_docproc_to_markdown
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class CrossDocumentEntityLinkerTool(Tool):
    """
    Extract capitalized multi-word phrases as candidate entities and link them across documents.

    An entity is "linked" when the same phrase occurs in more than one of the
    processed documents. Output lists are sorted so the JSON is identical from
    run to run.
    """

    name = "cross_document_entity_linker"
    description = "Link entities (capitalized phrases) across multiple documents."
    input_schema = {
        "type": "object",
        "properties": {
            "file_paths": {
                "type": "array",
                "items": {"type": "string"},
                "description": "List of paths to documents",
            },
            "min_phrase_length": {"type": "integer", "description": "Min words in phrase (default 2)"},
        },
        "required": ["file_paths"],
    }

    # Two or more consecutive words that each start with an upper-case letter
    # followed by lower-case letters. Compiled once instead of on every call.
    _ENTITY_PATTERN = re.compile(r"\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b")

    def _extract_entities(self, text: str, min_words: int) -> set[str]:
        """
        Return the set of capitalized phrases in *text* with at least *min_words* words.

        The words of each phrase are re-joined with single spaces. The pattern
        allows any whitespace between words, so without this a phrase broken
        across a line would be a different string from the same phrase on one
        line and the two would never be linked.
        """
        phrases = set()
        for match in self._ENTITY_PATTERN.finditer(text):
            words = match.group(1).split()
            if len(words) >= min_words:
                phrases.add(" ".join(words))
        return phrases

    def run(self, **kwargs) -> str:
        """
        Return a JSON string with the processed documents, their entities and the shared entities.

        ``min_phrase_length`` falls back to 2 when it is not an integer of at
        least 1. Entries of ``file_paths`` that are not non-blank strings, do
        not point at an existing file, or fail conversion are skipped. An
        error message string is returned when ``file_paths`` is missing,
        empty, or not a list.
        """
        # Imported locally, as in the original implementation of this method.
        import json

        file_paths = kwargs.get("file_paths")
        min_words = kwargs.get("min_phrase_length", 2)
        if not file_paths or not isinstance(file_paths, list):
            return "Error: file_paths must be a non-empty list of strings"
        if not isinstance(min_words, int) or min_words < 1:
            min_words = 2

        # NOTE: documents are keyed by file name, so two inputs that share a
        # name overwrite each other here.
        doc_entities: dict[str, set[str]] = {}
        for path in file_paths:
            if not isinstance(path, str) or not path.strip():
                continue
            p = Path(path.strip()).resolve()
            if not p.exists() or not p.is_file():
                continue
            content, err = run_docproc_to_markdown(str(p))
            if err:
                continue
            doc_entities[p.name] = self._extract_entities(content or "", min_words)

        # Inverted index entity -> documents, built in one pass instead of
        # scanning every document's entity list once per entity.
        owners: dict[str, list[str]] = {}
        for doc_name, entities in doc_entities.items():
            for entity in entities:
                owners.setdefault(entity, []).append(doc_name)

        # Sets have no stable iteration order; sort for reproducible output.
        cross = {entity: owners[entity] for entity in sorted(owners) if len(owners[entity]) > 1}
        per_doc = {doc_name: sorted(entities) for doc_name, entities in doc_entities.items()}

        return json.dumps(
            {"documents": list(doc_entities), "cross_document_entities": cross, "per_doc": per_doc},
            indent=2,
        )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# Instantiate the tool and pass it to register() when this module is imported.
register(CrossDocumentEntityLinkerTool())
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Summarize a document corpus: total size, doc counts, and aggregate stats."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from devsper.tools.base import Tool
|
|
7
|
+
from devsper.tools.registry import register
|
|
8
|
+
from devsper.tools.documents._docproc import run_docproc_to_markdown, DOCPROC_EXTENSIONS
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DocumentCorpusSummaryTool(Tool):
    """
    Produce an aggregate summary of a corpus: number of docs, total chars, by-extension counts.

    Documents can be given explicitly via ``file_paths``, discovered by
    scanning ``directory`` recursively, or both. Each file is considered once
    even if it is reachable through both inputs.
    """

    name = "document_corpus_summary"
    description = "Summarize a document corpus: doc count, total size, and stats per format."
    input_schema = {
        "type": "object",
        "properties": {
            "file_paths": {
                "type": "array",
                "items": {"type": "string"},
                "description": "List of paths to documents",
            },
            "directory": {"type": "string", "description": "Alternatively, directory to scan for docs"},
        },
    }

    def run(self, **kwargs) -> str:
        """
        Return a JSON string with the corpus totals.

        Keys: ``total_documents`` (distinct candidate paths),
        ``successfully_processed`` (documents converted without error),
        ``total_characters`` (sum of converted text lengths) and
        ``by_extension`` (count of existing, supported files per lower-cased
        suffix). An error message string is returned when neither input
        yields any candidate path.
        """
        file_paths = kwargs.get("file_paths")
        directory = kwargs.get("directory")

        candidates: list[Path] = []
        if file_paths and isinstance(file_paths, list):
            # Strip surrounding whitespace before use, not only when validating.
            candidates.extend(Path(p.strip()) for p in file_paths if isinstance(p, str) and p.strip())
        if directory and isinstance(directory, str):
            d = Path(directory).resolve()
            if d.exists() and d.is_dir():
                # Compare the lower-cased suffix, as the processing loop below
                # does, so upper-case extensions are found on case-sensitive
                # file systems too. One walk replaces one walk per extension.
                candidates.extend(f for f in d.rglob("*") if f.suffix.lower() in DOCPROC_EXTENSIONS)
        if not candidates:
            return "Error: provide file_paths (list) or directory (path to folder)"

        # De-duplicate on the resolved path (keeping first-seen order) so a
        # file named explicitly and also found by the scan is not converted
        # and counted twice.
        paths: list[Path] = []
        seen: set[Path] = set()
        for candidate in candidates:
            resolved = candidate.resolve()
            if resolved not in seen:
                seen.add(resolved)
                paths.append(resolved)

        by_ext = {}
        total_chars = 0
        success = 0
        for p in paths:
            if not p.exists() or not p.is_file():
                continue
            ext = p.suffix.lower()
            if ext not in DOCPROC_EXTENSIONS:
                continue
            by_ext[ext] = by_ext.get(ext, 0) + 1
            content, err = run_docproc_to_markdown(str(p))
            if err:
                continue
            success += 1
            total_chars += len(content or "")

        summary = {
            "total_documents": len(paths),
            "successfully_processed": success,
            "total_characters": total_chars,
            "by_extension": by_ext,
        }
        return json.dumps(summary, indent=2)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Instantiate the tool and pass it to register() when this module is imported.
register(DocumentCorpusSummaryTool())
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Extract topic-like keywords from document text using frequency and stopword filtering."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from devsper.tools.base import Tool
|
|
7
|
+
from devsper.tools.registry import register
|
|
8
|
+
from devsper.tools.documents._docproc import run_docproc_to_markdown
|
|
9
|
+
|
|
10
|
+
# Function words excluded from topic counting, grouped by grammatical role.
# The adjacent literals concatenate into one space-separated string.
STOP = frozenset(
    (
        "a an the "  # articles
        "and or but "  # conjunctions
        "in on at to for of with by from as "  # prepositions
        "is was are were been be have has had do does did "  # auxiliaries
        "will would could should may might must can "  # modals
        "this that these those it its"  # demonstratives / pronouns
    ).split()
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DocumentTopicExtractorTool(Tool):
    """
    Extract candidate topics/keywords from a document using word frequency and stopword filtering.

    The document is converted with ``run_docproc_to_markdown``, lowercased and
    split into runs of letters; words shorter than ``min_length`` or listed in
    ``STOP`` are discarded and the most frequent remaining words are reported.
    """

    name = "document_topic_extractor"
    description = "Extract topic keywords from a document (PDF, DOCX, etc.) via word frequency."
    input_schema = {
        "type": "object",
        "properties": {
            "file_path": {"type": "string", "description": "Path to the document"},
            "top_n": {"type": "integer", "description": "Number of top topics (default 15)"},
            "min_length": {"type": "integer", "description": "Min word length (default 3)"},
        },
        "required": ["file_path"],
    }

    def run(self, **kwargs) -> str:
        """Return the most frequent non-stopword words of a document.

        Keyword Args:
            file_path: Path to the document (required, non-empty string).
            top_n: Number of words to report; anything that is not a
                positive integer falls back to 15.
            min_length: Minimum word length; anything that is not a
                positive integer falls back to 3.

        Returns:
            A ``"word: count"`` listing (one word per line, most frequent
            first), a "No topics extracted" notice, an ``"Error: ..."``
            string for a bad ``file_path``, or the error value returned by
            ``run_docproc_to_markdown``.
        """
        # Local import keeps this block independent of the file's import header.
        from collections import Counter

        file_path = kwargs.get("file_path")
        top_n = kwargs.get("top_n", 15)
        min_length = kwargs.get("min_length", 3)
        if not file_path or not isinstance(file_path, str):
            return "Error: file_path must be a non-empty string"
        # bool is a subclass of int; treat True/False as invalid, not as 1/0.
        if isinstance(top_n, bool) or not isinstance(top_n, int) or top_n < 1:
            top_n = 15
        if isinstance(min_length, bool) or not isinstance(min_length, int) or min_length < 1:
            min_length = 3

        content, err = run_docproc_to_markdown(file_path)
        if err:
            return err

        # Runs of Unicode letters (no digits or underscores). The previous
        # ASCII-only class split accented words such as "café" into pieces.
        words = re.findall(r"[^\W\d_]+", (content or "").lower())
        counts = Counter(w for w in words if len(w) >= min_length and w not in STOP)

        # most_common keeps first-seen order among words with equal counts.
        top = counts.most_common(top_n)
        if not top:
            return "No topics extracted (empty or no matching words)."
        lines = [f"{word}: {count}" for word, count in top]
        return "Top topics (word: count):\n" + "\n".join(lines)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Module-level side effect: build one tool instance and pass it to the registry on import.
register(DocumentTopicExtractorTool())
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Extract simple subject-relation-object triples from text using heuristic patterns."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from devsper.tools.base import Tool
|
|
7
|
+
from devsper.tools.registry import register
|
|
8
|
+
from devsper.tools.documents._docproc import run_docproc_to_markdown
|
|
9
|
+
|
|
10
|
+
# Heuristic "<subject> <verb> <object>" sentence matcher.
#   group(1): subject - one or more consecutive words
#   group(2): object  - text up to the next sentence terminator (or end of text)
# The verb itself is not captured; it is the text between the two groups.
# Singular and plural verb forms are both accepted ("contain"/"contains"),
# and "!" / "?" end a sentence just like ".".
# NOTE(review): re.I also relaxes the [A-Z][a-z]+ subject pattern, so
# lowercase subjects match too - confirm that this is intended.
RELATION_PATTERN = re.compile(
    r"(\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+"
    r"(?:is|are|was|were|has|have|had|uses?|used|includes?|included|contains?|contained)\s+"
    r"([^.!?]+?)(?:[.!?]|$)",
    re.I,
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class KnowledgeGraphExtractorTool(Tool):
    """
    Extract subject-relation-object style triples from document text (heuristic, no NER).

    Sentences are matched with the module-level ``RELATION_PATTERN``; every
    match whose subject and object are longer than two characters becomes
    one triple.
    """

    name = "knowledge_graph_extractor"
    description = "Extract subject-relation-object triples from a document for knowledge graph building."
    input_schema = {
        "type": "object",
        "properties": {
            "file_path": {"type": "string", "description": "Path to the document"},
            "max_triples": {"type": "integer", "description": "Max triples to return (default 50)"},
        },
        "required": ["file_path"],
    }

    def run(self, **kwargs) -> str:
        """Return the extracted triples as a JSON string.

        Keyword Args:
            file_path: Path to the document (required, non-empty string).
            max_triples: Maximum number of triples; anything that is not a
                positive integer falls back to 50.

        Returns:
            ``{"triples": [{"subject", "relation", "object"}, ...]}`` as
            indented JSON, a "No triples extracted" notice, an
            ``"Error: ..."`` string for a bad ``file_path``, or the error
            value returned by ``run_docproc_to_markdown``.
        """
        import json

        file_path = kwargs.get("file_path")
        max_triples = kwargs.get("max_triples", 50)
        if not file_path or not isinstance(file_path, str):
            return "Error: file_path must be a non-empty string"
        if not isinstance(max_triples, int) or max_triples < 1:
            max_triples = 50

        content, err = run_docproc_to_markdown(file_path)
        if err:
            return err
        text = content or ""

        triples = []
        for m in RELATION_PATTERN.finditer(text):
            subj = m.group(1).strip()
            obj = m.group(2).strip()[:80]  # cap very long objects
            if len(subj) <= 2 or len(obj) <= 2:
                continue
            # The verb is not a capture group of its own; it is exactly the
            # text between the subject and object groups. Previously it was
            # dropped, leaving the "triples" without their relation.
            relation = text[m.end(1):m.start(2)].strip().lower()
            triples.append({"subject": subj, "relation": relation, "object": obj})
            if len(triples) >= max_triples:
                break

        if not triples:
            return "No triples extracted (try a document with 'X is Y' / 'X has Y' style sentences)."
        return json.dumps({"triples": triples}, indent=2)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Module-level side effect: build one tool instance and pass it to the registry on import.
register(KnowledgeGraphExtractorTool())
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Extract date/time mentions from document text to build a simple timeline."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from devsper.tools.base import Tool
|
|
7
|
+
from devsper.tools.registry import register
|
|
8
|
+
from devsper.tools.documents._docproc import run_docproc_to_markdown
|
|
9
|
+
|
|
10
|
+
# Four-digit years from 1900 through 2099, delimited by word boundaries.
YEAR_PAT = re.compile(r"\b(19\d{2}|20\d{2})\b")
MONTHS = "january february march april may june july august september october november december".split()
# Month mention with an optional day number and an optional year.
#   group(1): month name as written in the text
#   group(2): year, or "" when no year follows
# The trailing \b stops "maybe" / "marching" from matching as months, and the
# optional day lets "March 5, 2020" keep its year. The modal verb "may" is
# still matched; that is an accepted limitation of the heuristic.
MONTH_PAT = re.compile(
    r"\b(" + "|".join(MONTHS) + r")\b"
    r"(?:\s+\d{1,2}(?:st|nd|rd|th)?\b)?"
    r"(?:\s*,?\s*(19\d{2}|20\d{2})\b)?",
    re.I,
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TimelineExtractorTool(Tool):
    """
    Extract date and year mentions from a document to produce a simple timeline.

    Years come from ``YEAR_PAT`` and month mentions from ``MONTH_PAT``; year
    entries (ascending) are listed before month entries (in document order).
    """

    name = "timeline_extractor"
    description = "Extract dates and years from a document to build a timeline."
    input_schema = {
        "type": "object",
        "properties": {
            "file_path": {"type": "string", "description": "Path to the document"},
            "max_entries": {"type": "integer", "description": "Max timeline entries (default 30)"},
        },
        "required": ["file_path"],
    }

    def run(self, **kwargs) -> str:
        """Return the timeline as a JSON string.

        Keyword Args:
            file_path: Path to the document (required, non-empty string).
            max_entries: Maximum number of entries; anything that is not a
                positive integer falls back to 30.

        Returns:
            ``{"timeline": [{"type": "year" | "month", "value": ...}, ...]}``
            as indented JSON, a "No dates or years found" notice, an
            ``"Error: ..."`` string for a bad ``file_path``, or the error
            value returned by ``run_docproc_to_markdown``.
        """
        import json

        file_path = kwargs.get("file_path")
        max_entries = kwargs.get("max_entries", 30)
        if not file_path or not isinstance(file_path, str):
            return "Error: file_path must be a non-empty string"
        if not isinstance(max_entries, int) or max_entries < 1:
            max_entries = 30

        content, err = run_docproc_to_markdown(file_path)
        if err:
            return err
        text = content or ""

        # Distinct years in ascending numeric order.
        years = sorted(set(YEAR_PAT.findall(text)), key=int)

        # Normalise each month mention BEFORE de-duplicating; otherwise
        # "March 2020" and "march 2020" end up as two identical entries.
        # dict.fromkeys keeps first-seen order. A missing year is "".
        month_values = dict.fromkeys(
            f"{month.lower()} {year}".strip()
            for month, year in MONTH_PAT.findall(text)
        )

        timeline = [{"type": "year", "value": y} for y in years]
        timeline.extend({"type": "month", "value": v} for v in month_values)
        timeline = timeline[:max_entries]

        if not timeline:
            return "No dates or years found in the document."
        return json.dumps({"timeline": timeline}, indent=2)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# Module-level side effect: build one tool instance and pass it to the registry on import.
register(TimelineExtractorTool())
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Math and analysis tools: expressions, stats, regression, distributions, etc."""
|
|
2
|
+
|
|
3
|
+
from devsper.tools.math.calculate_expression import CalculateExpressionTool
|
|
4
|
+
from devsper.tools.math.matrix_multiply import MatrixMultiplyTool
|
|
5
|
+
from devsper.tools.math.linear_regression import LinearRegressionTool
|
|
6
|
+
from devsper.tools.math.mean_std import MeanStdTool
|
|
7
|
+
from devsper.tools.math.correlation import CorrelationTool
|
|
8
|
+
from devsper.tools.math.polynomial_fit import PolynomialFitTool
|
|
9
|
+
from devsper.tools.math.histogram import HistogramTool
|
|
10
|
+
from devsper.tools.math.monte_carlo_simulation import MonteCarloSimulationTool
|
|
11
|
+
from devsper.tools.math.random_sample import RandomSampleTool
|
|
12
|
+
from devsper.tools.math.distribution_summary import DistributionSummaryTool
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Safely evaluate a mathematical expression (numbers and basic ops only)."""
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
import operator
|
|
5
|
+
|
|
6
|
+
from devsper.tools.base import Tool
|
|
7
|
+
from devsper.tools.registry import register
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CalculateExpressionTool(Tool):
    """Evaluate a safe math expression: numbers, +, -, *, /, **, parentheses. No names.

    The expression is parsed with ``ast`` and evaluated by walking the tree;
    only numeric literals and the whitelisted operators are accepted, so no
    arbitrary Python code is executed.
    """

    name = "calculate_expression"
    description = "Evaluate a mathematical expression. Only numbers and +, -, *, /, ** allowed."
    input_schema = {
        "type": "object",
        "properties": {"expression": {"type": "string", "description": "Math expression, e.g. 2 + 3 * 4"}},
        "required": ["expression"],
    }

    def run(self, **kwargs) -> str:
        """Evaluate ``expression`` and return the result as a string.

        Keyword Args:
            expression: The expression to evaluate (required, non-empty string).

        Returns:
            ``str(result)`` on success, otherwise an ``"Error: ..."`` string
            (invalid syntax, disallowed construct, division by zero,
            overflow, oversized power, or an expression nested too deeply).
        """
        expression = kwargs.get("expression")
        if not expression or not isinstance(expression, str):
            return "Error: expression must be a non-empty string"

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.Pow: operator.pow,
        }
        unary_ops = {
            ast.USub: operator.neg,
            ast.UAdd: operator.pos,
        }
        # Cap on the estimated bit length of an integer power, so inputs such
        # as 9**9**9 are rejected instead of consuming CPU and memory.
        max_pow_bits = 100_000

        def eval_node(node):
            if isinstance(node, ast.Constant):
                value = node.value
                # bool is an int subclass; True/False are names, not numbers.
                if isinstance(value, (int, float)) and not isinstance(value, bool):
                    return value
            elif isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
                left = eval_node(node.left)
                right = eval_node(node.right)
                if (
                    isinstance(node.op, ast.Pow)
                    and isinstance(left, int)
                    and isinstance(right, int)
                    and right > 0
                    and abs(left) > 1
                    and left.bit_length() * right > max_pow_bits
                ):
                    raise ValueError("Result of ** is too large")
                result = binary_ops[type(node.op)](left, right)
                # e.g. (-8) ** 0.5 silently becomes a complex number.
                if isinstance(result, complex):
                    raise ValueError("Result is not a real number")
                return result
            elif isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](eval_node(node.operand))
            # Anything else (names, calls, %, //, comparisons, ...) is refused.
            raise ValueError("Only numbers and +, -, *, /, ** allowed")

        try:
            tree = ast.parse(expression, mode="eval")
            return str(eval_node(tree.body))
        except (
            ValueError,
            SyntaxError,
            TypeError,
            ArithmeticError,  # ZeroDivisionError, OverflowError
            RecursionError,
            MemoryError,
        ) as e:
            return f"Error: {e}"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# Module-level side effect: build one tool instance and pass it to the registry on import.
register(CalculateExpressionTool())
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Compute Pearson correlation between two lists of numbers."""
|
|
2
|
+
|
|
3
|
+
from devsper.tools.base import Tool
|
|
4
|
+
from devsper.tools.registry import register
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class CorrelationTool(Tool):
    """Compute Pearson correlation coefficient between x and y."""

    name = "correlation"
    description = "Compute Pearson correlation between two lists of numbers."
    input_schema = {
        "type": "object",
        "properties": {
            "x": {"type": "array", "description": "First list of numbers"},
            "y": {"type": "array", "description": "Second list of numbers"},
        },
        "required": ["x", "y"],
    }

    def run(self, **kwargs) -> str:
        """Return ``"Pearson r = <value>"`` for the paired samples x and y.

        Keyword Args:
            x: First list of numbers.
            y: Second list of numbers, same length as ``x`` (at least 2).

        Returns:
            The formatted coefficient, or an ``"Error: ..."`` string when the
            inputs are not lists, differ in length, are too short, contain
            non-numeric or non-finite values, have zero variance, or are too
            large to square as floats.
        """
        # Local import keeps this block independent of the file's import header.
        import math

        x = kwargs.get("x")
        y = kwargs.get("y")
        if not isinstance(x, list) or not isinstance(y, list):
            return "Error: x and y must be arrays"
        if len(x) != len(y) or len(x) < 2:
            return "Error: x and y must have same length >= 2"
        try:
            X = [float(v) for v in x]
            Y = [float(v) for v in y]
        except (TypeError, ValueError, OverflowError):
            # OverflowError: an integer too large to convert to float.
            return "Error: all elements must be numbers"
        # NaN/inf would otherwise propagate and be reported as "r = nan".
        if not all(math.isfinite(v) for v in X) or not all(math.isfinite(v) for v in Y):
            return "Error: all elements must be finite numbers"

        n = len(X)
        mx = sum(X) / n
        my = sum(Y) / n
        try:
            sx = (sum((a - mx) ** 2 for a in X) / n) ** 0.5
            sy = (sum((b - my) ** 2 for b in Y) / n) ** 0.5
        except OverflowError:
            return "Error: values too large to compute correlation"
        denom = n * sx * sy
        # Testing the product also covers sx * sy underflowing to 0.0.
        if sx == 0 or sy == 0 or denom == 0:
            return "Error: zero variance in x or y"
        r = sum((a - mx) * (b - my) for a, b in zip(X, Y)) / denom
        # Rounding can push a perfect correlation marginally outside [-1, 1].
        r = max(-1.0, min(1.0, r))
        return f"Pearson r = {r}"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Module-level side effect: build one tool instance and pass it to the registry on import.
register(CorrelationTool())
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Summary statistics for a list of numbers: min, max, mean, median, quartiles."""
|
|
2
|
+
|
|
3
|
+
from devsper.tools.base import Tool
|
|
4
|
+
from devsper.tools.registry import register
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DistributionSummaryTool(Tool):
    """Compute distribution summary: min, max, mean, median, Q1, Q3."""

    name = "distribution_summary"
    description = "Summary of numeric distribution: min, max, mean, median, quartiles."
    input_schema = {
        "type": "object",
        "properties": {"values": {"type": "array", "description": "List of numbers"}},
        "required": ["values"],
    }

    def run(self, **kwargs) -> str:
        """Return a multi-line summary of ``values``.

        Keyword Args:
            values: List of numbers (required, non-empty).

        Returns:
            Lines ``min``, ``max``, ``mean``, ``median``, ``Q1``, ``Q3`` and
            ``n`` in ``name = value`` form, or an ``"Error: ..."`` string when
            the input is not a list, is empty, or contains non-numeric or
            non-finite values.
        """
        # Local imports keep this block independent of the file's import header.
        import math
        import statistics

        values = kwargs.get("values")
        if not isinstance(values, list):
            return "Error: values must be an array"
        try:
            nums = sorted(float(v) for v in values)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: an integer too large to convert to float.
            return "Error: all elements must be numbers"
        if not nums:
            return "Error: empty list"
        # NaN breaks the ordering that sorted() relies on; refuse it outright.
        if not all(math.isfinite(v) for v in nums):
            return "Error: all elements must be finite numbers"

        n = len(nums)
        mean = sum(nums) / n
        mid = n // 2
        median = (nums[mid - 1] + nums[mid]) / 2 if n % 2 == 0 else nums[mid]
        if n == 1:
            # statistics.quantiles needs at least two data points.
            q1 = q3 = nums[0]
        else:
            # Linear interpolation between order statistics. The previous
            # nums[n // 4] / nums[3 * n // 4] indexing reported the maximum
            # as Q3 for a four-element list.
            q1, _, q3 = statistics.quantiles(nums, n=4, method="inclusive")
        return f"min = {nums[0]}\nmax = {nums[-1]}\nmean = {mean}\nmedian = {median}\nQ1 = {q1}\nQ3 = {q3}\nn = {n}"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Module-level side effect: build one tool instance and pass it to the registry on import.
register(DistributionSummaryTool())
|