dslighting-1.3.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. dsat/__init__.py +3 -0
  2. dsat/benchmark/__init__.py +1 -0
  3. dsat/benchmark/benchmark.py +168 -0
  4. dsat/benchmark/datasci.py +291 -0
  5. dsat/benchmark/mle.py +777 -0
  6. dsat/benchmark/sciencebench.py +304 -0
  7. dsat/common/__init__.py +0 -0
  8. dsat/common/constants.py +11 -0
  9. dsat/common/exceptions.py +48 -0
  10. dsat/common/typing.py +19 -0
  11. dsat/config.py +79 -0
  12. dsat/models/__init__.py +3 -0
  13. dsat/models/candidates.py +16 -0
  14. dsat/models/formats.py +52 -0
  15. dsat/models/task.py +64 -0
  16. dsat/operators/__init__.py +0 -0
  17. dsat/operators/aflow_ops.py +90 -0
  18. dsat/operators/autokaggle_ops.py +170 -0
  19. dsat/operators/automind_ops.py +38 -0
  20. dsat/operators/base.py +22 -0
  21. dsat/operators/code.py +45 -0
  22. dsat/operators/dsagent_ops.py +123 -0
  23. dsat/operators/llm_basic.py +84 -0
  24. dsat/prompts/__init__.py +0 -0
  25. dsat/prompts/aflow_prompt.py +76 -0
  26. dsat/prompts/aide_prompt.py +52 -0
  27. dsat/prompts/autokaggle_prompt.py +290 -0
  28. dsat/prompts/automind_prompt.py +29 -0
  29. dsat/prompts/common.py +51 -0
  30. dsat/prompts/data_interpreter_prompt.py +82 -0
  31. dsat/prompts/dsagent_prompt.py +88 -0
  32. dsat/runner.py +554 -0
  33. dsat/services/__init__.py +0 -0
  34. dsat/services/data_analyzer.py +387 -0
  35. dsat/services/llm.py +486 -0
  36. dsat/services/llm_single.py +421 -0
  37. dsat/services/sandbox.py +386 -0
  38. dsat/services/states/__init__.py +0 -0
  39. dsat/services/states/autokaggle_state.py +43 -0
  40. dsat/services/states/base.py +14 -0
  41. dsat/services/states/dsa_log.py +13 -0
  42. dsat/services/states/experience.py +237 -0
  43. dsat/services/states/journal.py +153 -0
  44. dsat/services/states/operator_library.py +290 -0
  45. dsat/services/vdb.py +76 -0
  46. dsat/services/workspace.py +178 -0
  47. dsat/tasks/__init__.py +3 -0
  48. dsat/tasks/handlers.py +376 -0
  49. dsat/templates/open_ended/grade_template.py +107 -0
  50. dsat/tools/__init__.py +4 -0
  51. dsat/utils/__init__.py +0 -0
  52. dsat/utils/context.py +172 -0
  53. dsat/utils/dynamic_import.py +71 -0
  54. dsat/utils/parsing.py +33 -0
  55. dsat/workflows/__init__.py +12 -0
  56. dsat/workflows/base.py +53 -0
  57. dsat/workflows/factory.py +439 -0
  58. dsat/workflows/manual/__init__.py +0 -0
  59. dsat/workflows/manual/autokaggle_workflow.py +148 -0
  60. dsat/workflows/manual/data_interpreter_workflow.py +153 -0
  61. dsat/workflows/manual/deepanalyze_workflow.py +484 -0
  62. dsat/workflows/manual/dsagent_workflow.py +76 -0
  63. dsat/workflows/search/__init__.py +0 -0
  64. dsat/workflows/search/aflow_workflow.py +344 -0
  65. dsat/workflows/search/aide_workflow.py +283 -0
  66. dsat/workflows/search/automind_workflow.py +237 -0
  67. dsat/workflows/templates/__init__.py +0 -0
  68. dsat/workflows/templates/basic_kaggle_loop.py +71 -0
  69. dslighting/__init__.py +170 -0
  70. dslighting/core/__init__.py +13 -0
  71. dslighting/core/agent.py +646 -0
  72. dslighting/core/config_builder.py +318 -0
  73. dslighting/core/data_loader.py +422 -0
  74. dslighting/core/task_detector.py +422 -0
  75. dslighting/utils/__init__.py +19 -0
  76. dslighting/utils/defaults.py +151 -0
  77. dslighting-1.3.9.dist-info/METADATA +554 -0
  78. dslighting-1.3.9.dist-info/RECORD +80 -0
  79. dslighting-1.3.9.dist-info/WHEEL +5 -0
  80. dslighting-1.3.9.dist-info/top_level.txt +2 -0
dslighting/__init__.py ADDED
@@ -0,0 +1,170 @@
1
+ """
2
+ DSLighting: Simplified API for Data Science Agent Automation
3
+
4
+ A progressive API that provides sensible defaults with full control when needed.
5
+
6
+ Quick Start:
7
+ >>> import dslighting
8
+ >>>
9
+ >>> # Simple usage
10
+ >>> data = dslighting.load_data("path/to/data")
11
+ >>> agent = dslighting.Agent()
12
+ >>> result = agent.run(data)
13
+ >>>
14
+ >>> # One-liner
15
+ >>> result = dslighting.run_agent("path/to/data")
16
+
17
+ Advanced Usage:
18
+ >>> agent = dslighting.Agent(
19
+ ... workflow="autokaggle",
20
+ ... model="gpt-4o",
21
+ ... temperature=0.5,
22
+ ... max_iterations=10
23
+ ... )
24
+ >>> result = agent.run(data)
25
+
26
+ For more information, see: https://github.com/usail-hkust/dslighting
27
+ """
28
+
29
+ __version__ = "1.3.9"
30
+ __author__ = "DSLighting Team"
31
+
32
+ # Core API classes
33
+ from dslighting.core.agent import Agent, AgentResult
34
+ from dslighting.core.data_loader import DataLoader, LoadedData
35
+
36
+ # Convenience functions
37
def load_data(source, **kwargs):
    """
    Load a data source through a freshly created ``DataLoader``.

    Convenience wrapper: instantiates ``DataLoader`` with no arguments and
    forwards ``source`` together with every keyword argument to its
    ``load`` method, returning that call's result unchanged.

    Per the package documentation, loading a Kaggle/MLE-Bench competition
    directory also derives ``task_id`` from the path.

    Args:
        source: Data source (path, DataFrame, dict, etc.)
        **kwargs: Extra keyword arguments forwarded as-is to ``DataLoader.load``

    Returns:
        The value returned by ``DataLoader.load`` (documented as a
        LoadedData carrying the detected task information and task_id)

    Examples:
        >>> # Competition path: task_id is taken from the directory name
        >>> data = dslighting.load_data("data/competitions/bike-sharing-demand")
        >>> print(data.task_id)  # "bike-sharing-demand"
        >>>
        >>> agent = dslighting.Agent()
        >>> result = agent.run(data)  # Auto-graded using task_id

        >>> # In-memory DataFrame: no task_id available
        >>> df = pd.read_csv("data.csv")
        >>> data = dslighting.load_data(df)

        >>> data = dslighting.load_data("data/house-prices")
        >>> print(data.task_detection.task_type)
        'kaggle'
    """
    return DataLoader().load(source, **kwargs)
69
+
70
+
71
def run_agent(data=None, task_id=None, data_dir=None, keep_workspace=False, keep_workspace_on_failure=True, **kwargs):
    """
    One-call helper: build an ``Agent`` and run it on ``data``.

    Extra keyword arguments are split in two groups. Keys that name
    run-time options are handed to ``Agent.run``; every other key is used
    to construct the ``Agent``.

    Args:
        data: Optional data source (path, DataFrame, dict, etc.); passed
            positionally to ``Agent.run``
        task_id: Task/Competition identifier (e.g., "bike-sharing-demand").
            Forwarded to ``Agent.run`` only when it is not None
        data_dir: Base data directory. Forwarded to ``Agent.run`` only when
            it is not None, so otherwise the agent's own default applies
        keep_workspace: Keep workspace after completion (default: False);
            always passed to the ``Agent`` constructor
        keep_workspace_on_failure: Keep workspace on failure (default: True);
            always passed to the ``Agent`` constructor
        **kwargs: ``output_path`` and ``description`` go to ``Agent.run``;
            all remaining keys go to ``Agent.__init__``

    Returns:
        Whatever ``Agent.run`` returns (documented as an AgentResult with
        output, metrics, and metadata)

    Examples:
        >>> # Recommended: identify the task explicitly
        >>> result = dslighting.run_agent(
        ...     task_id="bike-sharing-demand",
        ...     data_dir="data/competitions"
        ... )
        >>> print(f"Score: {result.score}, Cost: ${result.cost}")

        >>> # Legacy: pass a data path
        >>> result = dslighting.run_agent("data/titanic")

        >>> # Custom agent configuration
        >>> result = dslighting.run_agent(
        ...     task_id="bike-sharing-demand",
        ...     workflow="autokaggle",
        ...     model="gpt-4o",
        ...     keep_workspace=True  # Keep workspace for debugging
        ... )
    """
    # Keyword names that belong to Agent.run() rather than Agent.__init__()
    run_only_params = {'task_id', 'data_dir', 'output_path', 'description'}

    # Partition the caller's extras, preserving their original order
    run_kwargs = {
        name: value for name, value in kwargs.items() if name in run_only_params
    }
    agent_params = {
        name: value for name, value in kwargs.items() if name not in run_only_params
    }

    # Explicit arguments are forwarded to run() only when actually supplied
    for name, value in (('task_id', task_id), ('data_dir', data_dir)):
        if value is not None:
            run_kwargs[name] = value

    # Workspace retention flags always configure the agent itself
    agent_params['keep_workspace'] = keep_workspace
    agent_params['keep_workspace_on_failure'] = keep_workspace_on_failure

    return Agent(**agent_params).run(data, **run_kwargs)
133
+
134
+
135
# Names exported by ``from dslighting import *``
__all__ = [
    "__version__", "__author__",  # version info
    "Agent", "AgentResult", "DataLoader", "LoadedData",  # core classes
    "load_data", "run_agent",  # convenience functions
]
151
+
152
+
153
# Configure the root logger when the package is imported.
# The rich handler is used when ``rich`` is installed; any ImportError raised
# inside the try body switches to the plain basicConfig setup instead.
try:
    import logging
    from rich.logging import RichHandler

    # Rich-backed console output, without the source-path column
    logging.basicConfig(
        handlers=[RichHandler(show_path=False)],
        datefmt="%H:%M:%S",
        format="%(asctime)s [%(levelname)s] %(message)s",
        level="INFO",
    )
except ImportError:
    # ``logging`` is already bound here: its import precedes the rich import
    logging.basicConfig(
        format="%(asctime)s [%(levelname)s] %(message)s",
        level="INFO",
    )
@@ -0,0 +1,13 @@
1
+ """
2
+ Core modules for DSLighting simplified API.
3
+ """
4
+
5
+ from dslighting.core.agent import Agent, AgentResult
6
+ from dslighting.core.data_loader import DataLoader, LoadedData
7
+
8
# Names re-exported by the core package
__all__ = ["Agent", "AgentResult", "DataLoader", "LoadedData"]