shared_tools 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/README.md +594 -42
- data/lib/shared_tools/{ruby_llm/mcp → mcp}/github_mcp_server.rb +20 -3
- data/lib/shared_tools/mcp/imcp.rb +28 -0
- data/lib/shared_tools/mcp/tavily_mcp_server.rb +44 -0
- data/lib/shared_tools/mcp.rb +24 -0
- data/lib/shared_tools/tools/browser/base_driver.rb +64 -0
- data/lib/shared_tools/tools/browser/base_tool.rb +50 -0
- data/lib/shared_tools/tools/browser/click_tool.rb +54 -0
- data/lib/shared_tools/tools/browser/elements/element_grouper.rb +73 -0
- data/lib/shared_tools/tools/browser/elements/nearby_element_detector.rb +109 -0
- data/lib/shared_tools/tools/browser/formatters/action_formatter.rb +37 -0
- data/lib/shared_tools/tools/browser/formatters/data_entry_formatter.rb +135 -0
- data/lib/shared_tools/tools/browser/formatters/element_formatter.rb +52 -0
- data/lib/shared_tools/tools/browser/formatters/input_formatter.rb +59 -0
- data/lib/shared_tools/tools/browser/inspect_tool.rb +87 -0
- data/lib/shared_tools/tools/browser/inspect_utils.rb +51 -0
- data/lib/shared_tools/tools/browser/page_inspect/button_summarizer.rb +140 -0
- data/lib/shared_tools/tools/browser/page_inspect/form_summarizer.rb +98 -0
- data/lib/shared_tools/tools/browser/page_inspect/html_summarizer.rb +37 -0
- data/lib/shared_tools/tools/browser/page_inspect/link_summarizer.rb +103 -0
- data/lib/shared_tools/tools/browser/page_inspect_tool.rb +55 -0
- data/lib/shared_tools/tools/browser/page_screenshot_tool.rb +39 -0
- data/lib/shared_tools/tools/browser/selector_generator/base_selectors.rb +28 -0
- data/lib/shared_tools/tools/browser/selector_generator/contextual_selectors.rb +140 -0
- data/lib/shared_tools/tools/browser/selector_generator.rb +73 -0
- data/lib/shared_tools/tools/browser/selector_inspect_tool.rb +67 -0
- data/lib/shared_tools/tools/browser/text_field_area_set_tool.rb +45 -0
- data/lib/shared_tools/tools/browser/visit_tool.rb +43 -0
- data/lib/shared_tools/tools/browser/watir_driver.rb +132 -0
- data/lib/shared_tools/tools/browser.rb +27 -0
- data/lib/shared_tools/tools/browser_tool.rb +255 -0
- data/lib/shared_tools/tools/calculator_tool.rb +169 -0
- data/lib/shared_tools/tools/composite_analysis_tool.rb +520 -0
- data/lib/shared_tools/tools/computer/base_driver.rb +177 -0
- data/lib/shared_tools/tools/computer/mac_driver.rb +103 -0
- data/lib/shared_tools/tools/computer.rb +21 -0
- data/lib/shared_tools/tools/computer_tool.rb +207 -0
- data/lib/shared_tools/tools/data_science_kit.rb +707 -0
- data/lib/shared_tools/tools/database/base_driver.rb +17 -0
- data/lib/shared_tools/tools/database/postgres_driver.rb +30 -0
- data/lib/shared_tools/tools/database/sqlite_driver.rb +29 -0
- data/lib/shared_tools/tools/database.rb +9 -0
- data/lib/shared_tools/tools/database_query_tool.rb +313 -0
- data/lib/shared_tools/tools/database_tool.rb +99 -0
- data/lib/shared_tools/tools/devops_toolkit.rb +420 -0
- data/lib/shared_tools/tools/disk/base_driver.rb +91 -0
- data/lib/shared_tools/tools/disk/base_tool.rb +20 -0
- data/lib/shared_tools/tools/disk/directory_create_tool.rb +39 -0
- data/lib/shared_tools/tools/disk/directory_delete_tool.rb +39 -0
- data/lib/shared_tools/tools/disk/directory_list_tool.rb +37 -0
- data/lib/shared_tools/tools/disk/directory_move_tool.rb +40 -0
- data/lib/shared_tools/tools/disk/file_create_tool.rb +38 -0
- data/lib/shared_tools/tools/disk/file_delete_tool.rb +40 -0
- data/lib/shared_tools/tools/disk/file_move_tool.rb +43 -0
- data/lib/shared_tools/tools/disk/file_read_tool.rb +40 -0
- data/lib/shared_tools/tools/disk/file_replace_tool.rb +44 -0
- data/lib/shared_tools/tools/disk/file_write_tool.rb +40 -0
- data/lib/shared_tools/tools/disk/local_driver.rb +91 -0
- data/lib/shared_tools/tools/disk.rb +17 -0
- data/lib/shared_tools/tools/disk_tool.rb +132 -0
- data/lib/shared_tools/tools/doc/pdf_reader_tool.rb +79 -0
- data/lib/shared_tools/tools/doc.rb +8 -0
- data/lib/shared_tools/tools/doc_tool.rb +109 -0
- data/lib/shared_tools/tools/docker/base_tool.rb +56 -0
- data/lib/shared_tools/tools/docker/compose_run_tool.rb +77 -0
- data/lib/shared_tools/tools/docker.rb +8 -0
- data/lib/shared_tools/tools/error_handling_tool.rb +403 -0
- data/lib/shared_tools/tools/eval/python_eval_tool.rb +209 -0
- data/lib/shared_tools/tools/eval/ruby_eval_tool.rb +93 -0
- data/lib/shared_tools/tools/eval/shell_eval_tool.rb +64 -0
- data/lib/shared_tools/tools/eval.rb +10 -0
- data/lib/shared_tools/tools/eval_tool.rb +139 -0
- data/lib/shared_tools/tools/secure_tool_template.rb +353 -0
- data/lib/shared_tools/tools/version.rb +7 -0
- data/lib/shared_tools/tools/weather_tool.rb +197 -0
- data/lib/shared_tools/tools/workflow_manager_tool.rb +312 -0
- data/lib/shared_tools/tools.rb +16 -0
- data/lib/shared_tools/version.rb +1 -1
- data/lib/shared_tools.rb +9 -24
- metadata +189 -68
- data/lib/shared_tools/llm_rb/run_shell_command.rb +0 -23
- data/lib/shared_tools/llm_rb.rb +0 -9
- data/lib/shared_tools/omniai.rb +0 -9
- data/lib/shared_tools/raix/what_is_the_weather.rb +0 -18
- data/lib/shared_tools/raix.rb +0 -9
- data/lib/shared_tools/ruby_llm/edit_file.rb +0 -71
- data/lib/shared_tools/ruby_llm/incomplete/calculator_tool.rb +0 -70
- data/lib/shared_tools/ruby_llm/incomplete/composite_analysis_tool.rb +0 -89
- data/lib/shared_tools/ruby_llm/incomplete/data_science_kit.rb +0 -128
- data/lib/shared_tools/ruby_llm/incomplete/database_query_tool.rb +0 -100
- data/lib/shared_tools/ruby_llm/incomplete/devops_toolkit.rb +0 -112
- data/lib/shared_tools/ruby_llm/incomplete/error_handling_tool.rb +0 -109
- data/lib/shared_tools/ruby_llm/incomplete/secure_tool_template.rb +0 -117
- data/lib/shared_tools/ruby_llm/incomplete/weather_tool.rb +0 -110
- data/lib/shared_tools/ruby_llm/incomplete/workflow_manager_tool.rb +0 -145
- data/lib/shared_tools/ruby_llm/list_files.rb +0 -49
- data/lib/shared_tools/ruby_llm/mcp/imcp.rb +0 -15
- data/lib/shared_tools/ruby_llm/mcp.rb +0 -12
- data/lib/shared_tools/ruby_llm/pdf_page_reader.rb +0 -59
- data/lib/shared_tools/ruby_llm/python_eval.rb +0 -194
- data/lib/shared_tools/ruby_llm/read_file.rb +0 -40
- data/lib/shared_tools/ruby_llm/ruby_eval.rb +0 -77
- data/lib/shared_tools/ruby_llm/run_shell_command.rb +0 -49
- data/lib/shared_tools/ruby_llm.rb +0 -12
|
@@ -0,0 +1,520 @@
|
|
|
1
|
+
# composite_analysis_tool.rb - Tool that orchestrates multiple analysis steps
|
|
2
|
+
require 'ruby_llm/tool'
|
|
3
|
+
require 'json'
|
|
4
|
+
|
|
5
|
+
module SharedTools
|
|
6
|
+
module Tools
|
|
7
|
+
class CompositeAnalysisTool < RubyLLM::Tool
|
|
8
|
+
def self.name = "composite_analysis"
|
|
9
|
+
|
|
10
|
+
description <<~'DESCRIPTION'
|
|
11
|
+
Perform comprehensive multi-stage data analysis by orchestrating multiple specialized analysis steps
|
|
12
|
+
to provide complete insights from various data sources. This composite tool automatically
|
|
13
|
+
determines the appropriate data fetching method (web scraping for URLs, file reading for
|
|
14
|
+
local paths), analyzes data structure and content, generates statistical insights,
|
|
15
|
+
and suggests appropriate visualizations based on the data characteristics.
|
|
16
|
+
Ideal for exploratory data analysis workflows where you need a complete picture
|
|
17
|
+
from initial data loading through final insights. Handles CSV, JSON, and text data formats.
|
|
18
|
+
DESCRIPTION
|
|
19
|
+
|
|
20
|
+
params do
|
|
21
|
+
string :data_source, description: <<~DESC.strip, required: true
|
|
22
|
+
Primary data source to analyze. Can be either a local file path or a web URL.
|
|
23
|
+
For files: Use relative or absolute paths to CSV, JSON, XML, or text files.
|
|
24
|
+
For URLs: Use complete HTTP/HTTPS URLs to accessible data endpoints or web pages.
|
|
25
|
+
The tool automatically detects the source type and uses appropriate fetching methods.
|
|
26
|
+
Examples: './data/sales.csv', '/home/user/data.json', 'https://api.example.com/data'
|
|
27
|
+
DESC
|
|
28
|
+
|
|
29
|
+
string :analysis_type, description: <<~DESC.strip, required: false
|
|
30
|
+
Type of analysis to perform: 'quick', 'standard', or 'comprehensive'.
|
|
31
|
+
Quick: Basic structure and summary statistics only (fastest).
|
|
32
|
+
Standard: Includes structure, insights, and visualization suggestions (recommended).
|
|
33
|
+
Comprehensive: Full analysis with detailed correlations and patterns (slowest).
|
|
34
|
+
Default: standard
|
|
35
|
+
DESC
|
|
36
|
+
|
|
37
|
+
object :options, description: <<~DESC.strip, required: false do
|
|
38
|
+
Additional analysis options for customizing the analysis behavior.
|
|
39
|
+
These options allow fine-tuning the analysis process for different dataset sizes and requirements.
|
|
40
|
+
DESC
|
|
41
|
+
integer :sample_size, description: "Maximum number of rows to analyze for large datasets. Helps manage performance on very large files.", required: false
|
|
42
|
+
boolean :include_correlations, description: "Enable correlation analysis. Set to false to skip correlation calculations. Default: true", required: false
|
|
43
|
+
integer :visualization_limit, description: "Maximum number of visualizations to suggest. Default: 5", required: false
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
# Build the tool, optionally injecting a logger.
#
# @param logger [#info, #debug, #warn, #error, nil] logger to write to;
#   when nil (or false) RubyLLM.logger is used instead
def initialize(logger: nil)
  @logger = logger
  @logger ||= RubyLLM.logger
end
|
|
50
|
+
|
|
51
|
+
# Run the multi-stage analysis pipeline against a file path or URL.
#
# Stages: fetch data -> analyze structure -> insights ('standard' and
# 'comprehensive' only) -> visualization suggestions (when numeric columns
# exist) -> correlations ('comprehensive' only, more than one numeric column).
#
# @param data_source [String] local file path or http(s) URL
# @param analysis_type [String] 'quick', 'standard' or 'comprehensive'
# @param options [Hash] tuning options (:sample_size, :include_correlations,
#   :visualization_limit). The `params` block declares them as a nested
#   `options` object, so they are accepted both flat (as keywords) and nested
#   under an :options / "options" key, with symbol or string keys. Nested
#   values win over flat ones.
# @return [Hash] on success: :success, :analysis, :data_source,
#   :analysis_type, :analyzed_at, :duration_seconds; on failure: :success
#   (false), :error, :error_type, :data_source, :partial_results
def execute(data_source:, analysis_type: "standard", **options)
  results = {}
  # Monotonic clock: the measured duration is immune to wall-clock adjustments.
  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  begin
    @logger.info("CompositeAnalysisTool#execute data_source=#{data_source} analysis_type=#{analysis_type}")

    # Flatten a nested options object so the lookups below see every setting.
    nested = options[:options] || options['options']
    opts = options.reject { |key, _| key.to_s == 'options' }
    opts = opts.merge(nested) if nested.is_a?(Hash)
    opts = opts.transform_keys(&:to_sym)

    # Step 1: Fetch data using appropriate method
    @logger.debug("Fetching data from source...")
    if data_source.start_with?('http://', 'https://')
      results[:data] = fetch_web_data(data_source)
      results[:source_type] = 'web'
    else
      results[:data] = read_file_data(data_source)
      results[:source_type] = 'file'
    end

    # Step 2: Analyze data structure
    @logger.debug("Analyzing data structure...")
    results[:structure] = analyze_data_structure(results[:data])
    numeric_count = results[:structure][:numeric_columns]&.length.to_i

    # Step 3: Generate insights based on analysis type
    if ['standard', 'comprehensive'].include?(analysis_type)
      @logger.debug("Generating insights...")
      results[:insights] = generate_insights(results[:data], results[:structure], opts)
    end

    # Step 4: Create visualization suggestions
    if numeric_count.positive?
      @logger.debug("Suggesting visualizations...")
      viz_limit = opts[:visualization_limit] || 5
      results[:visualizations] = suggest_visualizations(results[:structure], viz_limit)
    end

    # Step 5: Perform correlation analysis for comprehensive mode.
    # Correlations are on by default; only an explicit false disables them.
    if analysis_type == 'comprehensive' && numeric_count > 1 && opts[:include_correlations] != false
      @logger.debug("Performing correlation analysis...")
      results[:correlations] = perform_correlation_analysis(results[:data], results[:structure])
    end

    analysis_duration = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - started).round(3)
    @logger.info("Analysis completed in #{analysis_duration}s")

    {
      success: true,
      analysis: results,
      data_source: data_source,
      analysis_type: analysis_type,
      analyzed_at: Time.now.iso8601,
      duration_seconds: analysis_duration
    }
  rescue => e
    # Report the failure as data and include whatever stages completed.
    @logger.error("Analysis failed: #{e.message}")
    {
      success: false,
      error: e.message,
      error_type: e.class.name,
      data_source: data_source,
      partial_results: results
    }
  end
end
|
|
116
|
+
|
|
117
|
+
private
|
|
118
|
+
|
|
119
|
+
# Fetch data from a web URL (simulated).
#
# No HTTP request is made: canned sample data is chosen from the URL's file
# extension. The extension is matched case-insensitively and may be followed
# by a query string or fragment, so "https://host/data.json?page=2" is
# treated as JSON.
#
# @param url [String] the URL to "fetch"
# @return [Hash, Array<Hash>] whatever the selected simulate_* helper returns
def fetch_web_data(url)
  @logger.debug("Fetching web data from: #{url}")

  # In a real implementation, this would use an HTTP client.
  # For simulation, return sample data based on URL patterns.
  case url
  when /\.json(?=[?#]|\s*\z)/i then simulate_json_data
  when /\.csv(?=[?#]|\s*\z)/i then simulate_csv_data
  else simulate_api_response
  end
end
|
|
134
|
+
|
|
135
|
+
# Read data from a local file, chosen by file extension.
#
# - Missing file: logs a warning and returns simulated data for the extension
#   (demo/testing fallback).
# - ".json": parsed with JSON.parse.
# - ".txt": array of lines without trailing newlines.
# - ".csv": parsed into an Array of Hashes keyed by the header row. If the
#   csv library cannot be loaded, a warning is logged and simulated data is
#   returned.
# - anything else: logs a warning and returns simulated CSV data.
#
# @param file_path [String] path to the file
# @return [Hash, Array] parsed or simulated data
def read_file_data(file_path)
  @logger.debug("Reading file data from: #{file_path}")

  extension = File.extname(file_path).downcase

  unless File.exist?(file_path)
    # For demo/testing, return simulated data based on file extension
    @logger.warn("File not found (#{file_path}), using simulated data")
    return case extension
           when '.json' then simulate_json_data
           when '.txt' then ["Sample text line 1", "Sample text line 2", "Sample text line 3"]
           else simulate_csv_data
           end
  end

  case extension
  when '.json' then JSON.parse(File.read(file_path))
  when '.txt' then File.readlines(file_path, chomp: true)
  when '.csv' then read_csv_rows(file_path)
  else
    @logger.warn("CSV parsing requires csv gem, using simulated data")
    simulate_csv_data
  end
end

# Parse a CSV file with a header row into an Array of Hashes (string keys).
# The csv library is loaded lazily so the tool still works without it.
def read_csv_rows(file_path)
  require 'csv'
  CSV.read(file_path, headers: true).map(&:to_h)
rescue LoadError
  @logger.warn("CSV parsing requires csv gem, using simulated data")
  simulate_csv_data
end
|
|
166
|
+
|
|
167
|
+
# Describe the shape of the loaded data.
#
# Builds a structure hash (:data_type, :row_count and four initially empty
# column lists) and lets the analyzer matching the data type fill it in.
# Data of type 'unknown' gets no further analysis.
#
# @param data [Object] the loaded data
# @return [Hash] the structure description
def analyze_data_structure(data)
  kind = determine_data_type(data)
  structure = { data_type: kind, row_count: count_rows(data) }
  %i[columns numeric_columns categorical_columns text_columns].each do |list|
    structure[list] = []
  end

  if kind == 'tabular'
    analyze_tabular_structure(data, structure)
  elsif kind == 'json'
    analyze_json_structure(data, structure)
  elsif kind == 'text'
    analyze_text_structure(data, structure)
  end

  structure
end
|
|
189
|
+
|
|
190
|
+
# Generate insights from the data.
#
# Always includes :summary, :quality and :recommendations. Adds :statistics
# when numeric columns exist and :patterns when categorical columns exist.
#
# When a positive Integer :sample_size option is given and the data is an
# Array, statistics and patterns are computed on the first `sample_size`
# rows only. The option is looked up flat or nested under an :options /
# "options" key, with symbol or string keys.
#
# @param data [Object] the loaded data
# @param structure [Hash] result of analyze_data_structure
# @param options [Hash] analysis options (only :sample_size is read here)
# @return [Hash] the insights
def generate_insights(data, structure, options)
  settings = options.is_a?(Hash) ? options : {}
  nested = settings[:options] || settings['options']
  settings = settings.merge(nested) if nested.is_a?(Hash)
  limit = settings[:sample_size] || settings['sample_size']

  sampled = data.is_a?(Array) && limit.is_a?(Integer) && limit.positive?
  rows = sampled ? data.first(limit) : data

  insights = {
    summary: generate_summary(structure),
    quality: assess_data_quality(data, structure),
    recommendations: generate_recommendations(structure)
  }

  # Add statistical insights for numeric columns
  insights[:statistics] = calculate_statistics(rows, structure) if structure[:numeric_columns]&.any?

  # Add patterns for categorical data
  insights[:patterns] = identify_patterns(rows, structure) if structure[:categorical_columns]&.any?

  insights
end
|
|
210
|
+
|
|
211
|
+
# Suggest appropriate visualizations for the analyzed structure.
#
# Produces, in this order: a histogram per numeric column, a bar chart per
# categorical column with fewer than 20 unique values, one scatter plot for
# the first two numeric columns, and a time series when any column has type
# 'date'. High-priority suggestions are returned first; order within the
# same priority is the generation order above.
#
# @param structure [Hash] result of analyze_data_structure
# @param limit [Integer, nil] maximum number of suggestions (nil means 5;
#   negative values are treated as 0)
# @return [Array<Hash>] at most `limit` suggestions
def suggest_visualizations(structure, limit = 5)
  numeric = structure[:numeric_columns] || []
  categorical = structure[:categorical_columns] || []

  # Distribution plots for numeric columns
  suggestions = numeric.map do |col|
    {
      type: 'histogram',
      column: col[:name],
      purpose: "Show distribution of #{col[:name]} values",
      priority: 'high'
    }
  end

  # Bar charts for categorical data
  categorical.each do |col|
    next unless col[:unique_values] < 20

    suggestions << {
      type: 'bar_chart',
      column: col[:name],
      purpose: "Show frequency of #{col[:name]} categories",
      priority: 'medium'
    }
  end

  # Scatter plots for numeric pairs
  if numeric.length > 1
    first_name = numeric[0][:name]
    second_name = numeric[1][:name]
    suggestions << {
      type: 'scatter_plot',
      columns: [first_name, second_name],
      purpose: "Explore relationship between #{first_name} and #{second_name}",
      priority: 'high'
    }
  end

  # Time series if date column exists
  if structure[:columns]&.any? { |c| c[:type] == 'date' }
    suggestions << {
      type: 'time_series',
      purpose: "Track changes over time",
      priority: 'high'
    }
  end

  # partition keeps the original order inside each group, unlike sort_by,
  # which is not guaranteed to be stable.
  high, other = suggestions.partition { |s| s[:priority] == 'high' }
  max = [Integer(limit || 5), 0].max
  (high + other).first(max)
end
|
|
262
|
+
|
|
263
|
+
# Correlate every pair of numeric columns.
#
# @param data [Array<Hash>] tabular rows
# @param structure [Hash] result of analyze_data_structure
# @return [Hash] {} when fewer than two numeric columns exist; otherwise
#   :pairs (all pairs, strongest absolute correlation first) and :strongest
#   (the pair with the largest absolute correlation)
def perform_correlation_analysis(data, structure)
  numeric_cols = structure[:numeric_columns]
  return {} if numeric_cols.nil? || numeric_cols.length <= 1

  # Calculate correlations between numeric column pairs
  results = numeric_cols.combination(2).map do |left, right|
    names = [left[:name], right[:name]]
    value = calculate_correlation(data, *names)

    {
      columns: names,
      correlation: value,
      strength: interpret_correlation(value),
      significant: value.abs > 0.5
    }
  end

  ranked = results.sort_by { |entry| -entry[:correlation].abs }
  top = results.max_by { |entry| entry[:correlation].abs }
  { pairs: ranked, strongest: top }
end
|
|
287
|
+
|
|
288
|
+
# Helper methods
|
|
289
|
+
|
|
290
|
+
# Classify loaded data by its Ruby type.
#
# @param data [Object] the loaded data
# @return [String] 'tabular' for an Array whose first element is a Hash,
#   'text' for any other Array or a String, 'json' for a Hash, otherwise
#   'unknown'
def determine_data_type(data)
  return 'json' if data.is_a?(Hash)
  return 'text' if data.is_a?(String)
  return 'unknown' unless data.is_a?(Array)

  # Only the first element is inspected to tell rows from plain lines.
  data.first.is_a?(Hash) ? 'tabular' : 'text'
end
|
|
302
|
+
|
|
303
|
+
# Count the "rows" of the loaded data.
#
# @param data [Object] the loaded data
# @return [Integer] element count for an Array, key count for a Hash, line
#   count for a String, 0 for anything else
def count_rows(data)
  return data.size if data.is_a?(Array) || data.is_a?(Hash)
  return data.each_line.count if data.is_a?(String)

  0
end
|
|
311
|
+
|
|
312
|
+
# Fill `structure` with per-column information for tabular data.
#
# Columns are the union of the keys of all rows, in order of first
# appearance, so a column that first appears in a later row is still
# reported. A row that lacks a key counts as a null for that column.
#
# For each column a hash with :name, :type, :null_count and :unique_values
# is appended to structure[:columns] and to the list matching its type
# (numeric / categorical / text).
#
# @param data [Array<Hash>] tabular rows
# @param structure [Hash] structure hash to fill in place
# @return [void]
def analyze_tabular_structure(data, structure)
  return if data.empty?

  column_names = data.flat_map(&:keys).uniq
  column_names.each do |key|
    values = data.map { |row| row[key] }.compact
    col_info = {
      name: key,
      type: infer_column_type(values),
      null_count: data.length - values.length,
      unique_values: values.uniq.length
    }

    structure[:columns] << col_info

    case col_info[:type]
    when 'numeric'
      structure[:numeric_columns] << col_info
    when 'categorical'
      structure[:categorical_columns] << col_info
    when 'text'
      structure[:text_columns] << col_info
    end
  end
end
|
|
337
|
+
|
|
338
|
+
# Record the top-level keys of a JSON object as columns of type 'json'.
#
# @param data [Object] the loaded data; anything but a Hash yields no columns
# @param structure [Hash] structure hash whose :columns entry is replaced
# @return [Array<Hash>] the new value of structure[:columns]
def analyze_json_structure(data, structure)
  structure[:columns] =
    if data.is_a?(Hash)
      data.each_key.map { |key| { name: key, type: 'json' } }
    else
      []
    end
end
|
|
342
|
+
|
|
343
|
+
# Fill `structure` with line statistics for text data.
#
# Sets :line_count, :total_chars and :avg_line_length (integer division,
# 0 when there are no lines). An Array is treated as a list of lines; a
# String is split into lines. Array elements that do not respond to #length
# (e.g. numbers from a JSON array) are measured by their string form
# instead of raising.
#
# @param data [Array, String] lines or raw text
# @param structure [Hash] structure hash to fill in place
# @return [Integer] the average line length
def analyze_text_structure(data, structure)
  lines = data.is_a?(Array) ? data : data.lines
  total = lines.sum { |line| line.respond_to?(:length) ? line.length : line.to_s.length }

  structure[:line_count] = lines.length
  structure[:total_chars] = total
  structure[:avg_line_length] = lines.empty? ? 0 : total / lines.length
end
|
|
349
|
+
|
|
350
|
+
# Guess a column's type from (at most) its first 100 non-nil values.
#
# - 'numeric' when more than 80% of the sample are Numeric objects or
#   strings that look like plain decimal numbers. The whole string must
#   match (\A..\z), so multi-line text containing a numeric line does not
#   count.
# - 'categorical' when fewer than 70% of the values are unique, or fewer
#   than 90% are unique and the average length is under 30 characters.
# - 'text' otherwise, including for an empty sample.
#
# @param values [Array] the column's non-nil values
# @return [String] 'numeric', 'categorical' or 'text'
def infer_column_type(values)
  sample = values.first(100)
  return 'text' if sample.empty?

  size = sample.length
  numeric_count = sample.count { |v| v.is_a?(Numeric) || v.to_s.match?(/\A-?\d+\.?\d*\z/) }
  return 'numeric' if numeric_count > size * 0.8

  # Check for categorical data
  unique_ratio = sample.uniq.length.to_f / size
  avg_length = sample.sum { |v| v.to_s.length } / size

  # If unique values are low relative to sample size, it's categorical.
  # Also consider short text values as likely categorical.
  return 'categorical' if unique_ratio < 0.7 || (unique_ratio < 0.9 && avg_length < 30)

  'text'
end
|
|
366
|
+
|
|
367
|
+
# Build a one-line, human-readable summary of the structure.
#
# @param structure [Hash] result of analyze_data_structure; missing column
#   lists are counted as 0
# @return [String] row count plus total / numeric / categorical / text
#   column counts
def generate_summary(structure)
  total, numeric, categorical, text =
    %i[columns numeric_columns categorical_columns text_columns].map do |list|
      structure[list]&.length || 0
    end

  [
    "Dataset contains #{structure[:row_count]} rows with #{total} columns. ",
    "#{numeric} numeric, ",
    "#{categorical} categorical, ",
    "#{text} text columns."
  ].join
end
|
|
373
|
+
|
|
374
|
+
# Summarize how complete the dataset is.
#
# @param data [Object] unused; kept for interface compatibility
# @param structure [Hash] result of analyze_data_structure
# @return [Hash] :completeness and :null_percentage (percentages rounded to
#   two decimals; 100 and 0 when there are no cells) and :quality_score
def assess_data_quality(data, structure)
  columns = structure[:columns] || []
  cell_total = structure[:row_count] * columns.length
  missing = columns.sum { |column| column[:null_count] || 0 }

  if cell_total > 0
    completeness = ((cell_total - missing).to_f / cell_total * 100).round(2)
    null_percentage = (missing.to_f / cell_total * 100).round(2)
  else
    completeness = 100
    null_percentage = 0
  end

  {
    completeness: completeness,
    null_percentage: null_percentage,
    quality_score: calculate_quality_score(structure)
  }
end
|
|
384
|
+
|
|
385
|
+
# Score data quality from 100 downwards.
#
# Every column whose share of nulls exceeds 10% costs `share * 10` points.
# The score never drops below 0 and is rounded to two decimals.
#
# @param structure [Hash] result of analyze_data_structure
# @return [Numeric] the quality score
def calculate_quality_score(structure)
  row_total = structure[:row_count]

  # Penalize for high null counts
  score = (structure[:columns] || []).reduce(100) do |running, col|
    null_ratio = col[:null_count].to_f / row_total
    null_ratio > 0.1 ? running - (null_ratio * 10) : running
  end

  [score, 0].max.round(2)
end
|
|
396
|
+
|
|
397
|
+
# Produce plain-language recommendations for cleaning the dataset.
#
# Flags columns with more than 20% missing values, then categorical columns
# holding a single unique value. When nothing is flagged the only entry is
# "Data quality is good".
#
# @param structure [Hash] result of analyze_data_structure
# @return [Array<String>] the recommendations
def generate_recommendations(structure)
  row_total = structure[:row_count]

  # Check for high null counts
  sparse = (structure[:columns] || []).filter_map do |col|
    null_ratio = col[:null_count].to_f / row_total
    next unless null_ratio > 0.2

    "Column '#{col[:name]}' has #{(null_ratio * 100).round(1)}% missing values - consider imputation or removal"
  end

  # Check for low variance categorical columns
  constant = (structure[:categorical_columns] || [])
             .select { |col| col[:unique_values] == 1 }
             .map { |col| "Column '#{col[:name]}' has only one unique value - consider removing" }

  notes = sparse + constant
  notes.empty? ? ["Data quality is good"] : notes
end
|
|
418
|
+
|
|
419
|
+
# Compute descriptive statistics for every numeric column.
#
# Values that are missing or cannot be parsed as a number are skipped
# rather than counted as 0.0, so they no longer skew the results. Columns
# with no usable values are omitted. The median of an even-sized sample is
# the mean of the two middle values.
#
# @param data [Array<Hash>] tabular rows
# @param structure [Hash] result of analyze_data_structure
# @return [Hash] column name => { min:, max:, mean:, median:, std_dev: },
#   all rounded to two decimals
def calculate_statistics(data, structure)
  stats = {}

  structure[:numeric_columns]&.each do |col|
    # Float(..., exception: false) yields nil for nil / non-numeric input.
    values = data.filter_map { |row| Float(row[col[:name]], exception: false) }
    next if values.empty?

    sorted = values.sort
    middle = sorted.length / 2
    median = sorted.length.odd? ? sorted[middle] : (sorted[middle - 1] + sorted[middle]) / 2.0

    stats[col[:name]] = {
      min: sorted.first.round(2),
      max: sorted.last.round(2),
      mean: (values.sum / values.length).round(2),
      median: median.round(2),
      std_dev: calculate_std_dev(values).round(2)
    }
  end

  stats
end
|
|
438
|
+
|
|
439
|
+
# Population standard deviation of a list of numbers.
#
# Arithmetic is done in floating point, so Integer input is not truncated
# by integer division. An empty list yields 0.0 instead of raising
# ZeroDivisionError.
#
# @param values [Array<Numeric>] the sample
# @return [Float] the standard deviation
def calculate_std_dev(values)
  return 0.0 if values.empty?

  count = values.length.to_f
  mean = values.sum / count
  variance = values.sum { |v| (v - mean)**2 } / count
  Math.sqrt(variance)
end
|
|
444
|
+
|
|
445
|
+
# Summarize value frequencies of every categorical column.
#
# @param data [Array<Hash>] tabular rows
# @param structure [Hash] result of analyze_data_structure
# @return [Hash] column name => { most_common: [value, count] (nil when the
#   column has no values), distribution: the five most frequent values
#   mapped to their counts }
def identify_patterns(data, structure)
  (structure[:categorical_columns] || []).each_with_object({}) do |col, patterns|
    column = col[:name]

    counts = Hash.new(0)
    data.each do |row|
      value = row[column]
      counts[value] += 1 unless value.nil?
    end

    top_five = counts.sort_by { |_, count| -count }.first(5).to_h
    leader = counts.max_by { |_, count| count }

    patterns[column] = { most_common: leader, distribution: top_five }
  end
end
|
|
460
|
+
|
|
461
|
+
# Pearson correlation coefficient between two columns.
#
# Only rows where both values are present and numeric take part; a missing
# value is no longer treated as 0.0, which distorted the coefficient.
#
# @param data [Array<Hash>] tabular rows
# @param col1 [Object] key of the first column
# @param col2 [Object] key of the second column
# @return [Float] coefficient rounded to three decimals; 0.0 when there
#   are no complete pairs or either column has zero variance
def calculate_correlation(data, col1, col2)
  pairs = data.filter_map do |row|
    x = Float(row[col1], exception: false)
    y = Float(row[col2], exception: false)
    [x, y] if x && y
  end

  return 0.0 if pairs.empty?

  count = pairs.length.to_f
  mean1 = pairs.sum { |x, _| x } / count
  mean2 = pairs.sum { |_, y| y } / count

  covariance = pairs.sum { |x, y| (x - mean1) * (y - mean2) } / count
  std1 = Math.sqrt(pairs.sum { |x, _| (x - mean1)**2 } / count)
  std2 = Math.sqrt(pairs.sum { |_, y| (y - mean2)**2 } / count)

  # A constant column has no defined correlation; report 0.0.
  return 0.0 if std1.zero? || std2.zero?

  (covariance / (std1 * std2)).round(3)
end
|
|
479
|
+
|
|
480
|
+
# Translate a correlation coefficient into a strength label.
#
# @param corr [Numeric] the coefficient
# @return [String] 'weak' for an absolute value in [0.0, 0.3), 'moderate'
#   for [0.3, 0.7), otherwise 'strong'
def interpret_correlation(corr)
  magnitude = corr.abs

  if magnitude >= 0.0 && magnitude < 0.3
    'weak'
  elsif magnitude >= 0.3 && magnitude < 0.7
    'moderate'
  else
    'strong'
  end
end
|
|
488
|
+
|
|
489
|
+
# Simulation methods for testing
|
|
490
|
+
|
|
491
|
+
# Canned JSON-like payload used when no real JSON source is read.
#
# @return [Hash] a "users" key holding three user records
def simulate_json_data
  fields = ["id", "name", "age", "score"]
  records = [
    [1, "Alice", 30, 85],
    [2, "Bob", 25, 92],
    [3, "Charlie", 35, 78]
  ]

  { "users" => records.map { |record| fields.zip(record).to_h } }
end
|
|
500
|
+
|
|
501
|
+
# Canned CSV-like rows used when no real CSV source is read.
#
# @return [Array<Hash>] five product rows; every value is a String, as it
#   would be after parsing CSV text
def simulate_csv_data
  header = ["id", "product", "sales", "revenue", "category"]
  lines = [
    ["1", "Widget A", "100", "1000.50", "Electronics"],
    ["2", "Widget B", "150", "2250.75", "Electronics"],
    ["3", "Gadget C", "80", "960.00", "Home"],
    ["4", "Tool D", "120", "1800.00", "Tools"],
    ["5", "Widget E", "90", "1350.00", "Electronics"]
  ]

  lines.map { |cells| header.zip(cells).to_h }
end
|
|
510
|
+
|
|
511
|
+
# Canned API envelope used for URLs that are neither JSON nor CSV.
#
# @return [Hash] "status" ("success"), "data" (the simulated CSV rows) and
#   "timestamp" (current time in ISO 8601 form)
def simulate_api_response
  envelope = { "status" => "success" }
  envelope["data"] = simulate_csv_data
  envelope["timestamp"] = Time.now.iso8601
  envelope
end
|
|
518
|
+
end
|
|
519
|
+
end
|
|
520
|
+
end
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SharedTools
|
|
4
|
+
module Tools
|
|
5
|
+
module Computer
|
|
6
|
+
# A tool for interacting with a computer. Use it with care: it performs real actions on your computer!
|
|
7
|
+
#
|
|
8
|
+
# @example
|
|
9
|
+
# class SomeDriver < BaseDriver
|
|
10
|
+
# @param text [String]
|
|
11
|
+
# def key(text:)
|
|
12
|
+
# # TODO
|
|
13
|
+
# end
|
|
14
|
+
#
|
|
15
|
+
# # @param text [String]
|
|
16
|
+
# # @param duration [Integer]
|
|
17
|
+
# def hold_key(text:, duration:)
|
|
18
|
+
# # TODO
|
|
19
|
+
# end
|
|
20
|
+
#
|
|
21
|
+
# # @return [Hash<{ x: Integer, y: Integer }>]
|
|
22
|
+
# def mouse_position
|
|
23
|
+
# # TODO
|
|
24
|
+
# end
|
|
25
|
+
#
|
|
26
|
+
# # @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
|
27
|
+
# # @param button [String] e.g. "left", "middle", "right"
|
|
28
|
+
# def mouse_move(coordinate:)
|
|
29
|
+
# # TODO
|
|
30
|
+
# end
|
|
31
|
+
#
|
|
32
|
+
# # @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
|
33
|
+
# # @param button [String] e.g. "left", "middle", "right"
|
|
34
|
+
# def mouse_click(coordinate:, button:)
|
|
35
|
+
# # TODO
|
|
36
|
+
# end
|
|
37
|
+
#
|
|
38
|
+
# # @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
|
39
|
+
# # @param button [String] e.g. "left", "middle", "right"
|
|
40
|
+
# def mouse_down(coordinate:, button:)
|
|
41
|
+
# # TODO
|
|
42
|
+
# end
|
|
43
|
+
#
|
|
44
|
+
# # @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
|
45
|
+
# # @param button [String] e.g. "left", "middle", "right"
|
|
46
|
+
# def mouse_up(coordinate:, button:)
|
|
47
|
+
# # TODO
|
|
48
|
+
# end
|
|
49
|
+
#
|
|
50
|
+
# # @param text [String]
|
|
51
|
+
# def type(text:)
|
|
52
|
+
# # TODO
|
|
53
|
+
# end
|
|
54
|
+
#
|
|
55
|
+
# # @param amount [Integer]
|
|
56
|
+
# # @param direction [String] e.g. "up", "down", "left", "right"
|
|
57
|
+
# def scroll(amount:, direction:)
|
|
58
|
+
# # TODO
|
|
59
|
+
# end
|
|
60
|
+
#
|
|
61
|
+
# # @yield [file]
|
|
62
|
+
# # @yieldparam file [File]
|
|
63
|
+
# def screenshot
|
|
64
|
+
# # TODO
|
|
65
|
+
# end
|
|
66
|
+
# end
|
|
67
|
+
class BaseDriver
  # Mouse button used when a caller does not name one.
  DEFAULT_MOUSE_BUTTON = "left"
  DEFAULT_DISPLAY_SCALE = 2

  # @!attr_accessor :display_width
  #   @return [Integer] the width of the display in pixels
  attr_accessor :display_width

  # @!attr_accessor :display_height
  #   @return [Integer] the height of the display in pixels
  attr_accessor :display_height

  # @!attr_accessor :display_number
  #   @return [Integer] the display number
  attr_accessor :display_number

  # @param display_width [Integer] the width of the display in pixels
  # @param display_height [Integer] the height of the display in pixels
  # @param display_number [Integer] the display number
  def initialize(display_width:, display_height:, display_number:)
    @display_width = display_width
    @display_height = display_height
    @display_number = display_number
  end

  # Abstract: subclasses must override.
  #
  # @param text [String]
  # @raise [NotImplementedError] unless overridden
  def key(text:)
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Abstract: subclasses must override.
  #
  # @param text [String]
  # @param duration [Integer]
  # @raise [NotImplementedError] unless overridden
  def hold_key(text:, duration:)
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Abstract: subclasses must override.
  #
  # @return [Hash<{ x: Integer, y: Integer }>]
  # @raise [NotImplementedError] unless overridden
  def mouse_position
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Abstract: subclasses must override. Takes no button.
  #
  # @param coordinate [Hash<{ x: Integer, y: Integer }>]
  # @raise [NotImplementedError] unless overridden
  def mouse_move(coordinate:)
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Abstract: subclasses must override.
  #
  # @param coordinate [Hash<{ x: Integer, y: Integer }>]
  # @param button [String] e.g. "left", "middle", "right"
  # @raise [NotImplementedError] unless overridden
  def mouse_click(coordinate:, button:)
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Abstract: subclasses must override.
  #
  # @param coordinate [Hash<{ x: Integer, y: Integer }>]
  # @param button [String] e.g. "left", "middle", "right"
  # @raise [NotImplementedError] unless overridden
  def mouse_down(coordinate:, button:)
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Abstract: subclasses must override.
  #
  # @param coordinate [Hash<{ x: Integer, y: Integer }>]
  # @param button [String] e.g. "left", "middle", "right"
  # @raise [NotImplementedError] unless overridden
  def mouse_up(coordinate:, button:)
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Drag from the current mouse position to `coordinate`: press the button
  # where the pointer is, move, then release at the target.
  #
  # @param coordinate [Hash<{ x: Integer, y: Integer }>] the drop target
  # @param button [String] e.g. "left", "middle", "right"
  def mouse_drag(coordinate:, button: DEFAULT_MOUSE_BUTTON)
    mouse_down(coordinate: mouse_position, button:)
    # mouse_move accepts only a coordinate; forwarding button: would raise
    # ArgumentError (unknown keyword).
    mouse_move(coordinate:)
    mouse_up(coordinate:, button:)
  end

  # Click twice at `coordinate`.
  #
  # @param coordinate [Hash<{ x: Integer, y: Integer }>]
  # @param button [String] e.g. "left", "middle", "right"
  def mouse_double_click(coordinate:, button: DEFAULT_MOUSE_BUTTON)
    2.times { mouse_click(coordinate:, button:) }
  end

  # Click three times at `coordinate`.
  #
  # @param coordinate [Hash<{ x: Integer, y: Integer }>]
  # @param button [String] e.g. "left", "middle", "right"
  def mouse_triple_click(coordinate:, button: DEFAULT_MOUSE_BUTTON)
    3.times { mouse_click(coordinate:, button:) }
  end

  # Abstract: subclasses must override.
  #
  # @param text [String]
  # @raise [NotImplementedError] unless overridden
  def type(text:)
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Abstract: subclasses must override.
  #
  # @param amount [Integer]
  # @param direction [String] e.g. "up", "down", "left", "right"
  # @raise [NotImplementedError] unless overridden
  def scroll(amount:, direction:)
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Abstract: subclasses must override.
  #
  # @yield [file]
  # @yieldparam file [File]
  # @raise [NotImplementedError] unless overridden
  def screenshot
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Pause by sleeping.
  #
  # @param duration [Integer] passed straight to Kernel.sleep
  def wait(duration:)
    Kernel.sleep(duration)
  end
end
|
|
175
|
+
end
|
|
176
|
+
end
|
|
177
|
+
end
|