ai_root_shield 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,512 @@
1
+ # frozen_string_literal: true
2
+
3
require "time"

require "onnxruntime"
require "numo/narray"
5
+
6
+ module AiRootShield
7
+ # AI-powered behavioral analysis using ONNX models
8
+ class AiBehavioralAnalyzer
9
# Default location of the ONNX model, resolved relative to this source file.
DEFAULT_MODEL_PATH = File.join(__dir__, "../../models/behavioral_model.onnx")

# Slot of each behavioral feature inside the feature vector; the position in
# this list is the index used for the model input.
FEATURE_INDICES = %i[
  file_access_entropy
  sensor_consistency_score
  hardware_fingerprint_score
  process_behavior_score
  network_pattern_score
  timing_analysis_score
  system_call_entropy
  memory_access_pattern
].each_with_index.to_h.freeze
22
+
23
# Builds the analyzer and loads the ONNX model if its file is present.
#
# @param model_path [String, nil] path to an ONNX model file; when nil the
#   analyzer uses DEFAULT_MODEL_PATH
def initialize(model_path: nil)
  @model = nil
  @confidence_threshold = 0.7
  @model_path = model_path || DEFAULT_MODEL_PATH
  return unless File.exist?(@model_path)

  load_model
end
29
+
30
# Runs the behavioral analysis for one device snapshot.
#
# When no model is loaded the result of #fallback_analysis is returned
# unchanged. Otherwise the feature vector is extracted once and shared by the
# inference, anomaly and emulator-score steps.
#
# @param device_data [Hash] Parsed device data
# @return [Hash] Analysis result with AI confidence and behavioral factors
def analyze(device_data)
  return fallback_analysis(device_data) unless @model

  feature_vector = extract_behavioral_features(device_data)
  inference = run_inference(feature_vector)
  confidence, risk, factors = inference.values_at(:confidence, :risk_score, :factors)
  anomalies = detect_anomalies(device_data, feature_vector)
  emulator_score = calculate_ml_emulator_score(feature_vector)

  {
    ai_confidence: confidence,
    behavioral_risk_score: risk,
    behavioral_factors: factors,
    anomaly_indicators: anomalies,
    ml_emulator_score: emulator_score
  }
end
47
+
48
+ private
49
+
50
# Loads the ONNX model at @model_path into @model.
#
# Loading is best-effort: any StandardError raised while constructing the
# model is reported as a warning and leaves @model set to nil. The warning is
# written with Kernel#warn (stderr) so a library failure does not pollute the
# host application's standard output.
def load_model
  @model = OnnxRuntime::Model.new(@model_path)
rescue StandardError => e
  warn "Warning: Could not load ONNX model at #{@model_path}: #{e.message}"
  @model = nil
end
58
+
59
# Builds the feature vector consumed by the model and by the rule-based checks.
#
# Each feature is produced by one extractor method and stored at the slot
# given by FEATURE_INDICES.
#
# @param device_data [Hash] Parsed device data
# @return [Numo::SFloat] vector with one entry per FEATURE_INDICES key
def extract_behavioral_features(device_data)
  extractors = {
    file_access_entropy: :calculate_file_access_entropy,
    sensor_consistency_score: :calculate_sensor_consistency,
    hardware_fingerprint_score: :calculate_hardware_fingerprint_score,
    process_behavior_score: :analyze_process_behavior,
    network_pattern_score: :analyze_network_patterns,
    timing_analysis_score: :analyze_timing_patterns,
    system_call_entropy: :calculate_system_call_entropy,
    memory_access_pattern: :analyze_memory_patterns
  }

  vector = Numo::SFloat.zeros(FEATURE_INDICES.size)
  extractors.each do |feature, extractor|
    vector[FEATURE_INDICES[feature]] = send(extractor, device_data)
  end
  vector
end
88
+
89
# Shannon entropy of the file paths returned by #extract_file_accesses,
# divided by 8 and capped at 1.0.
#
# @param device_data [Hash] Parsed device data
# @return [Float] 0.0 when no file accesses were found, otherwise a value in 0.0..1.0
def calculate_file_access_entropy(device_data)
  paths = extract_file_accesses(device_data)
  return 0.0 if paths.empty?

  total = paths.size.to_f
  entropy = 0.0
  paths.tally.each_value do |occurrences|
    share = occurrences / total
    entropy -= share * Math.log2(share)
  end

  normalized = entropy / 8.0
  normalized > 1.0 ? 1.0 : normalized
end
105
+
106
# Scores how plausible the reported sensor set and sensor readings are.
#
# Accelerometer and gyroscope readings each contribute whatever
# #analyze_sensor_realism returns; every expected sensor missing from the
# reported list costs 0.2. The result is clamped to 0.0..1.0.
#
# @param device_data [Hash] Parsed device data
# @return [Float] 0.0 when no sensors are reported, otherwise a value in 0.0..1.0
def calculate_sensor_consistency(device_data)
  reported = device_data.dig(:hardware_info, :sensors) || []
  return 0.0 if reported.empty?

  absent = %w[accelerometer gyroscope magnetometer proximity light] - reported.map(&:downcase)
  readings = device_data[:sensor_data] || {}

  score = 0.0
  %w[accelerometer gyroscope].each do |kind|
    entry = readings[kind]
    next unless entry

    score += analyze_sensor_realism(entry["values"] || [], kind)
  end
  score -= absent.size * 0.2

  score.clamp(0.0, 1.0)
end
135
+
136
# Scores the hardware description for emulator indicators.
#
# Starts at 1.0 and subtracts a penalty per indicator:
#   * device model mentions "generic" or "emulator"          (-0.3)
#   * manufacturer is empty or mentions "android"            (-0.2)
#   * serial number is empty, "unknown" or mentions "android" (-0.2)
#   * baseband version is missing or empty                   (-0.3)
#
# All string checks are case-insensitive; the serial number used to be
# compared case-sensitively, so values such as "UNKNOWN" went unnoticed.
#
# @param device_data [Hash] Parsed device data
# @return [Float] score in 0.0..1.0
def calculate_hardware_fingerprint_score(device_data)
  hardware = device_data[:hardware_info] || {}

  device_model = hardware[:device_model].to_s.downcase
  manufacturer = hardware[:manufacturer].to_s.downcase
  serial = hardware[:serial_number].to_s.downcase
  baseband = hardware[:baseband_version].to_s

  score = 1.0
  score -= 0.3 if device_model.include?("generic") || device_model.include?("emulator")
  score -= 0.2 if manufacturer.empty? || manufacturer.include?("android")
  score -= 0.2 if serial.empty? || serial == "unknown" || serial.include?("android")
  score -= 0.3 if baseband.empty?

  score.clamp(0.0, 1.0)
end
168
+
169
# Scores the process list for emulator, debugger and code-injection indicators.
#
# For every Hash entry up to three indicators are counted:
#   * the name matches an emulator process (qemu, goldfish, ranchu, genymotion)
#   * the name matches a debugging/instrumentation tool (gdb, lldb, frida, strace)
#   * one of its memory maps is executable and located under /data
#
# The indicator count is divided by the number of process entries. Because one
# process can contribute several indicators, the raw value could drop below
# zero; the result is therefore clamped to 0.0..1.0.
#
# @param device_data [Hash] Parsed device data
# @return [Float] 0.5 when no processes are listed, otherwise 0.0..1.0
#   (lower is more suspicious)
def analyze_process_behavior(device_data)
  processes = device_data[:processes] || []
  return 0.5 if processes.empty?

  suspicious_patterns = processes.sum do |process|
    next 0 unless process.is_a?(Hash)

    process_name = process["name"].to_s.downcase
    memory_maps = process["memory_maps"] || []
    executable_data_map = memory_maps.any? do |map|
      # Skip malformed entries instead of indexing into non-Hash values.
      map.is_a?(Hash) && map["permissions"]&.include?("x") && map["path"]&.start_with?("/data")
    end

    [
      process_name.match?(/qemu|goldfish|ranchu|genymotion/),
      process_name.match?(/gdb|lldb|frida|strace/),
      executable_data_map
    ].count(true)
  end

  (1.0 - (suspicious_patterns.to_f / processes.size)).clamp(0.0, 1.0)
end
201
+
202
# Scores the network configuration for interception indicators.
#
# Starts at 1.0 and subtracts:
#   * 0.3 for an enabled proxy whose host looks like localhost
#   * 0.2 for an enabled proxy on a commonly used interception port
#   * 0.1 for an active VPN
#   * 0.15 for every user-installed certificate
#
# The proxy port is coerced with #to_i so that a port supplied as a String
# ("8080") is recognised as well as an Integer one.
#
# @param device_data [Hash] Parsed device data
# @return [Float] score in 0.0..1.0 (lower is more suspicious)
def analyze_network_patterns(device_data)
  network = device_data[:network_config] || {}
  score = 1.0

  if network.dig(:proxy_settings, "enabled")
    proxy_host = network.dig(:proxy_settings, "host").to_s
    proxy_port = network.dig(:proxy_settings, "port").to_i

    score -= 0.3 if proxy_host.match?(/localhost|127\.0\.0\.1|::1/)
    score -= 0.2 if [8080, 8888, 3128, 8081, 8082].include?(proxy_port)
  end

  # A VPN alone is only a weak signal, hence the small penalty.
  score -= 0.1 if network[:vpn_active]

  certificates = network[:certificates] || []
  user_certs = certificates.count { |cert| cert["user_installed"] }
  score -= user_certs * 0.15

  score.clamp(0.0, 1.0)
end
237
+
238
# Scores how natural the spacing between timestamped log entries looks.
#
# Relies on Time.parse from the "time" standard library, which must be
# required by the file. Entries whose timestamp cannot be parsed are skipped.
#
# At least three parseable timestamps (two intervals) are needed before
# regularity can be judged; with fewer the neutral score 0.5 is returned.
# Previously two timestamps produced a single interval that was always
# reported as "perfectly regular".
#
# @param device_data [Hash] Parsed device data
# @return [Float] 0.5 when there is too little data, 0.2 when all intervals
#   are identical, otherwise twice the coefficient of variation capped at 1.0
def analyze_timing_patterns(device_data)
  logs = device_data[:logs] || []
  return 0.5 if logs.empty?

  timestamps = logs.filter_map do |log|
    next unless log.is_a?(Hash) && log["timestamp"]

    begin
      Time.parse(log["timestamp"])
    rescue ArgumentError, TypeError
      # Unparseable or non-String timestamp: ignore this entry.
      nil
    end
  end
  return 0.5 if timestamps.size < 3

  intervals = timestamps.each_cons(2).map { |earlier, later| (later - earlier).abs }

  # Perfectly regular intervals suggest automation/emulation.
  return 0.2 if intervals.uniq.size == 1

  mean_interval = intervals.sum / intervals.size
  variance = intervals.sum { |interval| (interval - mean_interval)**2 } / intervals.size
  cv = mean_interval.positive? ? Math.sqrt(variance) / mean_interval : 0.0

  # Higher variation is more realistic (up to a point).
  [cv * 2, 1.0].min
end
270
+
271
# Shannon entropy of the system call names mentioned in the logs, divided by
# 3 and capped at 1.0.
#
# Call names are collected across all log entries with flat_map, so the
# entropy is computed over individual calls. The previous filter_map produced
# one array per log entry, which grouped whole entries instead of calls and
# never triggered the "no calls" guard. Hash entries without a "message" are
# treated as empty text.
#
# @param device_data [Hash] Parsed device data
# @return [Float] 0.5 when no system call names are found, otherwise 0.0..1.0
def calculate_system_call_entropy(device_data)
  logs = device_data[:logs] || []
  system_calls = logs.flat_map do |log|
    log_text = log.is_a?(Hash) ? log["message"].to_s : log.to_s
    log_text.scan(/\b(?:open|read|write|close|mmap|ioctl|socket)\b/)
  end
  return 0.5 if system_calls.empty?

  total_calls = system_calls.size.to_f
  entropy = system_calls.tally.each_value.reduce(0.0) do |sum, count|
    probability = count / total_calls
    sum - (probability * Math.log2(probability))
  end

  [entropy / 3.0, 1.0].min
end
294
+
295
# Scores the memory maps of all processes for suspicious regions.
#
# A region counts once when its path contains /dev/ashmem and its size exceeds
# 100_000_000, and once more when it is executable and located under /data.
# The count is divided by the total number of listed regions. Because a single
# region can be counted twice, the raw value could fall below zero; the result
# is clamped to 0.0..1.0.
#
# @param device_data [Hash] Parsed device data
# @return [Float] 0.5 when there are no processes or no memory regions,
#   otherwise 0.0..1.0 (lower means more suspicious)
def analyze_memory_patterns(device_data)
  processes = device_data[:processes] || []
  return 0.5 if processes.empty?

  suspicious_memory_patterns = 0
  total_memory_regions = 0

  processes.each do |process|
    next unless process.is_a?(Hash)

    memory_maps = process["memory_maps"] || []
    total_memory_regions += memory_maps.size

    memory_maps.each do |map|
      next unless map.is_a?(Hash)

      oversized_ashmem = map["path"]&.include?("/dev/ashmem") && map["size"].to_i > 100_000_000
      executable_data = map["permissions"]&.include?("x") && map["path"]&.start_with?("/data")

      suspicious_memory_patterns += 1 if oversized_ashmem
      suspicious_memory_patterns += 1 if executable_data
    end
  end

  return 0.5 if total_memory_regions.zero?

  (1.0 - (suspicious_memory_patterns.to_f / total_memory_regions)).clamp(0.0, 1.0)
end
328
+
329
# Runs the ONNX model on the feature vector.
#
# The vector is reshaped to an explicit (1, features.size) batch. The previous
# reshape(1, -1) relied on NumPy-style dimension inference; spelling out the
# size avoids depending on how the array library treats a negative dimension.
#
# Falls back to #fallback_prediction when no model is loaded or when any
# StandardError is raised during inference; the failure is reported with
# Kernel#warn (stderr).
#
# @param features [Numo::SFloat] feature vector from #extract_behavioral_features
# @return [Hash] :risk_score (Integer, model output times 100, rounded),
#   :confidence and :factors
def run_inference(features)
  return fallback_prediction(features) unless @model

  begin
    input_data = { "input" => features.reshape(1, features.size) }
    output = @model.predict(input_data)

    # Assumes the model exposes "risk_score" and "confidence" outputs, each
    # shaped as one row with one value.
    risk_score = output["risk_score"].first.first
    confidence = output["confidence"].first.first

    {
      risk_score: (risk_score * 100).round,
      confidence: confidence,
      factors: generate_behavioral_factors(features, risk_score)
    }
  rescue StandardError => e
    warn "Warning: ONNX inference failed: #{e.message}"
    fallback_prediction(features)
  end
end
356
+
357
# Rule-based prediction used when the ONNX model cannot be used.
#
# Every feature is compared against a threshold and the share of features
# below their threshold becomes the risk score (0..100). All eight features
# are checked, using the same thresholds as #generate_behavioral_factors.
# Previously only five features were checked while the total was still divided
# by eight, so the score could never exceed 63. Scores produced by the original
# five rules are unchanged.
#
# @param features [#[]] feature vector indexed via FEATURE_INDICES
# @return [Hash] :risk_score (Integer 0..100), :confidence (fixed 0.6) and :factors
def fallback_prediction(features)
  thresholds = {
    file_access_entropy: 0.3,
    sensor_consistency_score: 0.5,
    hardware_fingerprint_score: 0.6,
    process_behavior_score: 0.5,
    network_pattern_score: 0.7,
    timing_analysis_score: 0.4,
    system_call_entropy: 0.3,
    memory_access_pattern: 0.4
  }

  risk_indicators = thresholds.count do |feature, limit|
    features[FEATURE_INDICES[feature]] < limit
  end
  risk_score = (risk_indicators / thresholds.size.to_f * 100).round

  {
    risk_score: risk_score,
    confidence: 0.6, # Lower confidence for fallback method
    factors: generate_behavioral_factors(features, risk_score / 100.0)
  }
end
377
+
378
# Translates feature values and the overall risk into factor labels.
#
# One label is emitted for every feature that is below its threshold, in the
# fixed order of the rule table, followed by the two high-level indicators.
#
# @param features [#[]] feature vector indexed via FEATURE_INDICES
# @param risk_score [Numeric] risk compared against 0.7
# @return [Array<String>] factor labels
def generate_behavioral_factors(features, risk_score)
  feature_rules = [
    [:file_access_entropy, 0.3, "LOW_FILE_ACCESS_ENTROPY"],
    [:sensor_consistency_score, 0.5, "INCONSISTENT_SENSOR_DATA"],
    [:hardware_fingerprint_score, 0.6, "SUSPICIOUS_HARDWARE_FINGERPRINT"],
    [:process_behavior_score, 0.5, "ANOMALOUS_PROCESS_BEHAVIOR"],
    [:network_pattern_score, 0.7, "SUSPICIOUS_NETWORK_PATTERNS"],
    [:timing_analysis_score, 0.4, "IRREGULAR_TIMING_PATTERNS"],
    [:system_call_entropy, 0.3, "LOW_SYSTEM_CALL_ENTROPY"],
    [:memory_access_pattern, 0.4, "ANOMALOUS_MEMORY_PATTERNS"]
  ]

  labels = feature_rules.filter_map do |feature, limit, label|
    label if features[FEATURE_INDICES[feature]] < limit
  end

  # High-level behavioral indicators come last.
  labels << "AI_BEHAVIORAL_ANOMALY" if risk_score > 0.7
  labels << "ML_EMULATOR_DETECTED" if calculate_ml_emulator_score(features) > 0.8
  labels
end
396
+
397
# Lists anomaly records for features that fall below their anomaly threshold.
#
# Only the feature vector is inspected; device_data is accepted but not read.
#
# @param device_data [Hash] Parsed device data
# @param features [#[]] feature vector indexed via FEATURE_INDICES
# @return [Array<Hash>] records with :type, :severity and :description
def detect_anomalies(device_data, features)
  checks = [
    [:file_access_entropy, 0.2, "file_access_pattern", "high",
     "Extremely low entropy in file access patterns suggests automated behavior"],
    [:sensor_consistency_score, 0.3, "sensor_inconsistency", "medium",
     "Sensor data patterns inconsistent with real device behavior"],
    [:hardware_fingerprint_score, 0.4, "hardware_fingerprint", "high",
     "Hardware characteristics suggest emulated environment"]
  ]

  checks.each_with_object([]) do |(feature, limit, type, severity, description), anomalies|
    next unless features[FEATURE_INDICES[feature]] < limit

    anomalies << { type: type, severity: severity, description: description }
  end
end
429
+
430
# Combines four feature checks into an emulator likelihood.
#
# Each check contributes its weight when the feature is below its limit; the
# hardware fingerprint counts double. The weights add up to 5, which is also
# the divisor, so the result lies in 0.0..1.0.
#
# @param features [#[]] feature vector indexed via FEATURE_INDICES
# @return [Float] emulator score in 0.0..1.0
def calculate_ml_emulator_score(features)
  weighted_checks = [
    [:hardware_fingerprint_score, 0.5, 2],
    [:sensor_consistency_score, 0.4, 1],
    [:process_behavior_score, 0.3, 1],
    [:memory_access_pattern, 0.3, 1]
  ]

  hits = weighted_checks.sum do |feature, limit, weight|
    features[FEATURE_INDICES[feature]] < limit ? weight : 0
  end

  (hits.to_f / 5).clamp(0.0, 1.0)
end
449
+
450
# Neutral result returned by #analyze when no model is loaded.
#
# The argument is accepted for interface symmetry but is not inspected.
#
# @param device_data [Hash] Parsed device data
# @return [Hash] fixed result: confidence 0.5, risk 0, no factors or anomalies
def fallback_analysis(device_data)
  neutral = { ai_confidence: 0.5, behavioral_risk_score: 0 }
  neutral.merge(behavioral_factors: [], anomaly_indicators: [], ml_emulator_score: 0.0)
end
459
+
460
+ # Helper methods for feature extraction
461
+
462
# Collects every file path mentioned in the logs or listed as an open file.
#
# Repeated paths are kept: the caller derives an access-frequency entropy from
# this list, and de-duplicating it (as the previous version did) would make
# every path count exactly once. Hash log entries without a "message" are
# treated as empty text, and "open_files" may be nil, a single path or an array.
#
# @param device_data [Hash] Parsed device data
# @return [Array<String>] file paths in encounter order, duplicates included
def extract_file_accesses(device_data)
  logs = device_data[:logs] || []
  from_logs = logs.flat_map do |log|
    log_text = log.is_a?(Hash) ? log["message"].to_s : log.to_s
    log_text.scan(%r{/[/\w.-]+})
  end

  processes = device_data[:processes] || []
  from_processes = processes.flat_map do |process|
    process.is_a?(Hash) ? Array(process["open_files"]) : []
  end

  from_logs + from_processes
end
486
+
487
# Rates raw readings of one sensor for plausibility.
#
# Values that cannot be converted with Float() are dropped. Accelerometer and
# gyroscope readings score 0.5 when every value stays within the sensor's
# magnitude limit and at least two distinct values are present, else 0.0.
# Any other sensor type gets a flat 0.3.
#
# @param values [Array] raw readings
# @param sensor_type [String] "accelerometer", "gyroscope" or another sensor name
# @return [Float] 0.0, 0.3 or 0.5
def analyze_sensor_realism(values, sensor_type)
  return 0.0 if values.empty?

  readings = values.filter_map do |raw|
    Float(raw)
  rescue StandardError
    nil
  end
  return 0.0 if readings.empty?

  # Largest plausible absolute reading per sensor
  # (accelerometer: G-force range, gyroscope: degrees per second).
  limit = { "accelerometer" => 20.0, "gyroscope" => 2000.0 }[sensor_type]
  return 0.3 if limit.nil?

  within_range = readings.all? { |reading| reading.abs <= limit }
  varies = readings.uniq.size > 1
  within_range && varies ? 0.5 : 0.0
end
511
+ end
512
+ end
@@ -11,12 +11,15 @@ module AiRootShield
11
11
  enable_hooking_detection: true,
12
12
  enable_integrity_checks: true,
13
13
  enable_network_analysis: true,
14
- risk_threshold: 50
14
+ enable_ai_behavioral_analysis: true,
15
+ risk_threshold: 50,
16
+ ai_confidence_threshold: 0.7
15
17
  }.freeze
16
18
 
17
19
  def initialize(config = {})
18
20
  @config = DEFAULT_CONFIG.merge(config)
19
21
  @analyzers = initialize_analyzers
22
+ @ai_analyzer = AiBehavioralAnalyzer.new if @config[:enable_ai_behavioral_analysis]
20
23
  end
21
24
 
22
25
  # Perform comprehensive device security scan
@@ -27,6 +30,7 @@ module AiRootShield
27
30
 
28
31
  detected_factors = []
29
32
  risk_scores = []
33
+ ai_result = nil
30
34
 
31
35
  @analyzers.each do |analyzer|
32
36
  next unless analyzer_enabled?(analyzer)
@@ -36,14 +40,36 @@ module AiRootShield
36
40
  risk_scores << result[:risk_score]
37
41
  end
38
42
 
39
- overall_risk = RiskCalculator.calculate_overall_risk(risk_scores, detected_factors)
43
+ # Perform AI behavioral analysis if enabled
44
+ if @ai_analyzer && @config[:enable_ai_behavioral_analysis]
45
+ ai_result = @ai_analyzer.analyze(device_data)
46
+ detected_factors.concat(ai_result[:behavioral_factors])
47
+ risk_scores << ai_result[:behavioral_risk_score]
48
+ end
49
+
50
+ overall_risk = RiskCalculator.calculate_overall_risk(
51
+ risk_scores,
52
+ detected_factors,
53
+ ai_confidence: ai_result&.dig(:ai_confidence)
54
+ )
40
55
 
41
- {
56
+ result = {
42
57
  risk_score: overall_risk,
43
58
  factors: detected_factors.uniq,
44
59
  timestamp: Time.now.to_i,
45
60
  version: AiRootShield::VERSION
46
61
  }
62
+
63
+ # Add AI-specific results if available
64
+ if ai_result
65
+ result.merge!({
66
+ ai_confidence: ai_result[:ai_confidence],
67
+ ml_emulator_score: ai_result[:ml_emulator_score],
68
+ anomaly_indicators: ai_result[:anomaly_indicators]
69
+ })
70
+ end
71
+
72
+ result
47
73
  end
48
74
 
49
75
  private
@@ -70,6 +96,8 @@ module AiRootShield
70
96
  @config[:enable_integrity_checks]
71
97
  when "NetworkAnalyzer"
72
98
  @config[:enable_network_analysis]
99
+ when "AiBehavioralAnalyzer"
100
+ @config[:enable_ai_behavioral_analysis]
73
101
  else
74
102
  true
75
103
  end